Ejemplo n.º 1
0
/**
 * Load an XML/RSS document and extract selected child values from every
 * element with the given tag name.
 *
 * @param string $tag   Tag name of the repeating element (e.g. "item").
 * @param array  $array List of child tag names to read from each element;
 *                      only the values of this array are used, keys are ignored.
 * @param string $url   Location passed to DOMDocument::load().
 *
 * @return array One associative array per matched element, keyed by the
 *               requested child tag names. A child tag that is absent from
 *               an element yields null for that key.
 */
function rss_to_array($tag, $array, $url)
{
    $doc = new DOMDocument();
    $doc->load($url);

    $rss_array = array();
    foreach ($doc->getElementsByTagName($tag) as $node) {
        // Start from a clean row for each element so that nothing can leak
        // over from the previous one.
        $row = array();
        foreach ($array as $field) {
            $child = $node->getElementsByTagName($field)->item(0);
            // item(0) is null when the element has no such child; guard the
            // property read instead of dereferencing null.
            $row[$field] = ($child !== null) ? $child->nodeValue : null;
        }
        $rss_array[] = $row;
    }

    return $rss_array;
}
Ejemplo n.º 2
0
//To remove warnings that are pre-existing in a site
extract($_POST);
set_time_limit(150);
session_start();
if (isset($submit)) {
    $title = "";
    //title of the page
    $image_src = "";
    //Src of a page
    $desc = "";
    //description of a page in text
    $dom = new DOMdocument();
    $dom->loadHTMLFile($theUrl);
    if (strpos($theUrl, 'www.youtube.com/watch?v') !== false) {
        //echo "hi";
        foreach ($dom->getElementsByTagName("link") as $link_tag) {
            if ($link_tag->getAttribute('itemprop') == 'embedURL') {
                $image_src = $link_tag->getAttribute('href');
                break;
            }
        }
    }
    /** Image Array  **/
    $image_src_attr = array('link' => '1', 'aspect-ratio' => 10000);
    //Title of a page
    foreach ($dom->getElementsByTagName("meta") as $meta_ob) {
        //  echo"</br>"; for testing this code
        $meta_content = $meta_ob->getAttribute("content");
        $meta_name = $meta_ob->getAttribute("name");
        // echo $meta_name.$meta_content;//for testing this code
        if (strcasecmp($meta_name, 'og_title') == 0) {
Ejemplo n.º 3
0
/**
 * rss_fetch
 *
 * Loads the XML document at $url and returns, for every element named
 * $tagname, an associative array of its direct child nodes
 * (node name => node value).
 *
 * Whitespace-only text nodes (the indentation between child elements) are
 * ignored so they do not show up as a meaningless "#text" entry. Text nodes
 * with real content are kept under the "#text" key. When several children
 * share a name, the last one wins.
 *
 * @param mixed $url to fetch rss feed from
 * @param string $tagname , optional "item" is the default
 * @access public
 * @return array or bool false when the document cannot be loaded
 */
function rss_fetch($url, $tagname = 'item')
{
    $dom = new DOMDocument();
    $success = $dom->load($url);
    if (!$success) {
        return false;
    }

    $items = array();
    foreach ($dom->getElementsByTagName($tagname) as $element) {
        // Elements without any child nodes are not reported at all.
        if (!$element->childNodes->length) {
            continue;
        }

        $item = array();
        foreach ($element->childNodes as $node) {
            // Skip formatting whitespace between child elements.
            if ($node->nodeType === XML_TEXT_NODE && trim($node->nodeValue) === '') {
                continue;
            }
            $item[$node->nodeName] = $node->nodeValue;
        }
        $items[] = $item;
    }

    return $items;
}
Ejemplo n.º 4
0
  $postedItemsList = nxs_loadFromOKV($okv); prr($postedItemsList);
  if ($postedItemsList!='') $postedItemsList = explode("\n",$postedItemsList); else $postedItemsList = array();  prr($postedItemsList);
  
 
  global $nxs_gCookiesArr;
  $loginError = doConnectToGooglePlus2($email, $pass); 
  if (!$loginError)
  { 
    if ($isOutput) echo "### Logged In - No Problems<br/>";
    $doc = new DOMdocument();
    $doc->load($url);
    $rss_array = array();
    $items = array();
    $tag = 'item';
    //## Reversing RSS
    foreach($doc->getElementsByTagName($tag) as $node) 
    {    
        $rss_array[] = $node;
    }
    $rss_array = array_reverse($rss_array);
    //## Importing
    foreach($rss_array as $node) 
    {    
      $postDate = $node->getElementsByTagName('pubDate')->item(0)->nodeValue;  
      $title = $node->getElementsByTagName('title')->item(0)->nodeValue;      
      $guid = $node->getElementsByTagName('guid')->item(0)->nodeValue;  
      $guid = str_ireplace($commonGuid, "", $guid);
      
      if ($isOutput) echo "### Found Item: ".$title."<br/>";       
      if (in_array($guid, $postedItemsList)) { if ($isOutput) echo "&nbsp;&nbsp;&nbsp;&nbsp;### ".$guid." ******* Skipped<br/>"; continue; }      
      
Ejemplo n.º 5
0
http://api.tudou.com/v6/video/info?app_key=23365aeb6a339f06&format=xml&itemCodes=sample
=====
2.通过API获取视频信息(XML格式)
3.分析XML,并获得无广告地址
4.输出
*/
// Runs only when a non-empty "tudou" string was posted; checking with
// isset()/is_string() avoids an undefined-index warning on a plain GET.
if (isset($_POST['tudou']) && is_string($_POST['tudou']) && $_POST['tudou']) {
    // Step 1: derive the video ID from the submitted URL (file name without ".html").
    $tudou = $_POST['tudou'];
    $id = basename($tudou, '.html');
    $app_key = '23365aeb6a339f06';
    // The ID comes from user input, so encode it before placing it in the query string.
    $api = 'http://api.tudou.com/v6/video/info?app_key=' . $app_key . '&format=xml&itemCodes=' . rawurlencode($id);
    $xml = file_get_contents($api);
    $dom = new DOMDocument('1.0', 'utf-8');
    // Stays null when the request fails, the response is not valid XML, or
    // the response has no <outerGPlayerUrl> element.
    $tudou2 = null;
    // Never hand false/'' to loadXML(): it rejects empty input.
    if ($xml !== false && $xml !== '' && $dom->loadXML($xml)) {
        $nl = $dom->getElementsByTagName('outerGPlayerUrl');
        $player = $nl->item(0);
        if ($player !== null) {
            $tudou2 = $player->textContent;
        }
    }
}
?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="zh-CN">
<head>
<title>xml demo-利用XML获取无广告土豆视屏</title>
<p></p>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="description" content="" />
</head>
    <body>
    	<h1>感觉不会再看土豆广告了</h1>    	
    	<p>测试地址:</p>
    	<table border="1">
/**
 * Crawl one Amazon product page and append the extracted details as a single
 * row to "<upload path>/<$uniqid>.csv".
 *
 * Nothing is written when the page cannot be downloaded or when it has no
 * availability marker (span with class "a-size-medium a-color-success").
 *
 * Side effects: increments $GLOBALS['count'] for every row produced and
 * sleeps for 5 seconds after every 100th row.
 *
 * @param string $ASIN_URL URL of the product page to download.
 * @param string $ASIN     Product identifier, written as the first CSV column.
 * @param string $uniqid   Base name (without extension) of the CSV file.
 *
 * @return void
 */
function crawl_product_details($ASIN_URL, $ASIN, $uniqid)
{
    // Crawl details from amazon
    try {
        $body = file_get_contents($ASIN_URL);
        if ($body === false || $body === '') {
            // Download failed; DOMDocument::loadHTML() rejects empty input.
            return;
        }

        $doc = new DOMDocument();
        libxml_use_internal_errors(true);
        $doc->loadHTML($body);
        // Real-world HTML produces many parse errors; drop them so they do
        // not pile up in memory over a long crawl.
        libxml_clear_errors();
        $docxpath = new DOMXPath($doc);

        // Only products that show the availability marker are exported.
        $availablity_element = $docxpath->query('//span[@class="a-size-medium a-color-success"]');
        if ($availablity_element === false || $availablity_element->length === 0) {
            return;
        }

        // Text fields looked up by element id; a missing element gives ''.
        $ids = array(
            'title' => 'productTitle',
            'brand' => 'brand',
            'price' => 'priceblock_ourprice',
            'you_save' => 'regularprice_savings',
            'feature' => 'feature-bullets',
        );
        $text = array();
        foreach ($ids as $name => $id) {
            $element = $doc->getElementById($id);
            $text[$name] = ($element !== null) ? $element->textContent : '';
        }

        // Up to ten image URLs containing "L.jpg", taken from the
        // data-a-dynamic-image attribute of the main image.
        $images = array_fill(0, 10, '');
        $image_element = $doc->getElementById('landingImage');
        if ($image_element !== null) {
            $regex = '/\\b(https?|ftp|file|http):\\/\\/[-A-Z0-9+&@#\\/%?=~_|$!:,.;]*[A-Z0-9+&@#\\/%=~_|$]/i';
            preg_match_all($regex, $image_element->getAttribute('data-a-dynamic-image'), $matches);
            $slot = 0;
            foreach ($matches[0] as $url) {
                if ($slot >= 10) {
                    break;
                }
                if (strpos($url, 'L.jpg') !== false) {
                    $images[$slot] = $url;
                    $slot++;
                }
            }
        }

        // The last matching node wins for list price and category.
        $list_price = '';
        foreach ($docxpath->query('//td[@class="a-span12 a-color-secondary a-size-base a-text-strike"]') as $node) {
            $list_price = $node->textContent;
        }

        // Shipping defaults to "FREE"; a parsed shipping amount replaces it.
        // Letters and "+" are stripped from the text; if nothing is left
        // the default is kept.
        $shipping = "FREE";
        foreach ($docxpath->query('//span[@class="a-size-small a-color-secondary shipping3P"]') as $node) {
            $shipping_price = preg_replace('/[a-zA-Z]+/', '', $node->textContent);
            $shipping_price = trim(str_replace('+', '', $shipping_price));
            if ($shipping_price !== '') {
                $shipping = $shipping_price;
            }
        }

        $category = '';
        foreach ($docxpath->query('//a[@class="nav-a nav-b"]') as $category_meta) {
            $category = $category_meta->textContent;
        }

        $description = '';
        foreach ($doc->getElementsByTagName('meta') as $description_meta) {
            if ($description_meta->getAttribute('name') === 'description') {
                $description = $description_meta->getAttribute('content');
            }
        }

        // CSV column order: ASIN, Title, Brand, List Price, Price, You Save,
        // Shipping, Category, Description, Feature, Image1 .. Image10.
        $product = array_merge(
            array(
                $ASIN,
                $text['title'],
                $text['brand'],
                $list_price,
                $text['price'],
                $text['you_save'],
                $shipping,
                $category,
                $description,
                $text['feature'],
            ),
            $images
        );

        // Global row counter; pause briefly after every 100 rows.
        if (!isset($GLOBALS['count'])) {
            $GLOBALS['count'] = 0;
        }
        $GLOBALS['count'] = $GLOBALS['count'] + 1;
        if ($GLOBALS['count'] % 100 == 0) {
            sleep(5);
        }

        $upload_dir = wp_upload_dir();
        $fileOutput = $upload_dir['path'] . '/' . $uniqid . '.csv';
        $fp = fopen($fileOutput, "a");
        if ($fp === false) {
            // Report through the same path as every other failure here.
            throw new RuntimeException('Unable to open ' . $fileOutput . ' for appending');
        }
        fputcsv($fp, $product);
        fclose($fp);
    } catch (Exception $e) {
        print $e;
    }
}