/**
 * Load an XML/RSS document and collect selected child values from every
 * element named $tag.
 *
 * @param string $tag   Name of the repeating element to iterate over (e.g. "item").
 * @param array  $array Child tag names to read from each element. Only the
 *                      values of this array are used; they serve both as the
 *                      tag name to look up and as the key in the result row.
 * @param string $url   Location passed to DOMDocument::load().
 *
 * @return array List with one associative array per $tag element, mapping each
 *               requested tag name to the node value of its first occurrence,
 *               or to null when the element has no such descendant. An empty
 *               list is returned when the document cannot be loaded.
 */
function rss_to_array($tag, $array, $url)
{
    $rss_array = array();

    $doc = new DOMDocument();
    if (!$doc->load($url)) {
        // Nothing was parsed, so there are no elements to report.
        return $rss_array;
    }

    foreach ($doc->getElementsByTagName($tag) as $node) {
        // Start from a fresh row so values can never leak between elements.
        $items = array();
        foreach ($array as $value) {
            // item(0) is null when the tag is absent; guard before dereferencing.
            $child = $node->getElementsByTagName($value)->item(0);
            $items[$value] = ($child !== null) ? $child->nodeValue : null;
        }
        $rss_array[] = $items;
    }

    return $rss_array;
}
//To remove warning that are pre-existing in a site extract($_POST); set_time_limit(150); session_start(); if (isset($submit)) { $title = ""; //title of the page $image_src = ""; //Src of a page $desc = ""; //decription of a page in text $dom = new DOMdocument(); $dom->loadHTMLFile($theUrl); if (strpos($theUrl, 'www.youtube.com/watch?v') !== false) { //echo "hi"; foreach ($dom->getElementsByTagName("link") as $link_tag) { if ($link_tag->getAttribute('itemprop') == 'embedURL') { $image_src = $link_tag->getAttribute('href'); break; } } } /** Image Array **/ $image_src_attr = array('link' => '1', 'aspect-ratio' => 10000); //Title of a page foreach ($dom->getElementsByTagName("meta") as $meta_ob) { // echo"</br>"; for testing this code $meta_content = $meta_ob->getAttribute("content"); $meta_name = $meta_ob->getAttribute("name"); // echo $meta_name.$meta_content;//for testing this code if (strcasecmp($meta_name, 'og_title') == 0) {
/**
 * rss_fetch
 *
 * Loads the XML document at $url and converts every element named $tagname
 * that has at least one child node into an associative array keyed by child
 * node name. Elements without child nodes are left out of the result. When
 * several children of one element share a node name, the last one wins.
 *
 * @param mixed  $url     location to load the feed from (passed to DOMDocument::load)
 * @param string $tagname element name to collect, optional, "item" is the default
 * @access public
 * @return array|bool list of node-name => node-value arrays, or false when
 *                    the document could not be loaded
 */
function rss_fetch($url, $tagname = 'item')
{
    $document = new DOMDocument();
    if (!$document->load($url)) {
        return false;
    }

    $rows = array();
    foreach ($document->getElementsByTagName($tagname) as $entry) {
        // Childless elements contribute nothing to the result.
        if (!$entry->hasChildNodes()) {
            continue;
        }

        $fields = array();
        foreach ($entry->childNodes as $child) {
            $fields[$child->nodeName] = $child->nodeValue;
        }
        $rows[] = $fields;
    }

    return $rows;
}
$postedItemsList = nxs_loadFromOKV($okv); prr($postedItemsList); if ($postedItemsList!='') $postedItemsList = explode("\n",$postedItemsList); else $postedItemsList = array(); prr($postedItemsList); global $nxs_gCookiesArr; $loginError = doConnectToGooglePlus2($email, $pass); if (!$loginError) { if ($isOutput) echo "### Logged In - No Problems<br/>"; $doc = new DOMdocument(); $doc->load($url); $rss_array = array(); $items = array(); $tag = 'item'; //## Reversing RSS foreach($doc->getElementsByTagName($tag) as $node) { $rss_array[] = $node; } $rss_array = array_reverse($rss_array); //## Importing foreach($rss_array as $node) { $postDate = $node->getElementsByTagName('pubDate')->item(0)->nodeValue; $title = $node->getElementsByTagName('title')->item(0)->nodeValue; $guid = $node->getElementsByTagName('guid')->item(0)->nodeValue; $guid = str_ireplace($commonGuid, "", $guid); if ($isOutput) echo "### Found Item: ".$title."<br/>"; if (in_array($guid, $postedItemsList)) { if ($isOutput) echo " ### ".$guid." ******* Skipped<br/>"; continue; }
http://api.tudou.com/v6/video/info?app_key=23365aeb6a339f06&format=xml&itemCodes=sample ===== 2.通过API获取视频信息(XML格式) 3.分析XML,并获得无广告地址 4.输出 */ if ($_POST['tudou']) { //1.获取视频的ID $tudou = $_POST['tudou']; $id = basename($tudou, '.html'); $app_key = '23365aeb6a339f06'; $api = 'http://api.tudou.com/v6/video/info?app_key=' . $app_key . '&format=xml&itemCodes=' . $id; $xml = file_get_contents($api); $dom = new DOMdocument('1.0', 'utf-8'); $dom->loadxml($xml); $nl = $dom->getElementsByTagName('outerGPlayerUrl'); $tudou2 = $nl->item(0)->textContent; } ?> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> <html xmlns="http://www.w3.org/1999/xhtml" lang="zh-CN"> <head> <title>xml demo-利用XML获取无广告土豆视屏</title> <p></p> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> <meta name="description" content="" /> </head> <body> <h1>感觉不会再看土豆广告了</h1> <p>测试地址:</p> <table border="1">
/**
 * Crawl details from an Amazon product page and append them as one row to a
 * CSV file in the WordPress upload directory.
 *
 * The row holds, in order: ASIN, title, brand, list price, price, "you save",
 * shipping, category, description, feature bullets and up to ten image URLs
 * (empty strings for unused image slots).
 *
 * Nothing is written when the page cannot be downloaded or when it contains
 * no <span class="a-size-medium a-color-success"> element, which this code
 * treats as the availability marker.
 *
 * Side effects: increments $GLOBALS['count'] for every written row and sleeps
 * for five seconds on every 100th row; leaves libxml internal error handling
 * enabled; prints any caught Exception.
 *
 * @param string $ASIN_URL Location of the product page, read with file_get_contents().
 * @param string $ASIN     Product identifier, written as the first CSV column.
 * @param string $uniqid   Base name (without ".csv") of the output file.
 *
 * @return void
 */
function crawl_product_details($ASIN_URL, $ASIN, $uniqid)
{
    try {
        $body = file_get_contents($ASIN_URL);
        if ($body === false || $body === '') {
            // Download failed or returned nothing: there is no markup to parse.
            return;
        }

        $doc = new DOMDocument();
        // Real-world HTML is rarely valid; collect parser complaints internally
        // instead of emitting a warning for each of them.
        libxml_use_internal_errors(true);
        $doc->loadHTML($body);
        // Drop the collected errors so the buffer does not grow across calls.
        libxml_clear_errors();
        $docxpath = new DOMXPath($doc);

        // Skip products that do not show the availability marker.
        $availability = $docxpath->query('//span[@class="a-size-medium a-color-success"]');
        if ($availability === false || $availability->length === 0) {
            return;
        }

        // Text content of the elements that are looked up by id ('' when absent).
        $text = array();
        $ids = array('productTitle', 'brand', 'priceblock_ourprice', 'regularprice_savings', 'feature-bullets');
        foreach ($ids as $id) {
            $element = $doc->getElementById($id);
            $text[$id] = ($element !== null) ? $element->textContent : '';
        }

        // Up to ten image URLs containing "L.jpg", taken from the landing
        // image's data-a-dynamic-image attribute. The element may be missing.
        $images = array_fill(0, 10, '');
        $image_element = $doc->getElementById('landingImage');
        if ($image_element !== null) {
            $regex = '/\\b(https?|ftp|file|http):\\/\\/[-A-Z0-9+&@#\\/%?=~_|$!:,.;]*[A-Z0-9+&@#\\/%=~_|$]/i';
            preg_match_all($regex, $image_element->getAttribute('data-a-dynamic-image'), $matches);
            $slot = 0;
            foreach ($matches[0] as $url) {
                if ($slot < 10 && strpos($url, 'L.jpg') !== false) {
                    $images[$slot] = $url;
                    $slot++;
                }
            }
        }

        // When several nodes match, the last one wins (as before).
        $list_price = '';
        $list_price_nodes = $docxpath->query('//td[@class="a-span12 a-color-secondary a-size-base a-text-strike"]');
        foreach ($list_price_nodes as $list_price_node) {
            $list_price = $list_price_node->textContent;
        }

        // NOTE(review): the previous code also parsed a price out of
        // span.shipping3P but never used the result, so the column has always
        // been "FREE". That output is kept; confirm whether the parsed
        // shipping price was meant to be written here instead.
        $shipping = "FREE";

        $category = '';
        foreach ($docxpath->query('//a[@class="nav-a nav-b"]') as $category_meta) {
            $category = $category_meta->textContent;
        }

        $description = '';
        foreach ($doc->getElementsByTagName('meta') as $description_meta) {
            if ($description_meta->getAttribute('name') === 'description') {
                $description = $description_meta->getAttribute('content');
            }
        }

        $product = array_merge(
            array(
                $ASIN,
                $text['productTitle'],
                $text['brand'],
                $list_price,
                $text['priceblock_ourprice'],
                $text['regularprice_savings'],
                $shipping,
                $category,
                $description,
                $text['feature-bullets'],
            ),
            $images
        );

        // Pause briefly after every 100 rows. The counter may not have been
        // initialised by the caller yet.
        $GLOBALS['count'] = (isset($GLOBALS['count']) ? $GLOBALS['count'] : 0) + 1;
        if ($GLOBALS['count'] % 100 == 0) {
            sleep(5);
        }

        $upload_dir = wp_upload_dir();
        $fileOutput = $upload_dir['path'] . '/' . $uniqid . '.csv';
        $fp = fopen($fileOutput, "a");
        if ($fp === false) {
            throw new Exception('Unable to open ' . $fileOutput . ' for appending');
        }
        fputcsv($fp, $product);
        fclose($fp);
    } catch (Exception $e) {
        print $e;
    }
}