<?php

// Script-level cookie state, initialised before the first request.
// NOTE(review): presumably read by the HTTP helper; confirm where these are consumed.
$cookies = '';
$setCookies = false;

// Scrape a single, hard-coded product page.
ScrapeProductPage('http://www.nutritionxcellence.com/index.php/amino-acids/alri-chaind-out-blue-raspberry-60-srv-1253-detail#.UoYPlVDPHGA');

/**
 * Downloads one product page and extracts its fields.
 *
 * The page is fetched with GetPageSource(), wrapped by GetDomObject() and
 * GetXPathObject(), and the product fields below are filled from it.
 * NOTE(review): assumes GetDomObject() returns a DOMDocument and
 * GetXPathObject() a DOMXPath; confirm against their definitions.
 *
 * @param string $URL Address of the product detail page.
 */
function ScrapeProductPage($URL)
{
    $prodPageSource = GetPageSource($URL);
    $dom = GetDomObject($prodPageSource);
    $xPath = GetXPathObject($dom);

    // Product fields, pre-set to their "not found" defaults.
    $Category = '';
    $Manufacturer = '';
    $ProductTitle = '';
    $ProductDescription = '';
    $SKUNumber = 'N/A';
    $SalesPrice = ''; // (USD)
    $MSRPPrice = '';
    $ProductImages = '';
    $NutritionFacts = '';
    $OtherIngredients = '';
    $AllerginInfo = '';
    $Warnings = '';

    // item(0) is null when the document has no <title>; fall back to an
    // empty title so the fields simply keep their defaults instead of
    // triggering a fatal error on ->nodeValue.
    $titleNode = $dom->getElementsByTagName('title')->item(0);
    $title = $titleNode !== null ? $titleNode->nodeValue : '';

    // The title is only used when it has the shape
    // "Category : Manufacturer : Product"; any other shape is ignored.
    $titleArr = explode(':', $title);
    if (count($titleArr) === 3) {
        $Category = trim($titleArr[0]);
        $Manufacturer = trim($titleArr[1]);
        $ProductTitle = trim($titleArr[2]);
    }

    // All elements whose class attribute contains "product-description".
    $descNode = $xPath->query("//*[contains(@class, 'product-description')]");
array_push($arr, $nextURL);
            // Collect the product links of the next listing page into $products.
            $listingPageSource = GetProductsLinks($nextURL, $products);
            $nextURL = '';
        } else {
            // No further listing page: stop paging for this category.
            break;
        }
    }

    $cats[$catName] = $products;
    echo 'Prodcuts colected from ' . $catName . '<br />';
}

// NOTE(review): $products holds whatever the last category iteration left in
// it; if it is reset per category this is not the total across all
// categories. Confirm against the loop head.
echo count($products) . ' found in all catgories' . '<br />';

// Scrape every collected product, grouped by category.
foreach ($cats as $catName => $prodsURLs) {
    echo $catName . ':<br />';
    foreach ($prodsURLs as $productURL) {
        // Upgrade the scheme to HTTPS. The pattern is anchored so that an
        // "http:" appearing later in the URL (path or query) is left alone.
        $productURL = preg_replace('/^http:/', 'https:', $productURL);
        ScrapeProductPage($productURL);
    }
}

/**
 * Sets up a cURL request for the given URL.
 *
 * Terminates the script with a message when the cURL extension is missing.
 * NOTE(review): presumably returns the response body; confirm against the
 * remainder of the function.
 *
 * @param string $URL Address to download.
 */
function GetPageSource($URL)
{
    // is cURL installed yet?
    if (!function_exists('curl_init')) {
        die('Sorry cURL is not installed!');
    }

    // OK cool - then let's create a new cURL resource handle
    $ch = curl_init();

    // Now set some options (most are optional)

    // Set URL to download
    curl_setopt($ch, CURLOPT_URL, $URL);

    // Set a referer
    //curl_setopt($ch, CURLOPT_REFERER, "http://www.example.org/yay.htm");