示例#1
0
<?php

// Script-level state, initialised to "no cookies yet". Neither variable is read in
// the visible part of this file; presumably the HTTP fetch helper consumes them as
// globals -- TODO confirm against GetPageSource.
$cookies = '';
$setCookies = false;
// Scrape one hard-coded product page as soon as the script starts.
ScrapeProductPage('http://www.nutritionxcellence.com/index.php/amino-acids/alri-chaind-out-blue-raspberry-60-srv-1253-detail#.UoYPlVDPHGA');
function ScrapeProductPage($URL)
{
    $prodPageSource = GetPageSource($URL);
    $dom = GetDomObject($prodPageSource);
    $xPath = GetXPathObject($dom);
    $Category = '';
    $Manufacturer = '';
    $ProductTitle = '';
    $ProductDescription = '';
    $SKUNumber = 'N/A';
    $SalesPrice = '';
    #(USD)
    $MSRPPrice = '';
    $ProductImages = '';
    $NutritionFacts = '';
    $OtherIngredients = '';
    $AllerginInfo = '';
    $Warnings = '';
    $title = $dom->getElementsByTagName('title')->item(0)->nodeValue;
    $titleArr = explode(':', $title);
    if (sizeof($titleArr) == 3) {
        $Category = trim($titleArr[0]);
        $Manufacturer = trim($titleArr[1]);
        $ProductTitle = trim($titleArr[2]);
    }
    $descNode = $xPath->query("//*[contains(@class, 'product-description')]");
            array_push($arr, $nextURL);
            $listingPageSource = GetProductsLinks($nextURL, $products);
            $nextURL = '';
        } else {
            break;
        }
    }
    $cats[$catName] = $products;
    echo 'Prodcuts colected from ' . $catName . '<br />';
}
// Print the number of entries currently held in $products.
// NOTE(review): $products is whatever the preceding loop left behind; if it is
// reset per category this is only the last category's count, not a grand total
// -- confirm against the collection loop.
echo count($products) . ' found in all catgories' . '<br />';

// Walk every category and scrape each product URL collected for it.
foreach ($cats as $catName => $prodsURLs) {
    echo $catName . ':<br />';
    foreach ($prodsURLs as $productURL) {
        // Upgrade only the URL scheme to HTTPS. The pattern is anchored to the
        // start of the string (and case-insensitive, as URI schemes are), so an
        // "http:" that appears later in the URL -- e.g. inside a query-string
        // value -- is left untouched instead of being rewritten as well.
        $productURL = preg_replace('/^http:/i', 'https:', $productURL);
        ScrapeProductPage($productURL);
    }
}
function GetPageSource($URL)
{
    // is cURL installed yet?
    if (!function_exists('curl_init')) {
        die('Sorry cURL is not installed!');
    }
    // OK cool - then let's create a new cURL resource handle
    $ch = curl_init();
    // Now set some options (most are optional)
    // Set URL to download
    curl_setopt($ch, CURLOPT_URL, $URL);
    // Set a referer
    //curl_setopt($ch, CURLOPT_REFERER, "http://www.example.org/yay.htm");