Esempio n. 1
0
<?php

require_once 'simpletest/browser.php';

// Shared browser instance: GetPageSource() looks it up as $GLOBALS['browser'],
// so this must run at global scope before any page is fetched.
// Plain assignment is used on purpose: "=& new" is deprecated since PHP 5.3
// and is a parse error from PHP 7.0 on (objects are handles anyway).
$browser = new SimpleBrowser();
$browser->useCookies();

$source = GetPageSource('https://www.nutritionxcellence.com/protein');
$dom = GetDomObject($source);
$xpath = GetXPathObject($dom);

// Print the value of the first attribute of every anchor on the page.
$subcCategoriesLinksNodes = $xpath->query('//a');
foreach ($subcCategoriesLinksNodes as $node1) {
    // An <a> without any attribute yields null from item(0); print an empty
    // line for it instead of reading a property on null.
    $firstAttribute = $node1->attributes->item(0);
    $ttt = $firstAttribute !== null ? $firstAttribute->nodeValue : '';
    echo $ttt . '<br />';
}
echo 'asdf';
/**
 * Fetches a page through the shared browser object and returns its content.
 *
 * The browser is read from $GLOBALS['browser']; it must provide get() and
 * getContent().
 *
 * @param string $URL Address of the page to request.
 * @return mixed Whatever the browser's getContent() returns after the request.
 */
function GetPageSource($URL)
{
    $client = $GLOBALS['browser'];

    // Issue the request first; getContent() then reports the page just loaded.
    $client->get($URL);

    return $client->getContent();
}
/**
 * Parses an HTML string into a DOMDocument.
 *
 * libxml error reporting is switched to internal buffering so that malformed
 * real-world markup does not emit PHP warnings; the buffer is cleared after
 * each parse so it does not grow over a long scraping run.
 *
 * @param string|null $pageSource HTML source to parse.
 * @return DOMDocument|null The parsed document; an empty document when the
 *                          source is an empty string; null when it is null.
 */
function GetDomObject($pageSource)
{
    $dom = new DOMDocument('1.0');
    libxml_use_internal_errors(true);

    if (is_null($pageSource)) {
        return null;
    }

    $html = (string) $pageSource;
    // loadHTML('') warns on PHP 5/7 and throws ValueError on PHP 8, so an
    // empty source simply yields the (still empty) document.
    if ($html !== '') {
        $dom->loadHTML($html);
        // Drop the parse errors buffered by libxml; nothing reads them here.
        libxml_clear_errors();
    }

    return $dom;
}
Esempio n. 2
0
/**
 * Collects product page URLs from a category listing page.
 *
 * Every <img> whose parent element carries an href is a candidate; the link
 * is kept when the resulting URL contains "/index.php/". The script stops
 * via die() when the listing page comes back empty.
 *
 * @param string $catURL   URL of the category listing page to fetch.
 * @param array  $products Receives the matching URLs, appended in document order.
 * @return string The raw source of the listing page.
 */
function GetProductsLinks($catURL, &$products)
{
    $listingPageSource = GetPageSource($catURL);
    if (empty($listingPageSource)) {
        die('Sorry cURL failed to reterive page source');
    }

    $siteRoot = 'http://www.nutritionxcellence.com';
    $dom = GetDomObject($listingPageSource);

    foreach ($dom->getElementsByTagName('img') as $img) {
        $parent = $img->parentNode;
        // Only element nodes have attributes to read.
        if (!($parent instanceof DOMElement)) {
            continue;
        }

        $href = $parent->getAttribute('href');
        // Site-relative hrefs get the site root prepended; hrefs that are
        // already absolute are used as-is instead of producing a malformed
        // "http://hosthttp://host/..." URL.
        $link = preg_match('#^https?://#i', $href) ? $href : $siteRoot . $href;

        if (strpos($link, '/index.php/') !== false) {
            $products[] = $link;
        }
    }

    return $listingPageSource;
}
Esempio n. 3
0
/**
 * Scrapes a single product page and hands the extracted fields to
 * PushDataToCSV() as one row for 'data.csv'.
 *
 * Fields that cannot be found on the page are left as empty strings, except
 * the stock status, which defaults to "In stock" and becomes "Out of stock"
 * when the page source contains "Notify Me".
 *
 * @param string $URL     Address of the product page.
 * @param string $catName Category name stored in the row's first column.
 * @return void Echoes "<title> -- Done<br />" when finished.
 */
function ScrapeProductPage($URL, $catName)
{
    $prodPageSource = GetPageSource($URL);
    $dom = GetDomObject($prodPageSource);
    $xPath = GetXPathObject($dom);

    // Strict comparison so that a match at offset 0 still counts as found.
    $InStock = strpos($prodPageSource, 'Notify Me') !== false ? 'Out of stock' : 'In stock';

    $Manufacturer = '';
    $manufacturerText = GetFirstNodeValue($xPath, "//*[contains(@class, 'manufacturer')]");
    if ($manufacturerText !== null) {
        $Manufacturer = trim(str_replace('Manufacturer:', '', $manufacturerText));
    }

    $title = '';
    $ProductTitle = '';
    $titleText = GetFirstNodeValue($xPath, '//*[@id="main"]/div[2]/h1');
    if ($titleText !== null) {
        $title = $titleText;
        // Strip the "<manufacturer>:" prefix only when a manufacturer is known;
        // with an empty manufacturer the search string would be ":" and every
        // colon in the title would be deleted.
        $ProductTitle = $Manufacturer !== ''
            ? trim(str_replace("{$Manufacturer}:", '', $title))
            : trim($title);
    }

    $ProductDescription = '';
    $descriptionText = GetFirstNodeValue($xPath, "//*[contains(@class, 'product-description')]");
    if ($descriptionText !== null) {
        $ProductDescription = trim(preg_replace('/Description/', '', $descriptionText));
        $ProductDescription = PrepareField($ProductDescription);
    }

    // Prices are presumably in USD (per the original "(USD)" note) -- TODO confirm.
    $SalesPrice = '';
    $salesPriceText = GetFirstNodeValue($xPath, "//*[contains(@class, 'PricesalesPrice')]");
    if ($salesPriceText !== null) {
        $SalesPrice = $salesPriceText;
        // Keep just the decimal number when one is present in the text.
        if (preg_match('/\d+\.\d+/', $SalesPrice, $matches)) {
            $SalesPrice = $matches[0];
        }
    }

    // preg_match() always leaves an array in its third argument, so success
    // has to be decided from its return value, not from is_null().
    $MSRPPrice = '';
    if (preg_match('/MSRP:.+?<s>(.+?)<\/s>/', $prodPageSource, $msrpMatch) === 1) {
        $MSRPPrice = $msrpMatch[1];
    }

    $ProductImages = '';
    $imageNode = $dom->getElementById('medium-image');
    if (!is_null($imageNode)) {
        $ProductImages = 'http://www.nutritionxcellence.com' . $imageNode->getAttribute('src');
    }

    // Elements tabs-1..tabs-4 map, in order, to nutrition facts, other
    // ingredients, allergen info and warnings.
    $tabTexts = array(1 => '', 2 => '', 3 => '', 4 => '');
    for ($i = 1; $i <= 4; $i++) {
        $tabNode = $dom->getElementById("tabs-{$i}");
        if (!is_null($tabNode)) {
            $tabTexts[$i] = PrepareField($tabNode->nodeValue);
        }
    }

    # Push data to a csv file.
    $headerArray = array(
        'Category', 'Manufacturer', 'ProductTitle', 'ProductDescription',
        'SalesPrice', 'MSRPPrice', 'InStock', 'ProductImages',
        'NutritionFacts', 'OtherIngredients', 'AllerginInfo', 'Warnings',
    );
    // Every cell is passed on as a string, as before.
    $rowArray = array_map('strval', array(
        $catName, $Manufacturer, $ProductTitle, $ProductDescription,
        $SalesPrice, $MSRPPrice, $InStock, $ProductImages,
        $tabTexts[1], $tabTexts[2], $tabTexts[3], $tabTexts[4],
    ));
    PushDataToCSV('data.csv', $headerArray, $rowArray);

    echo $title . ' -- Done' . '<br />';
}

/**
 * Returns the text of the first node matching an XPath expression.
 *
 * @param object $xPath      Object exposing query(), as returned by
 *                           GetXPathObject() (presumably a DOMXPath -- confirm).
 * @param string $expression XPath expression to evaluate.
 * @return string|null The node's nodeValue, or null when the query fails or
 *                     matches nothing.
 */
function GetFirstNodeValue($xPath, $expression)
{
    $nodes = $xPath->query($expression);
    // query() gives false for an invalid expression and an empty list when
    // nothing matches; item(0) on an empty list would be null.
    if ($nodes === false || $nodes->length === 0) {
        return null;
    }

    return $nodes->item(0)->nodeValue;
}