Exemple #1
0
/**
 * Scrape one product page and append its fields as a row to data.csv.
 *
 * Category, manufacturer and product title are parsed from the page's
 * <title> element when it splits into exactly three ':'-separated parts.
 * The remaining fields come from XPath / element-id lookups and from a
 * regex over the raw page source. A field that cannot be found keeps its
 * default value instead of triggering an error.
 *
 * Relies on GetPageSource(), GetDomObject(), GetXPathObject(),
 * PrepareField() and PushDataToCSV(), which are defined outside this block.
 * $dom and $xPath are assumed to be DOMDocument / DOMXPath instances
 * -- TODO confirm against those helpers.
 *
 * @param string $URL Address of the product page to scrape.
 * @return void Echoes a "<title> -- Done<br />" progress line as a side effect.
 */
function ScrapeProductPage($URL)
{
    $prodPageSource = GetPageSource($URL);
    $dom = GetDomObject($prodPageSource);
    $xPath = GetXPathObject($dom);

    $Category = '';
    $Manufacturer = '';
    $ProductTitle = '';
    $ProductDescription = '';
    $SKUNumber = 'N/A';
    $SalesPrice = '';
    #(USD)
    $MSRPPrice = '';
    $ProductImages = '';

    # item(0) is null when the document has no <title>; guard before reading it.
    $title = '';
    $titleNode = $dom->getElementsByTagName('title')->item(0);
    if ($titleNode !== null) {
        $title = $titleNode->nodeValue;
    }
    $titleArr = explode(':', $title);
    if (count($titleArr) == 3) {
        $Category = trim($titleArr[0]);
        $Manufacturer = trim($titleArr[1]);
        $ProductTitle = trim($titleArr[2]);
    }

    # query() yields a (possibly empty) node list, or false for a bad
    # expression -- never null -- so test the list length, not is_null().
    $descNodes = $xPath->query("//*[contains(@class, 'product-description')]");
    if (is_object($descNodes) && $descNodes->length > 0) {
        $ProductDescription = $descNodes->item(0)->nodeValue;
        $ProductDescription = trim(str_replace('Description', '', $ProductDescription));
        $ProductDescription = PrepareField($ProductDescription);
    }

    $salesPriceNodes = $xPath->query("//*[contains(@class, 'PricesalesPrice')]");
    if (is_object($salesPriceNodes) && $salesPriceNodes->length > 0) {
        $SalesPrice = $salesPriceNodes->item(0)->nodeValue;
    }

    # preg_match() always populates $matchs with an array (empty on no match),
    # so rely on its return value to know whether group 1 exists.
    if (preg_match("/MSRP:.+?<s>(.+?)<\\/s>/", $prodPageSource, $matchs) === 1) {
        $MSRPPrice = $matchs[1];
    }

    $imageNode = $dom->getElementById('medium-image');
    if ($imageNode !== null) {
        $ProductImages = 'http://www.nutritionxcellence.com' . $imageNode->getAttribute('src');
    }

    # Tabs 1..4 hold nutrition facts, other ingredients, allergen info and
    # warnings respectively; a missing tab leaves its field empty.
    $tabText = array(1 => '', 2 => '', 3 => '', 4 => '');
    foreach (array_keys($tabText) as $i) {
        $tabNode = $dom->getElementById("tabs-{$i}");
        if ($tabNode !== null) {
            $tabText[$i] = PrepareField($tabNode->nodeValue);
        }
    }

    # Push data to a csv file. Header and row are derived from one ordered
    # map so the column names and values cannot drift apart.
    $fields = array(
        'Category' => $Category,
        'Manufacturer' => $Manufacturer,
        'ProductTitle' => $ProductTitle,
        'ProductDescription' => $ProductDescription,
        'SKUNumber' => $SKUNumber,
        'SalesPrice' => $SalesPrice,
        'MSRPPrice' => $MSRPPrice,
        'ProductImages' => $ProductImages,
        'NutritionFacts' => $tabText[1],
        'OtherIngredients' => $tabText[2],
        'AllerginInfo' => $tabText[3],
        'Warnings' => $tabText[4],
    );
    $headerArray = array_keys($fields);
    $rowArray = array_map('strval', array_values($fields));
    PushDataToCSV('data.csv', $headerArray, $rowArray);

    # The title comes from a remote page; escape it before emitting HTML.
    echo htmlspecialchars($title, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . ' -- Done' . '<br />';
}
/**
 * Scrape one product page and append its fields as a row to data.csv.
 *
 * The category is supplied by the caller. Manufacturer, title, description,
 * prices, stock status, image URL and the four information tabs are read
 * from the fetched page. A field that cannot be found keeps its default
 * value instead of triggering an error.
 *
 * Relies on GetPageSource(), GetDomObject(), GetXPathObject(),
 * PrepareField() and PushDataToCSV(), which are defined outside this block.
 * $dom and $xPath are assumed to be DOMDocument / DOMXPath instances
 * -- TODO confirm against those helpers.
 *
 * @param string $URL     Address of the product page to scrape.
 * @param string $catName Category name written to the "Category" column.
 * @return void Echoes a "<heading> -- Done<br />" progress line as a side effect.
 */
function ScrapeProductPage($URL, $catName)
{
    $prodPageSource = GetPageSource($URL);
    $dom = GetDomObject($prodPageSource);
    $xPath = GetXPathObject($dom);

    $Category = $catName;
    $Manufacturer = '';
    $ProductTitle = '';
    $ProductDescription = '';
    $SalesPrice = '';
    #(USD)
    $MSRPPrice = '';
    $ProductImages = '';
    # Initialised up front because it is echoed even when no heading is found.
    $title = '';

    # strpos() returns 0 for a match at the very start of the source, which
    # a loose "!= false" would treat as "not found"; compare strictly.
    $InStock = 'In stock';
    if (strpos($prodPageSource, 'Notify Me') !== false) {
        $InStock = "Out of stock";
    }

    # query() yields a (possibly empty) node list, or false for a bad
    # expression -- never null -- so test the list length, not is_null().
    $manufacturerNodes = $xPath->query("//*[contains(@class, 'manufacturer')]");
    if (is_object($manufacturerNodes) && $manufacturerNodes->length > 0) {
        $Manufacturer = $manufacturerNodes->item(0)->nodeValue;
        $Manufacturer = trim(str_replace('Manufacturer:', '', $Manufacturer));
    }

    $titleNodes = $xPath->query('//*[@id="main"]/div[2]/h1');
    if (is_object($titleNodes) && $titleNodes->length > 0) {
        $title = $titleNodes->item(0)->nodeValue;
        # Strip the "<manufacturer>:" prefix from the heading text.
        $ProductTitle = trim(str_replace("{$Manufacturer}:", '', $title));
    }

    $descNodes = $xPath->query("//*[contains(@class, 'product-description')]");
    if (is_object($descNodes) && $descNodes->length > 0) {
        $ProductDescription = $descNodes->item(0)->nodeValue;
        $ProductDescription = trim(str_replace('Description', '', $ProductDescription));
        $ProductDescription = PrepareField($ProductDescription);
    }

    $salesPriceNodes = $xPath->query("//*[contains(@class, 'PricesalesPrice')]");
    if (is_object($salesPriceNodes) && $salesPriceNodes->length > 0) {
        $SalesPrice = $salesPriceNodes->item(0)->nodeValue;
        # Keep only the first decimal number; otherwise keep the raw text.
        if (preg_match("/\\d+\\.\\d+/", $SalesPrice, $matches)) {
            $SalesPrice = $matches[0];
        }
    }

    # preg_match() always populates $matchs with an array (empty on no match),
    # so rely on its return value to know whether group 1 exists.
    if (preg_match("/MSRP:.+?<s>(.+?)<\\/s>/", $prodPageSource, $matchs) === 1) {
        $MSRPPrice = $matchs[1];
    }

    $imageNode = $dom->getElementById('medium-image');
    if ($imageNode !== null) {
        $ProductImages = 'http://www.nutritionxcellence.com' . $imageNode->getAttribute('src');
    }

    # Tabs 1..4 hold nutrition facts, other ingredients, allergen info and
    # warnings respectively; a missing tab leaves its field empty.
    $tabText = array(1 => '', 2 => '', 3 => '', 4 => '');
    foreach (array_keys($tabText) as $i) {
        $tabNode = $dom->getElementById("tabs-{$i}");
        if ($tabNode !== null) {
            $tabText[$i] = PrepareField($tabNode->nodeValue);
        }
    }

    # Push data to a csv file. Header and row are derived from one ordered
    # map so the column names and values cannot drift apart.
    $fields = array(
        'Category' => $Category,
        'Manufacturer' => $Manufacturer,
        'ProductTitle' => $ProductTitle,
        'ProductDescription' => $ProductDescription,
        'SalesPrice' => $SalesPrice,
        'MSRPPrice' => $MSRPPrice,
        'InStock' => $InStock,
        'ProductImages' => $ProductImages,
        'NutritionFacts' => $tabText[1],
        'OtherIngredients' => $tabText[2],
        'AllerginInfo' => $tabText[3],
        'Warnings' => $tabText[4],
    );
    $headerArray = array_keys($fields);
    $rowArray = array_map('strval', array_values($fields));
    PushDataToCSV('data.csv', $headerArray, $rowArray);

    # The heading comes from a remote page; escape it before emitting HTML.
    echo htmlspecialchars($title, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . ' -- Done' . '<br />';
}