/**
 * Scrape one product page and push its fields as a single row to data.csv.
 *
 * The page is fetched with GetPageSource() and parsed via GetDomObject() /
 * GetXPathObject(). Category, manufacturer and product title are taken from
 * the <title> element when it has exactly three ':'-separated parts. Any
 * field whose markup is missing is left at its default ('' or 'N/A' for the
 * SKU) instead of aborting the scrape.
 *
 * NOTE(review): this file contains a second definition of ScrapeProductPage
 * with a different parameter list; PHP cannot load both -- confirm which
 * revision is the live one.
 *
 * @param string $URL Address of the product page, handed to GetPageSource().
 * @return void Emits a progress line and calls PushDataToCSV().
 */
function ScrapeProductPage($URL)
{
    $prodPageSource = GetPageSource($URL);
    $dom = GetDomObject($prodPageSource);
    $xPath = GetXPathObject($dom);

    $Category = '';
    $Manufacturer = '';
    $ProductTitle = '';
    $ProductDescription = '';
    $SKUNumber = 'N/A';
    $SalesPrice = ''; // (USD)
    $MSRPPrice = '';
    $ProductImages = '';

    // <title> is expected to look like "Category : Manufacturer : Product".
    $title = '';
    $titleNode = $dom->getElementsByTagName('title')->item(0);
    if ($titleNode !== null) {
        $title = $titleNode->nodeValue;
        $titleArr = explode(':', $title);
        if (count($titleArr) == 3) {
            $Category = trim($titleArr[0]);
            $Manufacturer = trim($titleArr[1]);
            $ProductTitle = trim($titleArr[2]);
        }
    }

    // query() yields a (possibly empty) node list or false, never null, so
    // the presence test has to be made on the first item itself.
    $descList = $xPath->query("//*[contains(@class, 'product-description')]");
    $descNode = $descList ? $descList->item(0) : null;
    if ($descNode !== null) {
        $ProductDescription = trim(str_replace('Description', '', $descNode->nodeValue));
        $ProductDescription = PrepareField($ProductDescription);
    }

    $priceList = $xPath->query("//*[contains(@class, 'PricesalesPrice')]");
    $priceNode = $priceList ? $priceList->item(0) : null;
    if ($priceNode !== null) {
        $SalesPrice = $priceNode->nodeValue;
    }

    // The MSRP is read from the raw markup: the struck-through value after "MSRP:".
    // preg_match() always fills $matchs with an array, so test its return value.
    if (preg_match('/MSRP:.+?<s>(.+?)<\/s>/', $prodPageSource, $matchs) === 1) {
        $MSRPPrice = $matchs[1];
    }

    $imageNode = $dom->getElementById('medium-image');
    if ($imageNode !== null) {
        $ProductImages = 'http://www.nutritionxcellence.com' . $imageNode->getAttribute('src');
    }

    // Elements tabs-1 .. tabs-4 map, in order, to nutrition facts,
    // other ingredients, allergen info and warnings.
    $tabText = array(1 => '', 2 => '', 3 => '', 4 => '');
    foreach (array_keys($tabText) as $i) {
        $tabNode = $dom->getElementById("tabs-{$i}");
        if ($tabNode !== null) {
            $tabText[$i] = PrepareField($tabNode->nodeValue);
        }
    }
    $NutritionFacts = $tabText[1];
    $OtherIngredients = $tabText[2];
    $AllerginInfo = $tabText[3];
    $Warnings = $tabText[4];

    // Push data to a csv file.
    $headerArray = array(
        "Category", "Manufacturer", "ProductTitle", "ProductDescription",
        "SKUNumber", "SalesPrice", "MSRPPrice", "ProductImages",
        "NutritionFacts", "OtherIngredients", "AllerginInfo", "Warnings",
    );
    // Every cell is forced to a string, as the original interpolated row did.
    $rowArray = array_map('strval', array(
        $Category, $Manufacturer, $ProductTitle, $ProductDescription,
        $SKUNumber, $SalesPrice, $MSRPPrice, $ProductImages,
        $NutritionFacts, $OtherIngredients, $AllerginInfo, $Warnings,
    ));
    PushDataToCSV('data.csv', $headerArray, $rowArray);

    // The title comes from a remote page; escape it before echoing into HTML.
    echo htmlspecialchars($title, ENT_QUOTES, 'UTF-8') . ' -- Done' . '<br />';
}
/**
 * Scrape one product page and push its fields as a single row to data.csv.
 *
 * The page is fetched with GetPageSource() and parsed via GetDomObject() /
 * GetXPathObject(). The category is supplied by the caller; manufacturer,
 * title, description, prices, image and the four tab panels are read from
 * the page. Any field whose markup is missing is left at its default
 * instead of aborting the scrape.
 *
 * NOTE(review): this file contains a second definition of ScrapeProductPage
 * with a different parameter list; PHP cannot load both -- confirm which
 * revision is the live one.
 *
 * @param string $URL     Address of the product page, handed to GetPageSource().
 * @param string $catName Category name written verbatim to the "Category" column.
 * @return void Emits a progress line and calls PushDataToCSV().
 */
function ScrapeProductPage($URL, $catName)
{
    $prodPageSource = GetPageSource($URL);
    $dom = GetDomObject($prodPageSource);
    $xPath = GetXPathObject($dom);

    $Category = $catName;
    $Manufacturer = '';
    $ProductTitle = '';
    $ProductDescription = '';
    $SalesPrice = ''; // (USD)
    $MSRPPrice = '';
    $ProductImages = '';

    // A "Notify Me" marker anywhere in the page means the item is out of stock.
    // Strict comparison: a loose != false would miss a match at offset 0.
    $InStock = (strpos($prodPageSource, 'Notify Me') !== false) ? "Out of stock" : 'In stock';

    // query() yields a (possibly empty) node list or false, never null, so
    // the presence test has to be made on the first item itself.
    $manufacturerList = $xPath->query("//*[contains(@class, 'manufacturer')]");
    $manufacturerNode = $manufacturerList ? $manufacturerList->item(0) : null;
    if ($manufacturerNode !== null) {
        $Manufacturer = trim(str_replace('Manufacturer:', '', $manufacturerNode->nodeValue));
    }

    $title = '';
    $titleList = $xPath->query('//*[@id="main"]/div[2]/h1');
    $titleNode = $titleList ? $titleList->item(0) : null;
    if ($titleNode !== null) {
        $title = $titleNode->nodeValue;
        // Strip the "<manufacturer>:" prefix only when a manufacturer is known;
        // with an empty name the search string would be ":" and every colon
        // in the title would be removed.
        $ProductTitle = ($Manufacturer !== '')
            ? trim(str_replace("{$Manufacturer}:", '', $title))
            : trim($title);
    }

    $descList = $xPath->query("//*[contains(@class, 'product-description')]");
    $descNode = $descList ? $descList->item(0) : null;
    if ($descNode !== null) {
        $ProductDescription = trim(str_replace('Description', '', $descNode->nodeValue));
        $ProductDescription = PrepareField($ProductDescription);
    }

    $priceList = $xPath->query("//*[contains(@class, 'PricesalesPrice')]");
    $priceNode = $priceList ? $priceList->item(0) : null;
    if ($priceNode !== null) {
        $SalesPrice = $priceNode->nodeValue;
        // Keep only the first decimal number when one is present.
        if (preg_match('/\d+\.\d+/', $SalesPrice, $matches)) {
            $SalesPrice = $matches[0];
        }
    }

    // The MSRP is read from the raw markup: the struck-through value after "MSRP:".
    // preg_match() always fills $matchs with an array, so test its return value.
    if (preg_match('/MSRP:.+?<s>(.+?)<\/s>/', $prodPageSource, $matchs) === 1) {
        $MSRPPrice = $matchs[1];
    }

    $imageNode = $dom->getElementById('medium-image');
    if ($imageNode !== null) {
        $ProductImages = 'http://www.nutritionxcellence.com' . $imageNode->getAttribute('src');
    }

    // Elements tabs-1 .. tabs-4 map, in order, to nutrition facts,
    // other ingredients, allergen info and warnings.
    $tabText = array(1 => '', 2 => '', 3 => '', 4 => '');
    foreach (array_keys($tabText) as $i) {
        $tabNode = $dom->getElementById("tabs-{$i}");
        if ($tabNode !== null) {
            $tabText[$i] = PrepareField($tabNode->nodeValue);
        }
    }
    $NutritionFacts = $tabText[1];
    $OtherIngredients = $tabText[2];
    $AllerginInfo = $tabText[3];
    $Warnings = $tabText[4];

    // Push data to a csv file.
    $headerArray = array(
        "Category", "Manufacturer", "ProductTitle", "ProductDescription",
        "SalesPrice", "MSRPPrice", "InStock", "ProductImages",
        "NutritionFacts", "OtherIngredients", "AllerginInfo", "Warnings",
    );
    // Every cell is forced to a string, as the original interpolated row did.
    $rowArray = array_map('strval', array(
        $Category, $Manufacturer, $ProductTitle, $ProductDescription,
        $SalesPrice, $MSRPPrice, $InStock, $ProductImages,
        $NutritionFacts, $OtherIngredients, $AllerginInfo, $Warnings,
    ));
    PushDataToCSV('data.csv', $headerArray, $rowArray);

    // The title comes from a remote page; escape it before echoing into HTML.
    echo htmlspecialchars($title, ENT_QUOTES, 'UTF-8') . ' -- Done' . '<br />';
}