/**
 * Sample custom scraper: mirrors the default Handler's scraping steps, but
 * fills in arbitrary placeholder values for the price and the normalized URL.
 * All site-specific logic should be implemented in this function.
 *
 * @param mixed $xpath         XPath handle forwarded unchanged to the parent
 *                             helpers (presumably a DOMXPath -- confirm
 *                             against the parent class).
 * @param mixed $urlComponents Not referenced by this sample implementation.
 *
 * @return array Positional list: [0] title, [1] price, [2] description,
 *               [3] product images, [4] normalized URL.
 */
protected function customScraper($xpath, $urlComponents)
{
    // Debug marker showing that the custom Handler (not the default) ran.
    echo "Bam! We're in the custom Handler.";

    $title = parent::getTitle($xpath, '/html/head/title');
    $price = 10000000.0; // arbitrary placeholder

    // Scan every <meta> node in <head> for the description tag.
    $descriptionNodeArray = parent::getDescription($xpath, '/html/head/meta');
    $description = "a description metatag has not been found";
    $nodeCount = $descriptionNodeArray->length;
    for ($i = 0; $i < $nodeCount; $i++) {
        $descriptionNode = $descriptionNodeArray->item($i);
        // Match the name attribute case-insensitively so that "description",
        // "Description", "DESCRIPTION", ... are all recognised. No early
        // exit: when several tags match, the last one wins.
        if (strcasecmp($descriptionNode->getAttribute('name'), 'description') === 0) {
            $description = $descriptionNode->getAttribute('content');
        }
    }

    // Threshold value handed to the parent's image helper together with the query.
    $imageWidthThreshold = 200;
    $productImages = parent::getImages($xpath, '/html/body/descendant::img', $imageWidthThreshold);

    $normalizedUrl = "This is a fake normalized URL. Unimplemented"; // arbitrary placeholder

    // Assemble the return array; callers read these values by position.
    return array($title, $price, $description, $productImages, $normalizedUrl);
}
<?php
// Manual smoke test: scrape one product page and dump the result as HTML.
require_once 'ProductScraper.php';

// Test urls
/*
http://www.zappos.com/lucky-brand-abbey-road-dune?zlfid=111
http://store.americanapparel.net/rsa642sg.html
http://www.llbean.com/webapp/wcs/stores/servlet/CategoryDisplay?storeId=1&catalogId=1&langId=-1&categoryId=18584&productId=91702&qs=3009652
*/

// The URL literal must not carry stray whitespace (the previous version had a
// trailing space inside the quotes).
$scrapedValues = ProductScraper::getInfo('http://store.americanapparel.net/rsa642sg.html');

// Everything printed below originates from a remote page, so it is escaped
// before being written into the HTML response.
$escape = function ($value) {
    return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
};

//testing output
echo "Title: {$escape($scrapedValues[0])} <br />";
echo "Price: " . $escape($scrapedValues[1]) . "<br />";
echo "Description: {$escape($scrapedValues[2])} <br />";
echo "Images: ";
// print_r() with a true second argument returns the dump instead of printing
// it, so the dump can be escaped as well.
echo $escape(print_r($scrapedValues[3], true));
echo '<br />';
echo "Normalized url: {$escape($scrapedValues[4])} <br />";
?>