/**
 * Test that all unit prices are returned in decimal format from $testPageSource.
 *
 * Loads the fixture markup held in $this->testPageSource, selects every
 * <li class="unitPrice"> beneath the element whose id is "productInfo", and
 * passes a regular expression as the third argument of
 * WebScraper::retrieveNodeValuesFromDom(). The first three values returned
 * are expected to equal "1.00", "2.00" and "3.00".
 *
 * NOTE(review): the third argument presumably removes every character that is
 * not a digit or a dot from each node value - confirm against WebScraper.
 */
public function testRetrieveNodeValuesFromDomWithClean()
{
    $domDocument = new DomDocument();

    // Collect libxml parse warnings internally instead of muting every error
    // with "@"; the previous libxml setting is restored straight afterwards.
    $previousLibxmlSetting = libxml_use_internal_errors(true);
    $domDocument->loadHTML($this->testPageSource);
    libxml_clear_errors();
    libxml_use_internal_errors($previousLibxmlSetting);

    // This expression selects unit-price nodes (not titles), hence the name.
    $unitPriceXPath = 'id(\'productInfo\')/ul/li[@class="unitPrice"]';
    $nodeValues = WebScraper::retrieveNodeValuesFromDom($domDocument, $unitPriceXPath, '/[^0-9\\.]/');

    // PHPUnit expects (expected, actual); this order keeps failure messages accurate.
    $this->assertEquals("1.00", $nodeValues[0]);
    $this->assertEquals("2.00", $nodeValues[1]);
    $this->assertEquals("3.00", $nodeValues[2]);
}
// XPath selecting the href attribute of every product heading link inside the
// element whose id is "productLister" on the main page.
$linkXPath = 'id(\'productLister\')/ul/li/div/div/div/div/h3/a/@href';
$productLinks = WebScraper::retrieveNodeValuesFromDom($domDocument, $linkXPath);

// Retrieve an array of product unit prices (decimal format) that appear on the main URL.
// NOTE(review): the third argument presumably strips every character that is
// not a digit or a dot from each value - confirm against WebScraper.
$pricesXPath = '//p[contains(@class,"pricePerUnit")]';
$productPrices = WebScraper::retrieveNodeValuesFromDom($domDocument, $pricesXPath, '/[^0-9\\.]/');

// Retrieve the size and the description of each product link found.
$linkSizes = array();
$productDescriptions = array();

// The description XPath is the same for every linked page, so build it once.
$descriptionXPath = 'id(\'information\')/productcontent/htmlcontent/div[1]/p[1]';

foreach ($productLinks as $link) {
    $linkSource = WebScraper::retrievePageSourceSize($link);

    // Always push exactly one size per link so the arrays stay index-aligned,
    // even when the fetch result carries no 'size' entry.
    $linkSizes[] = isset($linkSource['size']) ? $linkSource['size'] : null;

    // Retrieve an array of product descriptions that appear on the link.
    // An empty source is skipped: DOMDocument::loadHTML() rejects an empty
    // string (a warning on PHP 7, a ValueError on PHP 8).
    $productDescription = array();
    $pageSource = isset($linkSource['source']) ? $linkSource['source'] : '';
    if (is_string($pageSource) && $pageSource !== '') {
        $domDocument = new DomDocument();

        // Collect libxml parse warnings internally instead of muting every
        // error with "@"; the previous setting is restored straight afterwards.
        $previousLibxmlSetting = libxml_use_internal_errors(true);
        $domDocument->loadHTML($pageSource);
        libxml_clear_errors();
        libxml_use_internal_errors($previousLibxmlSetting);

        $productDescription = WebScraper::retrieveNodeValuesFromDom($domDocument, $descriptionXPath);
    }

    // Push null when the XPath matched nothing, rather than reading an undefined offset.
    $productDescriptions[] = isset($productDescription[0]) ? $productDescription[0] : null;
}

// The product data attributes are all present on the main URL, so the key of
// each product title is reused to look up the matching entry in the other
// arrays and build a complete product record. Entries that are missing from a
// shorter array become null instead of raising an undefined-offset error.
$results = array();
foreach ($productTitles as $key => $title) {
    $results[] = array(
        'title' => $title,
        'size' => isset($linkSizes[$key]) ? $linkSizes[$key] : null,
        'unit_price' => isset($productPrices[$key]) ? $productPrices[$key] : null,
        'description' => isset($productDescriptions[$key]) ? $productDescriptions[$key] : null,
    );
}

// Calculate the total of all product unit prices.
$priceTotal = array_sum($productPrices);

$summary = array('results' => $results, 'total' => $priceTotal);

// NOTE(review): json_encode() returns false on failure (for example malformed
// UTF-8 in scraped text); whatever consumes $json_summary should check for that.
$json_summary = json_encode($summary, JSON_PRETTY_PRINT);