示例#1
0
 /**
  * Test that all unit prices are returned in decimal format from $testPageSource.
  *
  * Parses $this->testPageSource, selects every li.unitPrice node under the
  * element with id "productInfo", and passes the pattern '/[^0-9\\.]/' as the
  * third argument to WebScraper::retrieveNodeValuesFromDom(). The helper is
  * presumably expected to strip everything matching that pattern (anything
  * that is not a digit or a dot) -- confirm against its implementation.
  *
  * The first three returned values must equal "1.00", "2.00" and "3.00".
  */
 public function testRetrieveNodeValuesFromDomWithClean()
 {
     // Collect libxml markup diagnostics internally rather than silencing every
     // error with "@", then restore whatever setting was active before.
     $previousLibxmlSetting = libxml_use_internal_errors(true);
     $domDocument = new DOMDocument();
     $domDocument->loadHTML($this->testPageSource);
     libxml_clear_errors();
     libxml_use_internal_errors($previousLibxmlSetting);

     $unitPriceXPath = 'id(\'productInfo\')/ul/li[@class="unitPrice"]';
     $nodeValues = WebScraper::retrieveNodeValuesFromDom($domDocument, $unitPriceXPath, '/[^0-9\\.]/');

     // assertEquals() takes the expected value first; with the arguments in
     // this order a failure message reports expected/actual the right way round.
     $this->assertEquals("1.00", $nodeValues[0]);
     $this->assertEquals("2.00", $nodeValues[1]);
     $this->assertEquals("3.00", $nodeValues[2]);
 }
示例#2
0
// Retrieve an array of product link URLs (href attributes) that appear on the main URL.
// $domDocument is the main-page document built earlier in the script.
$linkXPath = 'id(\'productLister\')/ul/li/div/div/div/div/h3/a/@href';
$productLinks = WebScraper::retrieveNodeValuesFromDom($domDocument, $linkXPath);

// Retrieve an array of product unit prices (decimal format) that appear on the main URL.
$pricesXPath = '//p[contains(@class,"pricePerUnit")]';
$productPrices = WebScraper::retrieveNodeValuesFromDom($domDocument, $pricesXPath, '/[^0-9\\.]/');

// Visit each product link to record its page size and its description.
$linkSizes = array();
$productDescriptions = array();
$descriptionXPath = 'id(\'information\')/productcontent/htmlcontent/div[1]/p[1]';

// Collect libxml markup diagnostics internally instead of silencing every
// error with "@"; the previous setting is restored after the loop.
$previousLibxmlSetting = libxml_use_internal_errors(true);
foreach ($productLinks as $link) {
    $linkSource = WebScraper::retrievePageSourceSize($link);
    $linkSizes[] = $linkSource['size'];

    // Use a dedicated document per link so the main-page $domDocument is not
    // overwritten by the last product page that was fetched.
    $linkDocument = new DOMDocument();
    $linkMarkup = $linkSource['source'];
    if (is_string($linkMarkup) && $linkMarkup !== '') {
        // loadHTML() rejects an empty string, so only parse real content.
        $linkDocument->loadHTML($linkMarkup);
        libxml_clear_errors();
    }

    $linkDescriptions = WebScraper::retrieveNodeValuesFromDom($linkDocument, $descriptionXPath);

    // Always append exactly one entry per link (null when no description was
    // found) so positions stay aligned with $productLinks without reading an
    // undefined offset.
    $productDescriptions[] = isset($linkDescriptions[0]) ? $linkDescriptions[0] : null;
}
libxml_use_internal_errors($previousLibxmlSetting);

// Build one record per product title. The title's key is reused to look up
// the matching size, unit price and description; any attribute missing at
// that key becomes null instead of triggering an undefined-offset error.
$results = array();
foreach ($productTitles as $key => $title) {
    $results[] = array(
        'title' => $title,
        'size' => isset($linkSizes[$key]) ? $linkSizes[$key] : null,
        'unit_price' => isset($productPrices[$key]) ? $productPrices[$key] : null,
        'description' => isset($productDescriptions[$key]) ? $productDescriptions[$key] : null,
    );
}

// Calculate the total of all product unit prices.
$priceTotal = array_sum($productPrices);

// Assemble the summary and encode it as pretty-printed JSON.
$summary = array('results' => $results, 'total' => $priceTotal);
$json_summary = json_encode($summary, JSON_PRETTY_PRINT);