Пример #1
0
    /**
     * Checks that scrapeTitle() yields the anchor text of a list item
     * without the whitespace that surrounds it in the markup.
     */
    public function testCanScrapeTitle()
    {
        // The anchor text is deliberately padded with a newline, a tab and spaces.
        $listItemHtml = '<div><a href="alink.com" >
	                                        A Link
        </a></div>';

        $scraper = new ProductListItemScraper($listItemHtml);
        $actualTitle = $scraper->scrapeTitle();

        $this->assertEquals('A Link', $actualTitle);
    }
Пример #2
0
 /**
  * Scrapes the product list page and the product page of every list entry.
  *
  * Requests the page at getUrl(), extracts the product list markup via
  * getProductListString(), splits it on '<li>' and passes each fragment
  * through ProductListItemScraper and then ProductPageScraper. The formatted
  * entries are collected and their unit costs are summed.
  *
  * @return array ['results' => list of formatted product entries, 'total' => sum of unit costs]
  */
 public function scrape()
 {
     // Cookies are switched on for the client (the original note says this is for the JS load).
     $httpClient = new Client(['cookies' => true]);
     $pageBody = $httpClient->request('GET', $this->getUrl())->getBody();

     // Whatever precedes the first '<li>' is not a list entry, so it is dropped.
     $listItems = array_slice(explode('<li>', $this->getProductListString($pageBody)), 1);

     $results = [];
     $runningTotal = 0;

     foreach ($listItems as $itemHtml) {
         // First pass: the list-item scraper produces a product info object from the fragment.
         $itemScraper = new ProductListItemScraper($itemHtml);
         $itemScraper->scrape();

         // Second pass: the page scraper receives that object and completes it
         // (per the original note, using the link found in the list item).
         $pageScraper = new ProductPageScraper($itemScraper->getProductInfo());
         $pageScraper->scrape();
         $completedInfo = $pageScraper->getProductInfo();

         // Accumulate the cost, then store the formatted representation.
         $runningTotal += floatval($completedInfo->getUnitCost());
         $results[] = ScraperLibrary::productInfoArrayDecorator($completedInfo);
     }

     return ['results' => $results, 'total' => $runningTotal];
 }