示例#1
0
 /**
  * The array decorator should return the values set on a ProductInfo
  * under the title, size, unit_price and description keys.
  */
 public function testCanDecorateInfo()
 {
     $info = new ProductInfo();
     $info->setTitle('title');
     $info->setSize('size');
     $info->setDescription('description');
     $info->setUnitCost('unit_price');

     $decorated = ScraperLibrary::productInfoArrayDecorator($info);

     $this->assertEquals(
         [
             'title' => 'title',
             'size' => 'size',
             'unit_price' => 'unit_price',
             'description' => 'description',
         ],
         $decorated
     );
 }
示例#2
0
 /**
  * Request the product page, then record its size and description on the ProductInfo.
  *
  * The size comes from the Content-Length response header when that header holds
  * a plain byte count, otherwise from the size reported by the response body.
  * Either way the byte count is passed through ScraperLibrary::formatBytes(), so
  * the stored size has the same format regardless of which source supplied it.
  *
  * @return ProductInfo
  */
 public function scrape()
 {
     //get response to page request
     $client = new Client(['cookies' => true]);
     $response = $client->request('GET', $this->getProductInfo()->getLink());
     //getHeaderLine() gives the header as a string ('' when it is absent);
     //getHeader() returns an array of values, which must not be stored as the size
     $contentLength = trim($response->getHeaderLine('Content-Length'));
     if ($contentLength !== '' && ctype_digit($contentLength)) {
         $bytes = (int) $contentLength;
     } else {
         //header missing or not a plain number: fall back to the size of the page body
         $bytes = $response->getBody()->getSize();
     }
     $this->getProductInfo()->setSize(ScraperLibrary::formatBytes($bytes, 1));
     $this->getProductInfo()->setDescription($this->scrapeProductDescription($response->getBody()));
     return $this->getProductInfo();
 }
 /**
  * Find the link to the item page inside the product list item.
  *
  * Delegates to ScraperLibrary::findDelimitedContent() with this scraper's
  * string and its link start/end markers, and returns that call's result.
  */
 public function scrapeLink()
 {
     $haystack = $this->getString();
     $startMarker = $this->getLinkStartMarker();
     $endMarker = $this->getLinkEndMarker();

     return ScraperLibrary::findDelimitedContent($haystack, $startMarker, $endMarker);
 }
示例#4
0
 /**
  * Scrape the product list page and each product page it links to.
  *
  * @return array 'results' holds one decorated array per product, 'total' the summed unit cost
  */
 public function scrape()
 {
     //fetch the list page with Guzzle (cookies on for js load)
     $http = new Client(['cookies' => true]);
     $page = $http->request('GET', $this->getUrl());

     //cut the product list out of the page body and split it on list items;
     //the chunk before the first <li> is not a list element, so it is dropped
     $entries = explode('<li>', $this->getProductListString($page->getBody()));
     array_shift($entries);

     $runningTotal = 0;
     $results = [];
     foreach ($entries as $entryHtml) {
         //the list item scraper builds a ProductInfo object from the list item html
         $itemScraper = new ProductListItemScraper($entryHtml);
         $itemScraper->scrape();

         //the page scraper uses the link found in the list item to get further information
         $pageScraper = new ProductPageScraper($itemScraper->getProductInfo());
         $pageScraper->scrape();
         $info = $pageScraper->getProductInfo();

         //accumulate the cost, then store the formatted array for this product
         $runningTotal += (float) $info->getUnitCost();
         $results[] = ScraperLibrary::productInfoArrayDecorator($info);
     }

     return ['results' => $results, 'total' => $runningTotal];
 }