Ejemplo n.º 1
0
 /**
  * getProducts()
  * Scrapes the products and returns them in a unified JSON format.
  *
  * Every `#productLister .product` node found in $this->source yields one entry
  * holding its title, unit price, the size of the linked detail page and the
  * description read from that page. A product whose detail page request raises a
  * RequestException is logged and left out of the result.
  *
  * NOTE(review): the ->first()->text() / ->attr() lookups are not guarded;
  * presumably the crawler raises when a selector matches nothing - confirm.
  *
  * @return mixed Whatever $this->toJson() produces for the
  *               ['results' => ..., 'totals' => ...] payload, or false when a
  *               JsonException is caught while transforming/encoding.
  */
 public function getProducts()
 {
     // Load HTML into scraper
     $scraper = new Crawler($this->source);
     // Get products
     $results = $scraper->filter('#productLister .product');
     $products = [];
     foreach ($results as $product) {
         // Create new crawler for each product iteration
         $product = new Crawler($product);
         // Get title
         $title = trim($product->filter('h3')->first()->text());
         // Get link
         $link = trim($product->filter('h3 a')->first()->attr('href'));
         // Get unit_price: strips every character except digits, '+' and '.'
         $unit_price = preg_replace('/[^\\d+\\.]/', '', $product->filter('.pricePerUnit')->first()->text());
         try {
             // Get secondary resource
             Console::log('Following product link: ' . $link);
             $resource = $this->client->request('GET', $link);
             // Read the body once; it feeds both the size fallback and the description crawler
             $body = $resource->getBody()->getContents();
             // Get size: prefer the Content-Length header, but a response is not obliged to
             // send one (e.g. chunked transfer), in which case getHeader() yields an empty
             // array. Fall back to the number of bytes actually read instead of indexing [0].
             $lengthHeader = $resource->getHeader('Content-Length');
             $bytes = (isset($lengthHeader[0]) && is_numeric($lengthHeader[0]))
                 ? $lengthHeader[0]
                 : strlen($body);
             $size = number_format($bytes / 1024, 2);
             // Create tertiary crawler to get description
             $productDetails = new Crawler($body);
             // Get description
             $description = trim($productDetails->filter('#information .productText')->first()->text());
         } catch (RequestException $e) {
             Console::log('An error occurred while trying to fetch details for a product. I skipped this one for now. Error: ' . $e->getMessage());
             continue;
         }
         // Collect item
         $products[] = ['title' => $title, 'unit_price' => $unit_price, 'size' => $size . 'kb', 'description' => $description];
     }
     // Get total value: sum of the unit_price column of the collected products
     $totalSum = array_sum(array_column($products, 'unit_price'));
     try {
         // Transform collection
         $products = $this->transformCollection($products);
         // Namespace collection and encode to JSON
         $json = $this->toJson(['results' => $products, 'totals' => $totalSum]);
     } catch (JsonException $e) {
         // JSON transformation failed, I'd usually also log this incident here with Monolog or similar
         Console::log('An error occurred while transforming the products into the appropriate JSON format: ' . $e->getMessage());
         return false;
     }
     return $json;
 }
Ejemplo n.º 2
0
<?php

// Pull in the autoloader from vendor/ (mainly for PSR-4 class loading).
require_once __DIR__ . '/vendor/autoload.php';

// Build the scraper and the HTTP client used to download the listing page.
$htmlScraper = new \Sainsbury\HtmlScraper();
$httpClient = new \GuzzleHttp\Client();

// Announce the download, then fetch the product listing page.
\Sainsbury\Console::log('Getting initial source...');
$listingResponse = $httpClient->request(
    'GET',
    'http://hiring-tests.s3-website-eu-west-1.amazonaws.com/2015_Developer_Scrape/5_products.html'
);

// Hand the downloaded HTML to the scraper and collect what it extracts.
$listingHtml = $listingResponse->getBody()->getContents();
$scrapedProducts = $htmlScraper->setSource($listingHtml)->getProducts();

// Dump the scraped products to the output.
print_r($scrapedProducts);