/**
 * Scrapes a locally stored HTML fixture and compares the scraper output with the expected results.
 *
 * The markup is read from $this->htmlPage and loaded into a crawler that is injected
 * into the scraper via setCrawler(). Per the original author's note, injecting the
 * crawler bypasses the page argument, which is why scrapePage() receives an empty string.
 */
public function testScraping()
{
    $this->assertFileExists($this->htmlPage);

    $html = file_get_contents($this->htmlPage);
    // file_get_contents() returns false when the file exists but cannot be read;
    // fail here with a clear message instead of feeding false to the crawler.
    $this->assertNotFalse($html, 'Unable to read the HTML fixture: ' . $this->htmlPage);

    // The crawler is constructed with the site's root URL (presumably so relative
    // links can be resolved — confirm against Crawler); the content itself comes
    // from the fixture loaded above.
    $this->crawler = new Crawler('', 'http://hiring-tests.s3-website-eu-west-1.amazonaws.com');
    $this->crawler->addHtmlContent($html);

    $client = new \Goutte\Client();
    $scraper = new \App\Scraper($client);
    $scraper->setXpath('.product');

    // Hand the pre-populated crawler straight to the scraper.
    $scraper->setCrawler($this->crawler);

    // Empty page argument: the crawler set above already holds the document.
    $results = $scraper->scrapePage("");

    // Compare the serialised forms strictly (type and value), so the two JSON
    // strings must match byte-for-byte.
    $this->assertSame(
        json_encode($this->_getExpectedResults()),
        json_encode($results)
    );
}
<?php

namespace App;

// Command-line entry point: scrapes the product listing page and prints the
// results as pretty-printed JSON followed by a newline.

// Resolve the autoloader relative to this file rather than the current working
// directory, so the script also works when launched from another directory.
require __DIR__ . '/../vendor/autoload.php';

$page = 'http://hiring-tests.s3-website-eu-west-1.amazonaws.com/2015_Developer_Scrape/5_products.html';

$client = new \Goutte\Client();
$scraper = new \App\Scraper($client);
$scraper->setXpath('.product');

$json = json_encode($scraper->scrapePage($page), JSON_PRETTY_PRINT);
if ($json === false) {
    // json_encode() returns false on failure; report why instead of silently
    // printing an empty line.
    throw new \RuntimeException('Unable to encode scrape results as JSON: ' . json_last_error_msg());
}

print $json;
print "\n";