/**
 * Run the command: download the page at $this->uri, scrape it and write
 * the scraped result to the console as pretty-printed JSON.
 *
 * @param InputInterface  $input  Console input (not read by this command).
 * @param OutputInterface $output Console output that receives the JSON or the error report.
 *
 * @return int Exit status: 0 on success, 1 when fetching or scraping throws.
 */
protected function execute(InputInterface $input, OutputInterface $output)
{
    $guzzleClient = new Client();
    $scraper = new Scraper(new Crawler(), new LinkScraper($guzzleClient));

    try {
        // We need to enable cookies to be able to get the proper page source.
        $cookieJar = new CookieJar();
        $response = $guzzleClient->get($this->uri, ['cookies' => $cookieJar]);

        $scraper->setHtml((string) $response->getBody());
        $scraper->process();

        // Go through the console output (not echo) so the result can be
        // captured and redirected by the console layer. Raw mode keeps any
        // "<...>" sequences in the scraped text from being parsed as style
        // tags; no trailing newline is added, matching the previous echo.
        $output->write($scraper->toPrettyJson(), false, OutputInterface::OUTPUT_RAW);
    } catch (\Exception $e) {
        $output->writeln(sprintf('<error>%s</error>', 'Error occurred'));
        // The exception message is arbitrary text, so print it unformatted.
        $output->writeln($e->getMessage(), OutputInterface::OUTPUT_RAW);

        // Report failure through the return value instead of exit(), so the
        // console application decides how to terminate the process.
        return 1;
    }

    return 0;
}
/**
 * Scrapes the cached products page and verifies the first item, the last
 * item and the JSON rendering.
 */
public function test_scraper()
{
    // Intercept the product description link request and answer it with the
    // cached copy of the page: hitting HTTP on every test run is inefficient.
    $fetchExpectation = $this->linkScraper->shouldReceive('fetchPageContentFor');
    $fetchExpectation->andReturn($this->fetchProductDescriptionPage());

    $this->scraper->setHtml($this->fetchProductsPage());
    $this->scraper->process();

    // Accessor on the scraper => title expected for the item it returns
    // (first item, then last item).
    $expectedTitles = [
        'getFirstItem' => 'Sainsbury\'s Apple, Strawberry, Grape & Blueberry 240g',
        'getLastItem' => 'Sainsbury\'s Watermelon, Mango & Grape 240g',
    ];

    foreach ($expectedTitles as $accessor => $expectedTitle) {
        $item = $this->scraper->{$accessor}();

        $this->assertEquals($expectedTitle, $item['title']);
        // We already know the size of the cached page. Every description link
        // is answered with that same cached page, so size and description
        // are expected to be identical for both items.
        $this->assertEquals('43.77kb', $item['size']);
        $this->assertEquals(1.75, $item['unit_price']);
        $this->assertStringStartsWith('Apple, Strawberry, Grape & Blueberry', $item['description']);
    }

    // Check json output
    $prettyJson = $this->scraper->toPrettyJson();
    $this->assertRegExp('/Apple, Strawberry, Grape & Blueberry/', $prettyJson);
}