/**
 * Verifies that scrapeTitle() extracts the link text from a list item's
 * HTML fragment.
 *
 * The fixture's anchor text is padded with spaces (' A Link ') while the
 * expected value is not, so the test also expects surrounding whitespace
 * to be stripped.
 */
public function testCanScrapeTitle()
{
    // Arrange: a minimal list-item fragment containing a single anchor.
    $listItemHtml = '<div><a href="alink.com" > A Link </a></div>';
    $expectedTitle = 'A Link';

    // Act: scrape the title out of the fragment.
    $scraper = new ProductListItemScraper($listItemHtml);
    $actualTitle = $scraper->scrapeTitle();

    // Assert
    $this->assertEquals($expectedTitle, $actualTitle);
}
/**
 * Scrapes the product list page and every product it links to.
 *
 * Fetches the page at $this->getUrl(), cuts the product list out of the
 * response body, and runs each list entry through ProductListItemScraper
 * and then ProductPageScraper to build a complete product info object.
 *
 * @return array Map with two keys:
 *               'results' => list of values produced by
 *                            ScraperLibrary::productInfoArrayDecorator(),
 *                            one per list entry;
 *               'total'   => sum of the unit costs (int 0 when the list
 *                            has no entries, float otherwise).
 */
public function scrape()
{
    // Fetch the page with Guzzle; cookies are enabled (per the original
    // author's note, this is needed for the JS-loaded page).
    $httpClient = new Client(['cookies' => true]);
    $pageResponse = $httpClient->request('GET', $this->getUrl());

    // Extract the product list markup from the page body.
    $listHtml = $this->getProductListString($pageResponse->getBody());

    // Split on the opening <li> tag. The first chunk is whatever precedes
    // the first list element, so it is dropped.
    $listEntries = array_slice(explode('<li>', $listHtml), 1);

    $runningTotal = 0;
    $decoratedProducts = [];

    foreach ($listEntries as $entryHtml) {
        // Stage 1: the list item scraper builds a ProductInfo object from
        // the list entry's HTML.
        $itemScraper = new ProductListItemScraper($entryHtml);
        $itemScraper->scrape();

        // Stage 2: the page scraper takes that object and follows the link
        // found in the list item to gather further information.
        $pageScraper = new ProductPageScraper($itemScraper->getProductInfo());
        $pageScraper->scrape();

        // The completed product info object.
        $completedInfo = $pageScraper->getProductInfo();

        // Accumulate the cost, then store the formatted representation.
        $runningTotal += (float) $completedInfo->getUnitCost();
        $decoratedProducts[] = ScraperLibrary::productInfoArrayDecorator($completedInfo);
    }

    return [
        'results' => $decoratedProducts,
        'total' => $runningTotal,
    ];
}