/**
 * Fetches a single product page and extracts its details.
 *
 * The page at $this->getUrl() is downloaded through the curl helper. The
 * length of the raw response (as reported by strlen) is always returned; the
 * text of the first paragraph matching "#information .productText p" is added
 * as the description when it is non-empty.
 *
 * @return array 'size' and, when a non-empty first paragraph exists, 'description'
 */
public function getResponse()
{
    // Download the raw HTML of the product page.
    /** @var \Library\Common\Curl $client */
    $client = $this->getCurl();
    $client->get($this->getUrl());
    $html = $client->getResponse();

    // Load the markup into the DOM parser.
    $dom = new HtmlDomParser();
    $dom->loadstring($html);

    // The size of the response is reported unconditionally.
    $result = array('size' => strlen($html));

    // Gather the text of every paragraph in the product text section.
    $paragraphs = array();
    foreach ($dom->find('#information .productText p') as $node) {
        $paragraphs[] = $node->plaintext;
    }

    // Without a usable first paragraph only the size is returned.
    if (empty($paragraphs[0])) {
        return $result;
    }

    $result['description'] = $paragraphs[0];

    return $result;
}
/**
 * Extracts basic product information from a product-list HTML snippet.
 *
 * Every ".product" element is visited in document order. The title and href
 * are taken from that product's first ".productInfo a" anchor, and the unit
 * price from the ".pricePerUnit" element(s) inside that same product, with
 * the "/unit" suffix and the "£" sign stripped.
 *
 * The result is one flat array, so when the snippet contains several products
 * the values of later products overwrite those of earlier ones.
 *
 * @param $html HTML markup handed to the DOM parser
 * @return array May contain 'title', 'href' and 'unit_price'; empty when nothing matches
 */
public function getProductInfo($html)
{
    //parse html dom
    $parser = new HtmlDomParser();
    $parser->loadstring($html);

    //init default productInfo array
    $productInfo = array();

    //iterate over .product classes
    foreach ($parser->find('.product') as $product) {
        //use the first .productInfo anchor only
        foreach ($product->find('.productInfo a') as $a) {
            $productInfo['title'] = trim($a->plaintext);
            $productInfo['href'] = trim($a->href);
            break;
        }

        //look the price up inside the current product, not the whole document,
        //so the price always belongs to the same product as the title/href
        foreach ($product->find('.pricePerUnit') as $pricePerUnit) {
            $unitPrice = str_replace(array('/unit', '£'), '', $pricePerUnit->plaintext);
            $productInfo['unit_price'] = trim($unitPrice);
        }
    }

    //return product title, href, and unit price
    return $productInfo;
}
use League\Csv\Writer;

# Scrapes product names and prices from every page of the supplement listing
# and writes them to cds_list.csv with a "Listing Name" / "Price" header row.

# Store url
$url = "http://centrodosuplemento.com.br/suplementos";
$html = new HtmlDomParser();
$html->loadUrl($url);

# Collect the page numbers shown in the bottom pager. Only purely numeric
# labels are kept: the pager may also contain non-numeric entries (e.g. a
# "next" link), which would otherwise corrupt the max() result below.
$page_numbers = [];
foreach ($html->find('.toolbar-bottom .pager .pages ol li') as $page) {
    $label = trim($page->plaintext);
    if (ctype_digit($label)) {
        $page_numbers[] = (int) $label;
    }
}

# A listing without a pager has exactly one page; max() must not be called on
# an empty array.
$max_page = empty($page_numbers) ? 1 : max($page_numbers);

# Initialize Arrays
$name = [];
$price = [];

for ($i = 1; $i <= $max_page; $i++) {
    # Open search results page. Query parameters are separated by "&"; a second
    # "?" would make the page number part of the "mode" value.
    $url = "http://centrodosuplemento.com.br/suplementos?mode=list&p={$i}";
    $product_html = new HtmlDomParser();
    $product_html->loadUrl($url);

    # Store data in Arrays
    foreach ($product_html->find('.product-name a') as $line) {
        $name[] = $line->plaintext;
    }
    foreach ($product_html->find('.price-box .regular-price .price') as $line) {
        $price[] = $line->plaintext;
    }
}

$writer = Writer::createFromPath('cds_list.csv', 'w');
$writer->insertOne(["Listing Name", "Price"]);

# Names and prices are collected independently, so there may be fewer prices
# than names; write an empty price instead of reading an undefined index.
$total = count($name);
for ($p = 0; $p < $total; $p++) {
    $writer->insertOne([$name[$p], isset($price[$p]) ? $price[$p] : '']);
}