コード例 #1
0
ファイル: Product.php プロジェクト: socialskeptic/sainsburys
 /**
  * Downloads the product page and summarises it.
  *
  * Issues a GET request for the address returned by getUrl() through the client
  * returned by getCurl(), then parses the response body as HTML.
  *
  * @return array Always holds 'size' (strlen() of the raw response, i.e. its length
  *               in bytes). Holds 'description' only when the first element matched by
  *               '#information .productText p' has non-empty plain text.
  */
 public function getResponse()
 {
     // Fetch the raw page markup.
     /** @var \Library\Common\Curl $client */
     $client = $this->getCurl();
     $client->get($this->getUrl());
     $body = $client->getResponse();

     // Build a DOM from the downloaded markup.
     $dom = new HtmlDomParser();
     $dom->loadstring($body);

     // The size entry is always present in the result.
     $result = array('size' => strlen($body));

     // Gather the plain text of every description paragraph, in document order.
     $paragraphs = array();
     foreach ($dom->find('#information .productText p') as $node) {
         array_push($paragraphs, $node->plaintext);
     }

     // Nothing usable in the first paragraph: report the size only.
     if (empty($paragraphs[0])) {
         return $result;
     }

     $result['description'] = $paragraphs[0];

     return $result;
 }
コード例 #2
0
 /**
  * Extracts basic product information from a product list HTML snippet.
  *
  * The result is one flat array, not a list of products: when the snippet contains
  * several '.product' elements, values taken from later elements overwrite the
  * values taken from earlier ones.
  *
  * @param string $html HTML markup to parse
  * @return array May hold 'title' and 'href' (trimmed text and href of the first
  *               '.productInfo a' anchor of a product) and 'unit_price' (trimmed text
  *               of a '.pricePerUnit' element with '/unit' and '£' removed). A key is
  *               absent when nothing in the markup matched it.
  */
 public function getProductInfo($html)
 {
     //parse html dom
     $parser = new HtmlDomParser();
     $parser->loadstring($html);
     //init default productInfo array
     $productInfo = array();
     //iterate over .product classes
     foreach ($parser->find('.product') as $product) {
         //iterate over .productInfo a elements
         foreach ($product->find('.productInfo a') as $a) {
             //get product title
             $productInfo['title'] = trim($a->plaintext);
             //get product href
             $productInfo['href'] = trim($a->href);
             //use first anchor tag result only
             break;
         }
         //look for the price inside the current product only; querying the whole
         //document here re-scanned every price once per product and could attach
         //a price that belongs to a different product
         foreach ($product->find('.pricePerUnit') as $pricePerUnit) {
             //strip the unit suffix and the currency symbol, in that order
             $unitPrice = str_replace(array('/unit', '£'), '', $pricePerUnit->plaintext);
             $productInfo['unit_price'] = trim($unitPrice);
         }
     }
     //return product title, href, and unit price
     return $productInfo;
 }
コード例 #3
0
ファイル: cds_scraper.php プロジェクト: adrpnt/web-scraper
use League\Csv\Writer;
# Scrapes product names and prices from every page of the category listing and
# writes them to cds_list.csv.

# Store url
$url = "http://centrodosuplemento.com.br/suplementos";
$html = new HtmlDomParser();
$html->loadUrl($url);
# Keep only the purely numeric pager entries: the pager may also contain
# non-numeric items (e.g. a "next" link), and comparing those against page
# numbers in max() would yield a meaningless upper bound for the loop below
$page_numbers = [];
foreach ($html->find('.toolbar-bottom .pager .pages ol li') as $page) {
    $label = trim($page->plaintext);
    if (ctype_digit($label)) {
        $page_numbers[] = (int) $label;
    }
}
# max() cannot take an empty array; with no pager entries, read a single page
$max_page = empty($page_numbers) ? 1 : max($page_numbers);
# Initialize Arrays
$name = [];
$price = [];
for ($i = 1; $i <= $max_page; $i++) {
    # Open search results page; query parameters are joined with "&" (a second
    # "?" would make "p" part of the "mode" value instead of a parameter)
    $url = "http://centrodosuplemento.com.br/suplementos?mode=list&p={$i}";
    $product_html = new HtmlDomParser();
    $product_html->loadUrl($url);
    # Store data in Arrays
    foreach ($product_html->find('.product-name a') as $line) {
        $name[] = $line->plaintext;
    }
    foreach ($product_html->find('.price-box .regular-price .price') as $line) {
        $price[] = $line->plaintext;
    }
}
$writer = Writer::createFromPath('cds_list.csv', 'w');
$writer->insertOne(["Listing Name", "Price"]);
# Names and prices are paired by position. The two selectors are matched
# independently, so the price list can end up shorter than the name list;
# write an empty price instead of reading an undefined offset.
$total = count($name);
for ($p = 0; $p < $total; $p++) {
    $writer->insertOne([$name[$p], isset($price[$p]) ? $price[$p] : '']);
}