/**
 * Run the typical bank-scraping pipeline.
 *
 * Steps: resolve the URL, download the page with cURL, parse it into a DOM,
 * extract the exchange-rate cells, extract the values from those cells and
 * return the result of returnValues(). Any empty intermediate result aborts
 * the run with an exception.
 *
 * {@inheritdoc}
 *
 * @throws \LogicException When the URL, the downloaded document, the parsed
 *                         HTML or the extracted cells are empty.
 */
public function execute()
{
    // resolve the bank exchange page URL, check
    $url = $this->getURL();
    if (empty($url)) {
        throw new \LogicException('broken class:no url');
    }

    // download the bank exchange page
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36');
    $str = curl_exec($curl);
    // release the handle before any of the checks below can throw
    curl_close($curl);

    // curl_exec() yields false on failure, which empty() also catches
    if (empty($str)) {
        throw new \LogicException('broken remote:no document on link');
    }

    $html = SimpleHTMLDom::str_get_html($str);
    if (empty($html)) {
        throw new \LogicException('broken markup:no html');
    }

    // grab exchange table, check
    $cells = $this->grabCells($html);
    if (empty($cells)) {
        throw new \LogicException('broken markup:no cells');
    }

    // grab exchange values
    $this->grabValues($cells);

    // return
    return $this->returnValues();
}
/**
 * Get HTML
 *
 * Fetches the remote document at $url with cURL (following redirects and
 * using 'cookies.txt' as cookie jar and cookie file) and, on HTTP 200,
 * parses it into a new DOM object.
 *
 * @param string $url
 * @return mixed The result of SimpleHTMLDom::str_get_html() on HTTP 200;
 *               otherwise a diagnostic string containing the URL, the
 *               JSON-encoded cURL status info and the data received.
 */
public function getHtml($url)
{
    $c = curl_init();
    curl_setopt($c, CURLOPT_URL, $url);
    curl_setopt($c, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($c, CURLOPT_USERAGENT, "My Scrap bot");
    curl_setopt($c, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($c, CURLOPT_COOKIEJAR, 'cookies.txt');
    curl_setopt($c, CURLOPT_COOKIEFILE, 'cookies.txt');
    $result = curl_exec($c);
    $status = curl_getinfo($c);
    curl_close($c);

    if ($status['http_code'] === 200) {
        return \serhatozles\simplehtmldom\SimpleHTMLDom::str_get_html(
            mb_convert_encoding($result, 'HTML-ENTITIES', 'utf-8')
        );
    }

    // Return criteria not met: report the error. The received body lives in
    // $result (the previously interpolated $data was never defined).
    return "\nERRORCODE22 with {$url}!!\nLast status codes:\n" . json_encode($status) . "\n\nLast data got:\n{$result}\n";
}
/**
 * Scrape the "Waz" store listing at $url and sync its products with the stored pieces.
 *
 * Echoes an HTML header naming the store and URL, then walks every
 * `.section-content .product` node. For each product the name (image alt
 * text), image source, link and sale price are read, and then:
 *  - if buscaPeca() reports exactly one match and its stored price differs,
 *    the change is logged with a colour (red = price went up, green = price
 *    went down) and the price is updated;
 *  - if buscaPeca() reports anything other than exactly one match, the
 *    product is logged in blue and saved via salvaPeca().
 *
 * Products whose image, link or price node is missing are skipped.
 *
 * @param string $url          Listing page to scrape.
 * @param mixed  $categoria_id Category identifier handed through to salvaPeca().
 * @return void
 * @throws \RuntimeException When the listing page cannot be loaded into a DOM.
 */
public function getProductsWaz($url, $categoria_id)
{
    $loja = 'Waz';

    $dom = SimpleHTMLDom::file_get_html($url, 10);
    echo "<br/><strong>Loja:</strong> {$loja} | <strong>URL:</strong> {$url} </br><hr/><br/>";

    // A failed load would otherwise end in a fatal "->find() on a non-object".
    if (!is_object($dom)) {
        throw new \RuntimeException("Could not load product listing from {$url}");
    }

    foreach ($dom->find(".section-content .product") as $element) {
        $imageNode = $element->find('a figure img', 0);
        $linkNode = $element->find('a', 0);
        $priceNode = $element->find('.k-prd-saleValue', 0);

        // Incomplete markup: without these nodes there is no usable name, link or price.
        if (!$imageNode || !$linkNode || !$priceNode) {
            continue;
        }

        $peca = trim($imageNode->alt);

        // NOTE(review): out-of-stock detection is not implemented for this store;
        // the flag is always 0, so the "disable" branch below never runs.
        $semEstoque = 0;

        if ($semEstoque == 0) {
            // The lookup result must be kept: it was previously overwritten with
            // an empty array, which made the price-update branch unreachable.
            $modelPecas = $this->buscaPeca($peca, $loja);
            $imagem = $imageNode->src;
            $link = trim($linkNode->href);
            $preco = $this->formataPreco($priceNode->innertext);

            if (count($modelPecas) == 1) {
                $modelPrecos = $this->buscaPreco($modelPecas->id);

                if ($modelPrecos->preco != $preco) {
                    if ($preco > $modelPrecos->preco) {
                        $cor = '#F00';
                    } elseif ($preco < $modelPrecos->preco) {
                        $cor = '#0F0';
                    } else {
                        $cor = '#000';
                    }

                    $this->catcherLog($peca, $preco . " - " . $modelPrecos->preco, $cor);
                    $this->atualizaPreco($modelPecas->id, $preco);
                }
            } else {
                $this->catcherLog($peca, $preco, '#00F');
                $this->salvaPeca($peca, $loja, $imagem, $link, $preco, $categoria_id);
            }
        } else {
            if (count($this->buscaPeca($peca, $loja)) > 0) {
                $this->catcherLog("Produto Desabilitado: " . $peca, 0, '#CCC');
                $this->desabilitaPeca($peca, $loja);
            }
        }
    }
}