Ejemplo n.º 1
0
 /**
  * Run the typical bank-scraping pipeline.
  *
  * Steps, in order: resolve the URL via getURL(), download the page with cURL,
  * parse it with SimpleHTMLDom::str_get_html(), extract the cells with
  * grabCells(), pass them to grabValues() and return the result of
  * returnValues(). Every intermediate result is checked with empty() and the
  * pipeline aborts with an exception as soon as one of them is empty.
  *
  * {@inheritdoc}
  *
  * @throws \LogicException if the URL, the downloaded document, the parsed
  *                         HTML or the extracted cells are empty
  */
 public function execute()
 {
     // resolve the bank exchange page URL, check
     $url = $this->getURL();
     if (empty($url)) {
         throw new \LogicException('broken class:no url');
     }
     // grab bank exchange page, check
     $curl = curl_init();
     curl_setopt($curl, CURLOPT_URL, $url);
     curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
     curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
     // some sites reject requests without a browser-like user agent
     curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36');
     $str = curl_exec($curl);
     // release the handle right away so it is not kept open while parsing
     // and is closed even when one of the checks below throws
     curl_close($curl);
     // curl_exec() yields false on transfer failure; empty() also covers ''
     if (empty($str)) {
         throw new \LogicException('broken remote:no document on link');
     }
     // parse the downloaded markup, check
     $html = SimpleHTMLDom::str_get_html($str);
     if (empty($html)) {
         throw new \LogicException('broken markup:no html');
     }
     // grab exchange table, check
     $cells = $this->grabCells($html);
     if (empty($cells)) {
         throw new \LogicException('broken markup:no cells');
     }
     // grab exchange values
     $this->grabValues($cells);
     // return
     return $this->returnValues();
 }
 /**
  * Get HTML
  *
  * Downloads the document at $url with cURL (following redirects and using
  * 'cookies.txt' as both cookie jar and cookie file) and, when the final HTTP
  * status is 200, parses it with SimpleHTMLDom::str_get_html().
  *
  * NOTE(review): on any other status this method does NOT throw; it returns a
  * human-readable diagnostic string instead, so callers must check the type
  * of the returned value.
  *
  * NOTE(review): the 'HTML-ENTITIES' target of mb_convert_encoding() is
  * deprecated as of PHP 8.2 - revisit when upgrading.
  *
  * @param string $url address of the remote document
  * @return mixed the result of SimpleHTMLDom::str_get_html() on HTTP 200,
  *               otherwise a string describing the failure
  */
 public function getHtml($url)
 {
     $c = curl_init();
     curl_setopt($c, CURLOPT_URL, $url);
     curl_setopt($c, CURLOPT_RETURNTRANSFER, 1);
     curl_setopt($c, CURLOPT_USERAGENT, "My Scrap bot");
     curl_setopt($c, CURLOPT_FOLLOWLOCATION, true);
     // relative path: the cookie file is resolved against the current working directory
     curl_setopt($c, CURLOPT_COOKIEJAR, 'cookies.txt');
     curl_setopt($c, CURLOPT_COOKIEFILE, 'cookies.txt');
     $result = curl_exec($c);
     // transfer details must be read before the handle is closed
     $status = curl_getinfo($c);
     curl_close($c);
     if ($status['http_code'] === 200) {
         // non-ASCII characters are turned into HTML entities before parsing
         return \serhatozles\simplehtmldom\SimpleHTMLDom::str_get_html(mb_convert_encoding($result, 'HTML-ENTITIES', 'utf-8'));
     }
     //if not met the return criteria above, then show error
     // report the body actually received ($result); the previous code
     // interpolated an undefined variable here and always printed nothing
     return "\nERRORCODE22 with {$url}!!\nLast status codes:\n" . json_encode($status) . "\n\nLast data got:\n{$result}\n";
 }