/**
 * Scrape an HTML document and send every scraped "kumo" entry onward.
 *
 * Documents that are not Spizer_Document_Html are ignored. A scraper
 * failure is swallowed unless the 'throwIfNotfound' config option is
 * enabled (truthy), in which case the exception is rethrown.
 *
 * @param Spizer_Document $doc
 * @return void
 * @throws Diggin_Scraper_Exception when scraping fails and 'throwIfNotfound' is enabled
 */
public function handle(Spizer_Document $doc)
{
    if (!$doc instanceof Spizer_Document_Html) {
        return;
    }

    // The response is already decoded, so the encoding headers are dropped
    // before the response object is rebuilt for the scraper.
    $headers = $doc->getAllHeaders();
    unset($headers['transfer-encoding'], $headers['content-encoding']);

    try {
        $results = $this->scraper->scrape(
            new Zend_Http_Response($doc->getStatus(), $headers, $doc->getBody()),
            $doc->getUrl()
        );
    } catch (Diggin_Scraper_Exception $dse) {
        // Truthiness check rather than isset(): an explicit
        // 'throwIfNotfound' => false must NOT rethrow.
        if (!empty($this->_config['throwIfNotfound'])) {
            throw $dse;
        }

        return;
    }

    if (!isset($results)) {
        return;
    }

    // A result set without a 'kumo' entry is treated as "nothing found"
    // instead of tripping over an undefined index in count()/foreach.
    $sources = isset($results['kumo']) ? $results['kumo'] : array();

    // Guarded read: the 'debug' option may be absent from the config.
    if (!empty($this->_config['debug'])) {
        echo 'count scrape results ' . count($sources) . PHP_EOL;
    }

    foreach ($sources as $src) {
        $this->send($src);
    }
}
/**
 * Scrape an HTML document and queue the scraped "kumo" entries.
 *
 * Call-once handling: when the _callonce flag is boolean true, this call
 * flips it to 1 and proceeds; when the flag is loosely equal to 1, the
 * call returns immediately without doing anything.
 *
 * @param Spizer_Document $doc
 * @return void
 */
public function handle(Spizer_Document $doc)
{
    $once = $this->_callonce;
    if (true === $once) {
        // First invocation in call-once mode: mark it as consumed.
        $this->_callonce = 1;
    } elseif ($once && 1 == $once) {
        // Already ran once.
        return;
    }

    if (!($doc instanceof Spizer_Document_Html)) {
        return;
    }

    $responseHeaders = $doc->getAllHeaders();
    // Response is already decoded, so drop the encoding headers.
    foreach (array('transfer-encoding', 'content-encoding') as $headerName) {
        unset($responseHeaders[$headerName]);
    }

    $response = new Zend_Http_Response(
        $doc->getStatus(),
        $responseHeaders,
        $doc->getBody()
    );
    $scraped = $this->scraper->scrape($response, $doc->getUrl());

    $this->_addQueue($scraped['kumo']);
}
/**
 * Scrape an HTML document and send a Kumo_Request for each scraped target.
 *
 * The scraped "kumo" entries are de-duplicated and passed through
 * filter(); every remaining entry becomes a Kumo_Request whose referrer
 * is the document URL and whose Referer header is always set from
 * toRefererUrl().
 *
 * @param Spizer_Document $doc
 * @return void
 */
public function handle(Spizer_Document $doc)
{
    if (!($doc instanceof Spizer_Document_Html)) {
        return;
    }

    $responseHeaders = $doc->getAllHeaders();
    // Response is already decoded, so drop the encoding headers.
    unset($responseHeaders['transfer-encoding'], $responseHeaders['content-encoding']);

    $response = new Zend_Http_Response(
        $doc->getStatus(),
        $responseHeaders,
        $doc->getBody()
    );
    $scraped = $this->scraper->scrape($response, $doc->getUrl());

    $uniqueSources = array_unique($scraped['kumo']);

    foreach ($this->filter($uniqueSources) as $source) {
        $request = new Kumo_Request($source);
        $request->setReferrer($doc->getUrl());
        // The Referer header is set unconditionally (no config toggle).
        $request->setHeader('Referer', $this->toRefererUrl($doc->getUrl()));

        $this->send($request);
    }
}