/**
 * Scrape an HTML document and send every 'kumo' result onward.
 *
 * Documents that are not Spizer_Document_Html instances are ignored.
 * When the scraper throws, the exception is rethrown only if the
 * 'throwIfNotfound' option is enabled (truthy); otherwise the failure is
 * swallowed and nothing is sent.
 *
 * @param Spizer_Document $doc
 * @throws Diggin_Scraper_Exception If scraping fails and 'throwIfNotfound' is enabled
 */
public function handle(Spizer_Document $doc)
{
    if (!$doc instanceof Spizer_Document_Html) {
        return;
    }

    // The response is already decoded, so the encoding headers must not be
    // handed to Zend_Http_Response.
    $headers = $doc->getAllHeaders();
    unset($headers['transfer-encoding'], $headers['content-encoding']);

    $results = null;
    try {
        $results = $this->scraper->scrape(
            new Zend_Http_Response($doc->getStatus(), $headers, $doc->getBody()),
            $doc->getUrl()
        );
    } catch (Diggin_Scraper_Exception $dse) {
        // !empty() rather than isset(): an option explicitly set to false
        // must not trigger the rethrow.
        if (!empty($this->_config['throwIfNotfound'])) {
            throw $dse;
        }
    }

    // Nothing to do when scraping failed quietly or produced no result.
    if (!isset($results)) {
        return;
    }

    // !empty() keeps a missing 'debug' option from raising an
    // undefined-index diagnostic.
    if (!empty($this->_config['debug'])) {
        echo 'count scrape results ' . count($results['kumo']) . PHP_EOL;
    }

    foreach ($results['kumo'] as $src) {
        $this->send($src);
    }
}
/**
 * Handle document: log its URL, status code and body length.
 *
 * The log level is chosen from the status class (hundreds digit):
 * 1xx/2xx => INFO, 3xx => NOTICE, 4xx => WARN, 5xx => ERR.
 * Any status outside 1xx-5xx is logged at WARN.
 *
 * @param Spizer_Document $document
 */
public function handle(Spizer_Document $document)
{
    $status = $document->getStatus();
    $string = "{$document->getUrl()} {$status} " . strlen($document->getBody());

    // Decide log level according to status code. floor() is required here:
    // round() would push e.g. 250 into the 3xx class and 450 into 5xx.
    switch ((int) floor($status / 100)) {
        case 1:
        case 2:
            $level = Zend_Log::INFO;
            break;

        case 3:
            $level = Zend_Log::NOTICE;
            break;

        case 4:
            $level = Zend_Log::WARN;
            break;

        case 5:
            $level = Zend_Log::ERR;
            break;

        default:
            // Unrecognised status class: still log it, instead of passing an
            // undefined level to the logger.
            $level = Zend_Log::WARN;
            break;
    }

    $this->_logger->log($string, $level);
}
/**
 * Scrape an HTML document and pass its 'kumo' results to _addQueue().
 *
 * When $this->_callonce is exactly true, it is switched to 1 on this call
 * and processing continues; once it loosely equals 1, later calls return
 * immediately. The switch happens before the document type check.
 *
 * @param Spizer_Document $doc
 */
public function handle(Spizer_Document $doc)
{
    $onceFlag = $this->_callonce;
    if (true === $onceFlag) {
        // First invocation: remember that the handler has been used.
        $this->_callonce = 1;
    } elseif ($onceFlag && 1 == $onceFlag) {
        // Already invoked once before.
        return;
    }

    if (!($doc instanceof Spizer_Document_Html)) {
        return;
    }

    // The response is already decoded, so drop the encoding headers.
    $responseHeaders = $doc->getAllHeaders();
    unset(
        $responseHeaders['transfer-encoding'],
        $responseHeaders['content-encoding']
    );

    $response = new Zend_Http_Response(
        $doc->getStatus(),
        $responseHeaders,
        $doc->getBody()
    );
    $scraped = $this->scraper->scrape($response, $doc->getUrl());

    $this->_addQueue($scraped['kumo']);
}
/**
 * Scrape an HTML document and send a Kumo_Request for each distinct
 * 'kumo' result that passes filter().
 *
 * Each request gets the document URL as its referrer and a 'Referer'
 * header built by toRefererUrl() from that same URL.
 *
 * @param Spizer_Document $doc
 */
public function handle(Spizer_Document $doc)
{
    if (!($doc instanceof Spizer_Document_Html)) {
        return;
    }

    // The response is already decoded, so drop the encoding headers.
    $responseHeaders = $doc->getAllHeaders();
    unset(
        $responseHeaders['transfer-encoding'],
        $responseHeaders['content-encoding']
    );

    $response = new Zend_Http_Response(
        $doc->getStatus(),
        $responseHeaders,
        $doc->getBody()
    );
    $scraped = $this->scraper->scrape($response, $doc->getUrl());

    $distinct = array_unique($scraped['kumo']);
    foreach ($this->filter($distinct) as $target) {
        $outgoing = new Kumo_Request($target);
        $outgoing->setReferrer($doc->getUrl());
        $outgoing->setHeader('Referer', $this->toRefererUrl($doc->getUrl()));

        $this->send($outgoing);
    }
}
/**
 * Decide whether the handler should run for this document and, if so,
 * invoke ::handle().
 *
 * The document's status code and content-type header are each compared
 * against the 'status' and 'content-type' config options. An option that
 * is empty (falsy) imposes no restriction; an array option matches when
 * it contains the value; any other option must loosely equal the value.
 *
 * @param Spizer_Document $document
 */
public function call(Spizer_Document $document)
{
    $status = $document->getStatus();
    $type   = $document->getHeader('content-type');

    $wantedStatus = $this->_config['status'];
    $wantedType   = $this->_config['content-type'];

    // Status restriction (loose comparison, as configured values may not
    // share the status code's type).
    if (!$wantedStatus) {
        $statusMatches = true;
    } elseif (is_array($wantedStatus)) {
        $statusMatches = in_array($status, $wantedStatus);
    } else {
        $statusMatches = ($wantedStatus == $status);
    }

    // Content-type restriction, same rules as above.
    if (!$wantedType) {
        $typeMatches = true;
    } elseif (is_array($wantedType)) {
        $typeMatches = in_array($type, $wantedType);
    } else {
        $typeMatches = ($wantedType == $type);
    }

    if ($statusMatches && $typeMatches) {
        $this->handle($document);
    }
}