コード例 #1
0
 /**
  * Scrape an HTML document and pass every entry of the 'kumo' result to send().
  *
  * Documents that are not Spizer_Document_Html instances are ignored. A
  * scraper failure is swallowed unless the 'throwIfNotfound' option is
  * enabled, in which case the exception is rethrown.
  *
  * @param  Spizer_Document $doc
  * @return void
  * @throws Diggin_Scraper_Exception when scraping fails and 'throwIfNotfound' is enabled
  */
 public function handle(Spizer_Document $doc)
 {
     if (!$doc instanceof Spizer_Document_Html) {
         return;
     }

     $headers = $doc->getAllHeaders();
     // The body is already decoded, so the encoding headers must not be
     // handed to Zend_Http_Response together with it.
     unset($headers['transfer-encoding'], $headers['content-encoding']);

     try {
         $response = new Zend_Http_Response($doc->getStatus(), $headers, $doc->getBody());
         $results = $this->scraper->scrape($response, $doc->getUrl());
     } catch (Diggin_Scraper_Exception $dse) {
         // Honour the option's value rather than its mere presence, so that
         // 'throwIfNotfound' => false really disables rethrowing.
         if (!empty($this->_config['throwIfNotfound'])) {
             throw $dse;
         }
         return;
     }

     // Nothing to forward when the scraper produced no 'kumo' entry.
     if (!isset($results['kumo'])) {
         return;
     }

     // !empty() keeps a missing 'debug' option from raising a notice.
     if (!empty($this->_config['debug'])) {
         echo 'count scrape results ' . count($results['kumo']) . PHP_EOL;
     }

     foreach ($results['kumo'] as $src) {
         $this->send($src);
     }
 }
コード例 #2
0
ファイル: SaveHtml.php プロジェクト: sasezaki/spizer
 /**
  * Write the document body to a file inside the configured 'save_dir'.
  *
  * The file name is the raw-URL-encoded document URL. The URL is passed to
  * addHaveFiles() only when the body was actually written.
  *
  * @param  Spizer_Document $document
  * @return void
  */
 public function handle(Spizer_Document $document)
 {
     $url = $document->getUrl();
     $filepath = $this->_config['save_dir'] . DIRECTORY_SEPARATOR . rawurlencode($url);

     // file_put_contents() returns false on failure; 0 bytes written (an
     // empty body) is still a success, hence the strict comparison.
     if (false === file_put_contents($filepath, $document->getBody())) {
         return;
     }

     $this->addHaveFiles($url);
 }
コード例 #3
0
ファイル: RegexMatch.php プロジェクト: sasezaki/spizer
 /**
  * Log a 'RegexMatch' entry when the re-encoded body matches the configured pattern.
  *
  * The body is first run through Diggin_Http_Response_Encoding::encode()
  * together with the document's content-type header (the original author's
  * note says this converts it to UTF-8 -- confirm against that class).
  *
  * @param  Spizer_Document $document
  * @return void
  */
 public function handle(Spizer_Document $document)
 {
     $body = Diggin_Http_Response_Encoding::encode(
         $document->getBody(),
         $document->getHeader('content-type')
     );
     $pattern = $this->_config['match'];

     $matches = array();
     if (!preg_match($pattern, $body, $matches, PREG_OFFSET_CAPTURE)) {
         return;
     }

     // With PREG_OFFSET_CAPTURE the full match is a (text, byte offset) pair.
     list($matchedText, $offset) = $matches[0];

     $this->engine->log('RegexMatch', array(
         'message' => 'Document body matched lookup expression',
         'regex'   => $pattern,
         'match'   => $matchedText,
         'offset'  => $offset,
     ));
 }
コード例 #4
0
 /**
  * Search the document body for the configured string and log a hit.
  *
  * The 'matchcase' option selects a case-sensitive (strpos) or
  * case-insensitive (stripos) search for the 'match' option's value.
  *
  * @param Spizer_Document $document
  * @see   Spizer_Handler_Abstract::handle()
  */
 public function handle(Spizer_Document $document)
 {
     $caseSensitive = $this->_config['matchcase'];
     $haystack = $document->getBody();
     $needle = $this->_config['match'];

     if ($caseSensitive) {
         $offset = strpos($haystack, $needle);
     } else {
         $offset = stripos($haystack, $needle);
     }

     // Offset 0 is a valid hit, so only a strict false means "not found".
     if (false === $offset) {
         return;
     }

     $this->_log(array(
         'message' => 'Document body matched lookup string',
         'needle'  => $needle,
         'offset'  => $offset,
     ));
 }
コード例 #5
0
ファイル: SaveImage.php プロジェクト: sasezaki/spizer
 /**
  * Save the document body to 'save_dir' when its content type is an image.
  *
  * The file name is the raw-URL-encoded document URL. The URL is passed to
  * addHaveFiles() only when the body was actually written.
  *
  * @param  Spizer_Document $document
  * @return void
  */
 public function handle(Spizer_Document $document)
 {
     // Only image/* responses are saved. The pattern is anchored so that
     // "image/" appearing later in the header value (e.g. inside a
     // parameter) does not count, and a missing or non-string header is
     // rejected before it reaches preg_match().
     $content_type = $document->getHeader('content-type');
     if (!is_string($content_type) || !preg_match('#^\s*image/#i', $content_type)) {
         return;
     }

     $url = $document->getUrl();
     $filepath = $this->_config['save_dir'] . DIRECTORY_SEPARATOR . rawurlencode($url);

     // The FILE_BINARY flag was dropped: it never had any effect and is
     // deprecated as of PHP 8.1. file_put_contents() writes the bytes as-is.
     if (false === file_put_contents($filepath, $document->getBody())) {
         return;
     }

     $this->addHaveFiles($url);
 }
コード例 #6
0
 /**
  * Scrape an HTML document and queue the 'kumo' entry of the results.
  *
  * When $this->_callonce is exactly true, the first call switches it to 1 and
  * proceeds; any later call that finds it loosely equal to 1 returns at once.
  * Documents that are not Spizer_Document_Html instances are ignored.
  *
  * @param  Spizer_Document $doc
  * @return void
  */
 public function handle(Spizer_Document $doc)
 {
     if (true === $this->_callonce) {
         // First invocation in call-once mode: mark it as used and carry on.
         $this->_callonce = 1;
     } elseif ($this->_callonce && 1 == $this->_callonce) {
         // Call-once mode has already been consumed.
         return;
     }

     if (!($doc instanceof Spizer_Document_Html)) {
         return;
     }

     // The body is already decoded, so drop the encoding headers.
     $headers = $doc->getAllHeaders();
     unset($headers['transfer-encoding'], $headers['content-encoding']);

     $response = new Zend_Http_Response($doc->getStatus(), $headers, $doc->getBody());
     $results = $this->scraper->scrape($response, $doc->getUrl());

     $this->_addQueue($results['kumo']);
 }
コード例 #7
0
 /**
  * Log one line per document: URL, status code and body length in bytes.
  *
  * The log level follows the status class: 1xx/2xx => INFO, 3xx => NOTICE,
  * 4xx => WARN, 5xx => ERR. Any other status class is logged as WARN.
  *
  * @param Spizer_Document $document
  */
 public function handle(Spizer_Document $document)
 {
     $status = $document->getStatus();
     $string = "{$document->getUrl()} {$status} " . strlen($document->getBody());

     // Decide log level according to the status class. floor() is required
     // here: round() pushed codes such as 451 into the next class (5 => ERR).
     switch ((int) floor($status / 100)) {
         case 1:
         case 2:
             $level = Zend_Log::INFO;
             break;
         case 3:
             $level = Zend_Log::NOTICE;
             break;
         case 4:
             $level = Zend_Log::WARN;
             break;
         case 5:
             $level = Zend_Log::ERR;
             break;
         default:
             // Status outside 1xx-5xx: previously $level stayed undefined.
             $level = Zend_Log::WARN;
             break;
     }

     $this->_logger->log($string, $level);
 }
コード例 #8
0
 /**
  * Scrape an HTML document and send a Kumo_Request for each filtered target.
  *
  * The 'kumo' entry of the scrape results is de-duplicated and passed through
  * filter(). Every remaining target becomes a Kumo_Request whose referrer is
  * the document URL and whose 'Referer' header comes from toRefererUrl().
  * Documents that are not Spizer_Document_Html instances are ignored.
  *
  * @param  Spizer_Document $doc
  * @return void
  */
 public function handle(Spizer_Document $doc)
 {
     if (!($doc instanceof Spizer_Document_Html)) {
         return;
     }

     // The body is already decoded, so drop the encoding headers.
     $headers = $doc->getAllHeaders();
     unset($headers['transfer-encoding'], $headers['content-encoding']);

     $response = new Zend_Http_Response($doc->getStatus(), $headers, $doc->getBody());
     $results = $this->scraper->scrape($response, $doc->getUrl());

     $uniqueSources = array_unique($results['kumo']);
     foreach ($this->filter($uniqueSources) as $src) {
         $request = new Kumo_Request($src);
         $request->setReferrer($doc->getUrl());
         $request->setHeader('Referer', $this->toRefererUrl($doc->getUrl()));
         $this->send($request);
     }
 }
コード例 #9
0
 /**
  * Match the document body against the configured regular expression and log a hit.
  *
  * The logged entry carries the pattern, the matched text and the offset
  * reported by preg_match() with PREG_OFFSET_CAPTURE.
  *
  * @param Spizer_Document $document
  * @see   Spizer_Handler_Abstract::handle()
  */
 public function handle(Spizer_Document $document)
 {
     $pattern = $this->_config['match'];
     $body = $document->getBody();

     $matches = array();
     if (!preg_match($pattern, $body, $matches, PREG_OFFSET_CAPTURE)) {
         return;
     }

     // With PREG_OFFSET_CAPTURE the full match is a (text, byte offset) pair.
     list($matchedText, $offset) = $matches[0];

     $this->_log(array(
         'message' => 'Document body matched lookup expression',
         'regex'   => $pattern,
         'match'   => $matchedText,
         'offset'  => $offset,
     ));
 }