Пример #1
0
 /**
  * Run the configured lookup expression against the document body and log
  * the first match, if any.
  *
  * The body is first passed through Diggin_Http_Response_Encoding::encode()
  * together with the content-type header (per the original note this
  * converts the body to UTF-8 -- confirm against that class).
  *
  * @param Spizer_Document $document Document whose body is searched
  */
 public function handle(Spizer_Document $document)
 {
     $encodedBody = Diggin_Http_Response_Encoding::encode(
         $document->getBody(),
         $document->getHeader('content-type')
     );

     $found = array();
     $matched = preg_match($this->_config['match'], $encodedBody, $found, PREG_OFFSET_CAPTURE);

     // Nothing to report when the pattern does not match (or fails to run).
     if (!$matched) {
         return;
     }

     // With PREG_OFFSET_CAPTURE each entry is array(matched text, byte offset).
     list($matchedText, $matchOffset) = $found[0];

     $details = array(
         'message' => 'Document body matched lookup expression',
         'regex'   => $this->_config['match'],
         'match'   => $matchedText,
         'offset'  => $matchOffset,
     );
     $this->engine->log('RegexMatch', $details);
 }
Пример #2
0
 /**
  * Save the document body to the configured directory when the document
  * is an image, and record its URL via addHaveFiles().
  *
  * The target file name is the raw-URL-encoded document URL placed inside
  * $this->_config['save_dir']. The URL is only recorded when the file was
  * actually written.
  *
  * @param Spizer_Document $document Document to inspect and possibly save
  */
 public function handle(Spizer_Document $document)
 {
     // Only image responses are stored. A missing or non-string header is
     // treated as "not an image" instead of being handed to preg_match().
     $content_type = $document->getHeader('content-type');
     if (!is_string($content_type) || !preg_match('#image/.*#i', $content_type)) {
         return;
     }

     $url = $document->getUrl();
     $filepath = $this->_config['save_dir'] . DIRECTORY_SEPARATOR . rawurlencode($url);

     // FILE_BINARY was dropped: it never had any effect and is deprecated
     // as of PHP 8.1.
     $written = file_put_contents($filepath, $document->getBody());

     // Compare strictly with false: an empty body legitimately writes 0 bytes.
     if ($written === false) {
         return;
     }

     $this->addHaveFiles($url);
 }
Пример #3
0
 /**
  * Decide whether the handler applies to the given document, based on its
  * status code and content type, and invoke ::handle() when it does.
  *
  * Each of the 'status' and 'content-type' config entries may be empty
  * (no restriction), a single value, or an array of accepted values.
  * Comparisons are intentionally loose (== / non-strict in_array).
  *
  * @param Spizer_Document $document
  */
 public function call(Spizer_Document $document)
 {
     $status = $document->getStatus();
     $type = $document->getHeader('content-type');

     // Status filter: passes when unset/empty, otherwise must match.
     $statusOk = true;
     $allowedStatus = $this->_config['status'];
     if ($allowedStatus) {
         $statusOk = is_array($allowedStatus)
             ? in_array($status, $allowedStatus)
             : ($allowedStatus == $status);
     }

     // Content-type filter: same rules as the status filter.
     $typeOk = true;
     $allowedType = $this->_config['content-type'];
     if ($allowedType) {
         $typeOk = is_array($allowedType)
             ? in_array($type, $allowedType)
             : ($allowedType == $type);
     }

     if ($statusOk && $typeOk) {
         $this->handle($document);
     }
 }