/**
 * Search the document body for the configured regular expression and log
 * the first match.
 *
 * The body is passed through Diggin_Http_Response_Encoding::encode()
 * together with the document's content-type header before matching (the
 * original note describes this as a conversion to UTF-8; that is not
 * verifiable from this method alone). When $this->_config['match'] matches,
 * a 'RegexMatch' entry holding the pattern, the matched text and the offset
 * reported by PREG_OFFSET_CAPTURE is sent to the engine log. Nothing is
 * logged when the pattern does not match or preg_match() fails.
 *
 * @param Spizer_Document $document
 */
public function handle(Spizer_Document $document)
{
    $rawBody     = $document->getBody();
    $contentType = $document->getHeader('content-type');
    $encodedBody = Diggin_Http_Response_Encoding::encode($rawBody, $contentType);

    // Guard clause: both "no match" (0) and "regex error" (false) end here.
    if (!preg_match($this->_config['match'], $encodedBody, $matches, PREG_OFFSET_CAPTURE)) {
        return;
    }

    // With PREG_OFFSET_CAPTURE, $matches[0] is array(matched text, offset).
    $details = array(
        'message' => 'Document body matched lookup expression',
        'regex'   => $this->_config['match'],
        'match'   => $matches[0][0],
        'offset'  => $matches[0][1],
    );

    $this->engine->log('RegexMatch', $details);
}
/**
 * Save image documents into the configured directory.
 *
 * Documents whose content-type header does not contain "image/" are
 * ignored. For images, the body is written to
 * $this->_config['save_dir'] . DIRECTORY_SEPARATOR . rawurlencode(URL),
 * and the URL is then registered through addHaveFiles().
 *
 * The URL is registered only when the write succeeded, so a failed save is
 * never reported as an existing file.
 *
 * @param Spizer_Document $document
 */
public function handle(Spizer_Document $document)
{
    // Only image responses are stored. The is_string() guard keeps a missing
    // or multi-valued header away from preg_match(), which would raise a
    // deprecation notice or TypeError on current PHP versions.
    $content_type = $document->getHeader('content-type');
    if (!is_string($content_type) || !preg_match('#image/.*#i', $content_type)) {
        return;
    }

    // The URL-encoded form of the URL is used as the file name.
    $url      = $document->getUrl();
    $filepath = $this->_config['save_dir'] . DIRECTORY_SEPARATOR . rawurlencode($url);

    // FILE_BINARY is a no-op flag (deprecated since PHP 8.1), so it is omitted.
    if (file_put_contents($filepath, $document->getBody()) === false) {
        // Write failed (PHP has already emitted a warning); do not record
        // the URL as saved.
        return;
    }

    $this->addHaveFiles($url);
}
/**
 * Decide whether this handler applies to the document and, if so, run
 * ::handle() on it.
 *
 * Two optional filters are read from the handler configuration: 'status'
 * and 'content-type'. Each may be a single value or an array of accepted
 * values; a filter with a falsy value is not applied. The document's status
 * code and content-type header must pass every active filter (compared
 * loosely, as-is) for ::handle() to be invoked.
 *
 * @param Spizer_Document $document
 */
public function call(Spizer_Document $document)
{
    $status = $document->getStatus();
    $type   = $document->getHeader('content-type');

    $statusFilter = $this->_config['status'];
    $typeFilter   = $this->_config['content-type'];

    // Status filter: skip the document if its status code is not accepted.
    if ($statusFilter) {
        $statusAccepted = is_array($statusFilter)
            ? in_array($status, $statusFilter)
            : ($statusFilter == $status);

        if (!$statusAccepted) {
            return;
        }
    }

    // Content-type filter: same rule, applied to the raw header value.
    if ($typeFilter) {
        $typeAccepted = is_array($typeFilter)
            ? in_array($type, $typeFilter)
            : ($typeFilter == $type);

        if (!$typeAccepted) {
            return;
        }
    }

    $this->handle($document);
}