/**
 * Start a consumer that retrieves documents which have to be saved to the index.
 *
 * Registers a callback on the queue. For every received message the callback
 * either rejects it (empty body) or passes it to the indexer's
 * prepareDocument() and acknowledges it afterwards.
 *
 * @param \Symfony\Component\Console\Input\InputInterface   $input
 * @param \Symfony\Component\Console\Output\OutputInterface $output
 *
 * @return int
 */
protected function execute(InputInterface $input, OutputInterface $output)
{
    $this->queue->listen(function ($message) {
        // A message without a body carries nothing to index: reject it right away.
        if (strlen($message->body) === 0) {
            $this->queue->rejectMessage($message);

            return;
        }

        // The indexer is handed the message itself, so the body is not decoded here.
        $this->indexer->prepareDocument($message);
        $this->queue->acknowledge($message);
    });

    // NOTE(review): the console component presumably uses this value as the process
    // exit status, where non-zero signals failure. Kept as-is because callers or
    // process supervisors may rely on it - confirm whether 0 is intended.
    return 1;
}
/**
 * Consume a recrawl message and drop the documents matching its blacklist.
 *
 * The JSON body is decoded as an object. When it carries a non-empty
 * `blacklist` array, that list and the `metadata` are passed to
 * dropBlacklistedDocuments(). The message is acknowledged afterwards; if an
 * exception is thrown, the message is rejected and the error is logged.
 *
 * @param \PhpAmqpLib\Message\AMQPMessage $message
 */
public function recrawl(AMQPMessage $message)
{
    // Defined up front so the catch handler can read it no matter where the
    // exception was raised.
    $body = null;

    try {
        $body = json_decode($message->body);

        // checking blacklist
        // isset() keeps these lookups silent when the body did not decode to an
        // object or a property is missing; the resulting values are unchanged (null).
        $blacklist = isset($body->blacklist) ? $body->blacklist : null;
        if (is_array($blacklist) && count($blacklist) > 0) {
            $metadata = isset($body->metadata) ? $body->metadata : null;
            $this->dropBlacklistedDocuments($blacklist, $metadata);
        }

        $this->queue->acknowledge($message);
    } catch (Exception $e) {
        $this->queue->rejectMessage($message);

        // The payload may be undecodable or lack metadata; the error path must
        // not fail on that lookup while reporting the actual problem.
        $core = isset($body->metadata->core) ? $body->metadata->core : null;
        $this->logMessage("emergency", $e->getMessage(), $core);
    }
}
/**
 * Handle a crawl message: build a crawl job from the JSON payload and crawl its URL.
 *
 * Jobs that are not allowed to be crawled, or whose URL is indexed and not yet
 * expired, are rejected and marked as skipped. A successful crawl acknowledges
 * the message. HTTP client errors are handled per status code; any other
 * exception rejects the message and marks the job as failed.
 *
 * @param \PhpAmqpLib\Message\AMQPMessage $message
 */
public function crawlUrl(AMQPMessage $message)
{
    $payload = json_decode($message->body, true);
    $crawlJob = new CrawlJob(
        $payload['url'],
        $payload['base_url'],
        $payload['blacklist'],
        $payload['metadata'],
        $payload['whitelist']
    );

    // Guard 1: the job itself says it must not be crawled.
    if ($crawlJob->isAllowedToCrawl() === false) {
        $this->indexer->deleteDocument($message);
        $this->queue->rejectMessage($message);
        $this->markAsSkipped($crawlJob, 'info', 'Not allowed to crawl');

        return;
    }

    // Guard 2: the indexer reports the URL as indexed and not expired.
    $stillFresh = $this->indexer->isUrlIndexedAndNotExpired($crawlJob->getUrl(), $crawlJob->getMetadata());
    if ($stillFresh) {
        $this->queue->rejectMessage($message);
        $this->markAsSkipped($crawlJob, 'info', 'Not expired yet');

        return;
    }

    try {
        // Configure the HTTP client: custom user agent, redirects disabled.
        $client = $this->spider->getRequestHandler()->getClient();
        $client->setUserAgent($this->userAgent);
        $client->getConfig()->set('request.params', ['redirect.disable' => true]);

        $this->spider->crawl($crawlJob);
        $this->logMessage(
            'info',
            sprintf("Crawling %s", $crawlJob->getUrl()),
            $crawlJob->getUrl(),
            $payload['metadata']['core']
        );
        $this->queue->acknowledge($message);
    } catch (ClientErrorResponseException $clientError) {
        $response = $clientError->getResponse();
        $status = $response->getStatusCode();

        switch ($status) {
            case 301:
                // Drop the old document and queue a new job for the redirect target.
                $this->indexer->deleteDocument($message);
                $this->queue->rejectMessage($message);
                $this->markAsSkipped($crawlJob, 'warning', $clientError->getMessage());

                $redirectJob = new CrawlJob(
                    $response->getInfo('redirect_url'),
                    $crawlJob->getBaseUrl(),
                    $crawlJob->getBlacklist(),
                    $crawlJob->getMetadata(),
                    $crawlJob->getWhitelist()
                );
                $this->queue->publishJob($redirectJob);
                break;

            case 403:
            case 401:
            case 500:
                // Skip without touching the index.
                $this->queue->rejectMessage($message);
                $this->markAsSkipped($crawlJob, 'warning', 'status: ' . $status);
                break;

            case 404:
            case 418:
                // Remove the document from the index.
                $this->indexer->deleteDocument($message);
                $this->logMessage(
                    'warning',
                    sprintf("Deleted %s", $crawlJob->getUrl()),
                    $crawlJob->getUrl(),
                    $payload['metadata']['core']
                );
                $this->queue->rejectMessage($message);
                break;

            default:
                // Any other status: put the message back on the queue.
                $this->queue->rejectMessageAndRequeue($message);
                $this->markAsFailed($crawlJob, $status);
                break;
        }
    } catch (Exception $failure) {
        $this->queue->rejectMessage($message);
        $this->markAsFailed($crawlJob, $failure->getMessage());
    }

    // Release the per-message data and run a garbage collection cycle.
    unset($crawlJob, $message, $payload);
    gc_collect_cycles();
}
/**
 * acknowledge() must call basic_ack on the message's channel with the
 * message's delivery tag.
 *
 * The connection mock additionally expects exactly one isConnected() call
 * (answering true) and exactly one close() call.
 */
public function testAcknowledge()
{
    $deliveryTag = 'dummyTag3';

    // Connection: reports itself as connected once and must be closed once.
    $connectionBuilder = $this->getMockBuilder('PhpAmqpLib\\Connection\\AMQPConnection');
    $connectionBuilder->disableOriginalConstructor();
    $connectionBuilder->setMethods(['isConnected', 'close']);
    $connection = $connectionBuilder->getMock();
    $connection->expects($this->once())
        ->method('isConnected')
        ->will($this->returnValue(true));
    $connection->expects($this->once())
        ->method('close');

    // Channel: must receive exactly one ack carrying the delivery tag.
    $channelBuilder = $this->getMockBuilder('PhpAmqpLib\\Channel\\AMQPChannel');
    $channelBuilder->disableOriginalConstructor();
    $channelBuilder->setMethods(['basic_ack']);
    $channel = $channelBuilder->getMock();
    $channel->expects($this->once())
        ->method('basic_ack')
        ->with($this->equalTo($deliveryTag));

    // Message: only the delivery info (channel and tag) is populated.
    $message = $this->getMockBuilder('PhpAmqpLib\\Message\\AMQPMessage')->getMock();
    $message->delivery_info = [
        'channel' => $channel,
        'delivery_tag' => $deliveryTag,
    ];

    $queue = new Queue($connection, 'queue5');
    $queue->acknowledge($message);
}