/**
  * Starts a consumer that retrieves documents that have to be saved to the index.
  *
  * @param \Symfony\Component\Console\Input\InputInterface   $input
  * @param \Symfony\Component\Console\Output\OutputInterface $output
  *
  * @return integer
  */
 protected function execute(InputInterface $input, OutputInterface $output)
 {
     // Register the per-message handler with the queue; the closure is presumably
     // invoked for each delivered message (confirm against the queue's listen()).
     $this->queue->listen(function ($message) {
         // A message with an empty body has nothing to index: reject it and stop.
         if (strlen($message->body) === 0) {
             $this->queue->rejectMessage($message);

             return;
         }

         // The indexer is handed the raw message, so the body is not decoded here.
         $this->indexer->prepareDocument($message);
         $this->queue->acknowledge($message);
     });

     // NOTE(review): a non-zero value is conventionally a failure exit status for a
     // console command. Kept unchanged because callers (e.g. process supervisors)
     // may rely on it — confirm whether 0 was intended.
     return 1;
 }
 /**
  * Consumes a message and, when its body carries a non-empty blacklist, drops the blacklisted documents.
  *
  * @param \PhpAmqpLib\Message\AMQPMessage $message
  */
 public function recrawl(AMQPMessage $message)
 {
     // Defined before the try block so the error path can always inspect it safely.
     $body = null;

     try {
         // Decoded as objects (no associative flag), hence the property access below.
         $body = json_decode($message->body);

         // Only drop documents when the message actually carries blacklist entries.
         if (is_array($body->blacklist) && count($body->blacklist) > 0) {
             $this->dropBlacklistedDocuments($body->blacklist, $body->metadata);
         }

         $this->queue->acknowledge($message);
     } catch (Exception $e) {
         $this->queue->rejectMessage($message);

         // The body may have failed to decode or may lack metadata. Dereferencing it
         // blindly here could raise a second error while handling the first one, so
         // fall back to null when the core cannot be determined.
         $core = isset($body->metadata->core) ? $body->metadata->core : null;
         $this->logMessage("emergency", $e->getMessage(), $core);
     }
 }
 /**
  * Consumes a message, extracts the URL from it and crawls the webpage.
  *
  * @param \PhpAmqpLib\Message\AMQPMessage $message
  */
 public function crawlUrl(AMQPMessage $message)
 {
     // Decode the body as an associative array and build the crawl job from its fields.
     $payload = json_decode($message->body, true);
     $job = new CrawlJob(
         $payload['url'],
         $payload['base_url'],
         $payload['blacklist'],
         $payload['metadata'],
         $payload['whitelist']
     );

     // Guard: a job that may not be crawled is removed from the index and skipped.
     if ($job->isAllowedToCrawl() === false) {
         $this->indexer->deleteDocument($message);
         $this->queue->rejectMessage($message);
         $this->markAsSkipped($job, 'info', 'Not allowed to crawl');

         return;
     }

     // Guard: skip URLs the indexer reports as indexed and not expired.
     $stillFresh = $this->indexer->isUrlIndexedAndNotExpired($job->getUrl(), $job->getMetadata());
     if ($stillFresh) {
         $this->queue->rejectMessage($message);
         $this->markAsSkipped($job, 'info', 'Not expired yet');

         return;
     }

     try {
         // Configure the HTTP client before crawling: custom user agent, and the
         // 'redirect.disable' request parameter (presumably so that redirects are
         // surfaced to the handler below instead of being followed — TODO confirm).
         $this->spider->getRequestHandler()->getClient()->setUserAgent($this->userAgent);
         $this->spider->getRequestHandler()->getClient()->getConfig()->set(
             'request.params',
             ['redirect.disable' => true]
         );

         $this->spider->crawl($job);
         $this->logMessage(
             'info',
             sprintf("Crawling %s", $job->getUrl()),
             $job->getUrl(),
             $payload['metadata']['core']
         );
         $this->queue->acknowledge($message);
     } catch (ClientErrorResponseException $httpError) {
         // Dispatch on the HTTP status of the failed response. Loose comparison
         // is deliberate so that numeric strings match as well.
         $statusCode = $httpError->getResponse()->getStatusCode();

         if ($statusCode == 301) {
             // Moved permanently: drop the old document and queue a job for the new location.
             $this->indexer->deleteDocument($message);
             $this->queue->rejectMessage($message);
             $this->markAsSkipped($job, 'warning', $httpError->getMessage());
             $redirectJob = new CrawlJob(
                 $httpError->getResponse()->getInfo('redirect_url'),
                 $job->getBaseUrl(),
                 $job->getBlacklist(),
                 $job->getMetadata(),
                 $job->getWhitelist()
             );
             $this->queue->publishJob($redirectJob);
         } elseif (in_array($statusCode, [403, 401, 500])) {
             // Reject and record the status; the indexed document is left untouched.
             $this->queue->rejectMessage($message);
             $this->markAsSkipped($job, 'warning', 'status: ' . $httpError->getResponse()->getStatusCode());
         } elseif (in_array($statusCode, [404, 418])) {
             // Delete the document from the index, then reject the message.
             $this->indexer->deleteDocument($message);
             $this->logMessage(
                 'warning',
                 sprintf("Deleted %s", $job->getUrl()),
                 $job->getUrl(),
                 $payload['metadata']['core']
             );
             $this->queue->rejectMessage($message);
         } else {
             // Any other status: hand the message back for another attempt and mark the job failed.
             $this->queue->rejectMessageAndRequeue($message);
             $this->markAsFailed($job, $httpError->getResponse()->getStatusCode());
         }
     } catch (Exception $failure) {
         // Any other failure: reject the message and record the error text.
         $this->queue->rejectMessage($message);
         $this->markAsFailed($job, $failure->getMessage());
     }

     // Release the per-message references and trigger a cycle collection.
     // Note that the early returns above do not reach this point.
     unset($job, $message, $payload);
     gc_collect_cycles();
 }
 public function testRejectMessage()
 {
     // Connection double: must be asked once whether it is connected (answering
     // true) and must be closed exactly once.
     $connection = $this->getMockBuilder('PhpAmqpLib\\Connection\\AMQPConnection')
         ->disableOriginalConstructor()
         ->setMethods(['isConnected', 'close'])
         ->getMock();
     $connection->expects($this->once())
         ->method('isConnected')
         ->will($this->returnValue(true));
     $connection->expects($this->once())
         ->method('close');

     // Channel double: rejecting must call basic_reject exactly once with the
     // message's delivery tag and false as the second argument.
     $channel = $this->getMockBuilder('PhpAmqpLib\\Channel\\AMQPChannel')
         ->disableOriginalConstructor()
         ->setMethods(['basic_reject'])
         ->getMock();
     $channel->expects($this->once())
         ->method('basic_reject')
         ->with($this->equalTo('dummyTag'), $this->equalTo(false));

     // Message double carrying the delivery info the queue is expected to read.
     $message = $this->getMockBuilder('PhpAmqpLib\\Message\\AMQPMessage')->getMock();
     $message->delivery_info = [
         'channel' => $channel,
         'delivery_tag' => 'dummyTag',
     ];

     $queue = new Queue($connection, 'queue3');
     $queue->rejectMessage($message);
 }