예제 #1
0
 /**
  * Cleans the sources of a feed that have not been visited since the date
  * returned by getLastFullImportDate().
  *
  * The cleanup is skipped when that date is unavailable. When more sources
  * would be removed than the threshold computed by getThreshold() allows,
  * the voter decides whether the cleanup continues or is halted.
  *
  * @param DelegatingSourceCleaner $cleaner Executes the removal for the query of unvisited sources
  * @param Feed                    $feed    The feed whose sources are checked
  * @param ThresholdVoterInterface $voter   Consulted when the threshold is exceeded
  *
  * @return int|bool The number of cleaned sources as reported by the cleaner,
  *                  or false when the cleanup was skipped or halted
  */
 public function cleanFeed(DelegatingSourceCleaner $cleaner, Feed $feed, ThresholdVoterInterface $voter)
 {
     $expireDate = $this->getLastFullImportDate($feed);

     // no reference date: report the skip and leave all sources alone
     if (null === $expireDate) {
         $this->logger->debug(sprintf('Skipping %s, because it has no recent imports', $feed));
         $this->eventDispatcher->dispatch(IoEvents::FEED_CLEANUP_SKIP, new FeedCleanupEvent($feed, 0));

         return false;
     }

     $this->eventDispatcher->dispatch(IoEvents::PRE_CLEAN_FEED, new FeedEvent($feed));

     $since = $expireDate->format('Y-m-d H:i:s');
     $this->logger->debug(sprintf('Checking sources of %s that have not been visited since %s', $feed, $since));

     $repository = $this->sourceManager->getRepository();

     // number of sources that would be removed
     $expired = $repository->countByFeedAndUnvisitedSince($feed, $expireDate);

     // fail safe: the threshold is derived from the total number of sources of the feed
     $total = $repository->countByFeed($feed);
     $limit = $this->getThreshold($total);

     if ($expired > $limit) {
         $approved = $voter->vote(
             $expired,
             $total,
             $limit,
             sprintf('Stopping cleanup for %s, because %s of %s sources were to be deleted, %s is the maximum.', $feed, $expired, $total, $limit)
         );

         // the voter rejected the cleanup: announce the halt and stop
         if (!$approved) {
             $this->eventDispatcher->dispatch(IoEvents::FEED_CLEANUP_HALT, new FeedCleanupHaltEvent($feed, $expired, $total, $limit));

             return false;
         }
     }

     $this->logger->debug(sprintf('Cleaning %d sources for %s', $expired, $feed));

     $query = $repository->queryByFeedAndUnvisitedSince($feed, $expireDate)->getQuery();
     $cleaned = $cleaner->cleanByQuery($query);

     $this->eventDispatcher->dispatch(IoEvents::POST_CLEAN_FEED, new FeedCleanupEvent($feed, $cleaned));

     return $cleaned;
 }
 /**
  * Processes the source identified by the "id" argument, linking it first
  * when the source processor reports it as not linked.
  *
  * @inheritdoc
  *
  * @return int 0 when the source has been processed, 1 when no source was found for the id
  */
 protected function execute(InputInterface $input, OutputInterface $output)
 {
     $id = $input->getArgument('id');

     if (null === ($source = $this->sourceManager->findById($id))) {
         // Print the argument as given: %d would cast non-numeric input to 0
         // and report an id the user never entered.
         $output->writeln(sprintf('<error>Could not find source with id %s</error>', $id));

         return 1;
     }

     // link on demand before processing
     if (!$this->sourceProcessor->isLinked($source)) {
         $output->writeln('Linking source first');
         $this->sourceProcessor->link($source);
     }

     $this->sourceProcessor->process($source);
     $this->sourceManager->flush($source);

     $output->writeln(sprintf('Source <info>%d</info> has been processed', $id));

     return 0;
 }
 /**
  * Revisits the sources of the selected scrapers that have not been visited
  * within the scraper's revisit frequency (interpreted as a number of hours).
  *
  * A CrawlException raised for one source is written to the output as an
  * error; the remaining sources are still revisited.
  *
  * @inheritdoc
  *
  * @return int Always 0
  */
 protected function execute(InputInterface $input, OutputInterface $output)
 {
     $async = $input->getOption('async');
     $noLimit = $input->getOption('no-limit');
     $scrapers = $this->findScrapers($input->getArgument('scraper'));

     foreach ($scrapers as $scraperEntity) {
         // sources last visited before this moment are due for a revisit
         $date = new \DateTime(sprintf('-%d hours', $scraperEntity->getRevisitFrequency()));
         $builder = $this->sourceManager->getRepository()->queryByScraperAndUnvisitedSince($scraperEntity, $date);

         // each iterated row carries the source as its first element
         foreach ($builder->getQuery()->iterate() as list($source)) {
             /* @var SourceInterface $source */
             try {
                 $output->writeln(sprintf('Revisiting <info>%s</info>', $source->getOriginalUrl()));
                 $this->revisitor->revisit($source, $async, $noLimit);
             } catch (CrawlException $e) {
                 // report the failure and continue with the next source
                 $output->writeln(sprintf('<error>%s</error>', $e->getMessage()));
             }
         }
     }

     // explicit exit code, consistent with commands that return 0 on success
     return 0;
 }
 /**
  * Verifies that a SourceProcessException thrown during processing makes the
  * executor return false and is stored as an error message under the
  * "process" key of the source's messages.
  */
 public function testProcessException()
 {
     $failure = new SourceProcessException('Foobar');
     $source = new SourceMock(12345);
     $executor = new SourceProcessExecutor($this->manager, $this->processor, new NullLogger());

     // the manager finds the source, which is unlinked and fails to process
     $this->manager->expects($this->once())->method('findById')->will($this->returnValue($source));
     $this->processor->expects($this->once())->method('isLinked')->will($this->returnValue(false));
     $this->processor->expects($this->once())->method('process')->will($this->throwException($failure));

     $payload = $this->getPayload($executor, $source);
     $result = $executor->execute($payload);
     $this->assertFalse($result);

     $messages = $source->getMessages();
     $this->assertInternalType('array', $messages);
     $this->assertArrayHasKey('process', $messages);

     $processMessages = $messages['process'];
     $this->assertArrayHasKey(LogLevel::ERROR, $processMessages);
     $this->assertContains('Foobar', $processMessages[LogLevel::ERROR]);
 }
 /**
  * Removes every source yielded by the given query.
  *
  * IoEvents::PRE_CLEAN_SOURCE is dispatched before and
  * IoEvents::POST_CLEAN_SOURCE after each removal. The source manager is
  * flushed and cleared after every 50 removals, and once more at the end
  * when at least one source was removed.
  *
  * @param AbstractQuery $query Query whose iterated rows carry a source as their first element
  *
  * @throws \LogicException When a row yields something other than a SourceInterface
  *
  * @return int The number of removed sources
  */
 public function cleanByQuery(AbstractQuery $query)
 {
     $batchSize = 50;
     $removed = 0;

     foreach ($query->iterate() as $row) {
         /** @var SourceInterface $source */
         list($source) = $row;

         if (!$source instanceof SourceInterface) {
             $type = is_object($source) ? get_class($source) : gettype($source);

             throw new \LogicException(sprintf('Invalid iterator given, encountered %s instead of SourceInterface', $type));
         }

         $this->eventDispatcher->dispatch(IoEvents::PRE_CLEAN_SOURCE, new SourceEvent($source));
         $this->sourceManager->remove($source);
         $this->eventDispatcher->dispatch(IoEvents::POST_CLEAN_SOURCE, new SourceEvent($source));

         ++$removed;

         // write out a full batch and clear the manager before continuing
         if (0 === $removed % $batchSize) {
             $this->sourceManager->flush();
             $this->sourceManager->clear();
         }
     }

     // nothing was removed, so there is nothing left to flush
     if ($removed <= 0) {
         return $removed;
     }

     $this->sourceManager->flush();
     $this->sourceManager->clear();

     return $removed;
 }
예제 #6
0
 /**
  * Removes the given source through the source manager and flushes that
  * source right away.
  *
  * @param SourceInterface $source The source to remove
  */
 protected function removeSource(SourceInterface $source)
 {
     $manager = $this->sourceManager;

     $manager->remove($source);
     $manager->flush($source);
 }
 /**
  * Looks up a source by its id using the source manager.
  *
  * @param int $sourceId The id to look up
  *
  * @return SourceInterface The result of the manager's findById() call
  *                         (NOTE(review): possibly null when nothing matches - confirm against the manager)
  */
 protected function findSource($sourceId)
 {
     $source = $this->sourceManager->findById($sourceId);

     return $source;
 }
 /**
  * Clears the source manager and empties the locally kept sources and
  * origin sources.
  *
  * @inheritdoc
  */
 public function clear()
 {
     $this->sourceManager->clear();

     // reset both local collections to empty arrays
     $this->sources = $this->originSources = [];
 }
 /**
  * Cleans the sources selected by the repository's queryOrphaned() query.
  *
  * The voter is not consulted by this implementation.
  *
  * @inheritdoc
  */
 public function clean(DelegatingSourceCleaner $cleaner, ThresholdVoterInterface $voter)
 {
     $repository = $this->sourceManager->getRepository();
     $query = $repository->queryOrphaned()->getQuery();

     return $cleaner->cleanByQuery($query);
 }