예제 #1
0
 /**
  * Scrapes a single URL with the scraper identified by the "scraper" argument.
  *
  * The "async" option switches the scraper to async mode, and "no-limit"
  * disables the crawler's rate limit when that limit supports being disabled.
  * At normal verbosity or above, a ScrapeOutputSubscriber is attached to the
  * scraper's event dispatcher.
  *
  * @inheritdoc
  *
  * @return int 0 when the scrape completes, 1 when a CrawlException is caught
  *
  * @throws \RuntimeException when findScraper() returns null for the argument
  */
 protected function execute(InputInterface $input, OutputInterface $output)
 {
     $entity = $this->findScraper($input->getArgument('scraper'));
     if (null === $entity) {
         // %s echoes the argument verbatim; %d would render any non-numeric value as 0.
         throw new \RuntimeException(sprintf('Scraper %s not found', $input->getArgument('scraper')));
     }

     $scraper = $this->factory->createScraper($entity);
     if ($input->getOption('async')) {
         $scraper->setAsync(true);
     }

     if ($input->getOption('no-limit')) {
         $limit = $scraper->getCrawler()->getRateLimit();
         // Only limits implementing EnablingRateLimitInterface can be switched off.
         if ($limit instanceof EnablingRateLimitInterface) {
             $limit->disable();
         }
     }

     if ($output->getVerbosity() >= OutputInterface::VERBOSITY_NORMAL) {
         $dispatcher = $scraper->getEventDispatcher();
         $dispatcher->addSubscriber(new ScrapeOutputSubscriber($output));
     }

     try {
         $scraper->scrape($entity, $input->getArgument('url'));

         return 0;
     } catch (CrawlException $e) {
         // writeln() does not interpolate placeholders, so the message is built with sprintf() first.
         $output->writeln(
             sprintf("<error>Error scraping url: %s\n\n%s</error>", $e->getUrl(), $e->getMessage())
         );

         return 1;
     }
 }
예제 #2
0
 /**
  * Runs one scrape job, always in async mode.
  *
  * @inheritdoc
  *
  * @param array $payload two-element list: the scraper entity, then the URL to scrape
  *
  * @return bool true when scrape() finishes, false when a CrawlException was caught and logged
  *
  * @throws RescheduleException when a RateLimitException is caught; it carries
  *                             the retry date if the rate-limit exception provides one
  */
 public function execute(array $payload)
 {
     /** @var ScraperEntity $entity */
     /** @var string $url */
     list($entity, $url) = $payload;

     $worker = $this->factory->createScraper($entity);
     $worker->setAsync(true);

     try {
         $worker->scrape($entity, $url);
     } catch (RateLimitException $rateLimited) {
         // Rate limiting is signalled to the caller as a reschedule request.
         $reschedule = new RescheduleException();
         $retryDate = $rateLimited->getRetryDate();
         if ($retryDate) {
             $reschedule->setRescheduleDate($retryDate);
         }

         throw $reschedule;
     } catch (CrawlException $crawlFailure) {
         // Other crawl failures are logged with the offending URL and reported as false.
         $this->logger->error($crawlFailure->getMessage(), ['url' => $crawlFailure->getUrl()]);

         return false;
     }

     return true;
 }
예제 #3
0
 /**
  * Returns the scraper for an entity, building it on first use and caching it by entity id.
  *
  * The rate limit is only touched while the scraper is being built: on a
  * cache hit the stored instance is returned as-is and $disableLimit has no effect.
  *
  * @param ScraperEntity $scraperEntity entity whose id is used as the cache key
  * @param bool          $disableLimit  when true, disable the new scraper's rate limit
  *                                     if it implements EnablingRateLimitInterface
  *
  * @return ScraperInterface
  */
 protected function createScraper(ScraperEntity $scraperEntity, $disableLimit = false)
 {
     $cacheKey = $scraperEntity->getId();

     // Cache hit: hand back the instance built earlier.
     if (array_key_exists($cacheKey, $this->scrapers)) {
         return $this->scrapers[$cacheKey];
     }

     $instance = $this->factory->createScraper($scraperEntity);

     if ($disableLimit) {
         $rateLimit = $instance->getCrawler()->getRateLimit();
         if ($rateLimit instanceof EnablingRateLimitInterface) {
             $rateLimit->disable();
         }
     }

     $this->scrapers[$cacheKey] = $instance;

     return $instance;
 }
 /**
  * Scrapes the entity's own URL, then stamps the entity and flushes it.
  *
  * A fresh scraper is requested from the factory on every call. The "async"
  * option turns on async mode; the "no-limit" option disables the crawler's
  * rate limit when it implements EnablingRateLimitInterface.
  *
  * @param InputInterface $input         source of the "async" and "no-limit" options
  * @param Scraper        $scraperEntity entity providing the URL and receiving the timestamp
  */
 protected function scrape(InputInterface $input, $scraperEntity)
 {
     $runner = $this->factory->createScraper($scraperEntity);

     if ($input->getOption('async')) {
         $runner->setAsync(true);
     }

     // The rate limit is only fetched when "no-limit" was requested; null never matches instanceof.
     $rateLimit = $input->getOption('no-limit') ? $runner->getCrawler()->getRateLimit() : null;
     if ($rateLimit instanceof EnablingRateLimitInterface) {
         $rateLimit->disable();
     }

     $runner->scrape($scraperEntity, $scraperEntity->getUrl());

     // NOTE(review): the "last started" timestamp is taken after scrape() returns,
     // and is skipped entirely if scrape() throws - confirm this is intended.
     $stamp = new \DateTime();
     $scraperEntity->setDatetimeLastStarted($stamp);

     $manager = $this->doctrine->getManager();
     $manager->flush($scraperEntity);
 }
 /**
  * Requests the handler named "foo" from the factory; the expected exception
  * class and message are declared by the annotations below.
  *
  * @expectedException        \OutOfBoundsException
  * @expectedExceptionMessage Handler "foo" is not registered
  */
 public function testMissingHandler()
 {
     // No explicit assertion: the docblock annotations carry the expectation.
     $this->factory->getHandler('foo');
 }
예제 #6
0
 /**
  * Looks up a crawler by delegating to the factory's getCrawler().
  *
  * @param string $crawler value passed unchanged to the factory
  *
  * @return CrawlerInterface
  */
 protected function findCrawler($crawler)
 {
     $resolved = $this->factory->getCrawler($crawler);

     return $resolved;
 }