Esempio n. 1
0
 /**
  * Renders this object as "<origin name>:<original url>".
  *
  * The origin name is read from the scraper's origin; the URL is the
  * value stored in the originalUrl property.
  *
  * @return string
  */
 public function __toString()
 {
     $originName = $this->scraper->getOrigin()->getName();

     return sprintf('%s:%s', $originName, $this->originalUrl);
 }
 /**
  * Returns the scraper belonging to the given entity, building it through
  * the factory on first use and caching it under the entity's id.
  *
  * Note that $disableLimit is only honoured when the scraper is actually
  * created: an instance that is already cached is returned untouched.
  *
  * @param ScraperEntity $scraperEntity Entity to get a scraper for
  * @param bool          $disableLimit  Whether to disable the crawler's rate
  *                                     limit (only when it implements
  *                                     EnablingRateLimitInterface)
  *
  * @return ScraperInterface
  */
 protected function createScraper(ScraperEntity $scraperEntity, $disableLimit = false)
 {
     $id = $scraperEntity->getId();

     // Cache hit: hand back the instance created earlier.
     if (array_key_exists($id, $this->scrapers)) {
         return $this->scrapers[$id];
     }

     $instance = $this->factory->createScraper($scraperEntity);

     if ($disableLimit) {
         $rateLimit = $instance->getCrawler()->getRateLimit();

         // Not every rate limit can be switched off; skip those that cannot.
         if ($rateLimit instanceof EnablingRateLimitInterface) {
             $rateLimit->disable();
         }
     }

     $this->scrapers[$id] = $instance;

     return $instance;
 }
 /**
  * Computes a hash identifying a scraper.
  *
  * The hash is the md5 of the literal prefix "scraper" followed by the
  * scraper's id, so it is stable for a given id.
  *
  * @param Scraper $scraper
  *
  * @return string
  */
 protected function getScraperHash(Scraper $scraper)
 {
     $seed = 'scraper' . $scraper->getId();

     return md5($seed);
 }
Esempio n. 4
0
 /**
  * @inheritdoc
  */
 public function findSourceByScraper(Scraper $scraper, $originalId)
 {
     // Match on the scraper's id together with the caller-supplied original id.
     $criteria = [
         'scraper'    => $scraper->getId(),
         'originalId' => $originalId,
     ];
     $repository = $this->getRepository();

     return $repository->findOneBy($criteria);
 }
Esempio n. 5
0
 /**
  * Builds the parser configured on the given scraper.
  *
  * The builder receives this object's event dispatcher. The options passed
  * to it are the scraper's parser options, with the scraper itself supplied
  * under the "scraper" key unless those options define that key themselves.
  *
  * @param ScraperEntity $scraper
  *
  * @return ParserInterface
  */
 protected function createParser(ScraperEntity $scraper)
 {
     // Defaults go first so the scraper's own options win on key clashes.
     $defaults = ['scraper' => $scraper];
     $parserOptions = array_merge($defaults, $scraper->getParserOptions());

     $type = $this->getParserType($scraper->getParser());

     return (new ParserBuilder($this->eventDispatcher))->build($type, $parserOptions);
 }
 /**
  * Builds the parser configured on the given scraper.
  *
  * The parser type is resolved through the
  * "tree_house.io.scrape.scraper_factory" service. The builder is created
  * without constructor arguments.
  *
  * @param Scraper $scraper
  *
  * @return ParserInterface
  */
 protected function getParser(Scraper $scraper)
 {
     $factory = $this->get('tree_house.io.scrape.scraper_factory');
     $type = $factory->getParserType($scraper->getParser());

     // Defaults go first so the scraper's own options win on key clashes.
     $defaults = ['scraper' => $scraper];
     $parserOptions = array_merge($defaults, $scraper->getParserOptions());

     $builder = new ParserBuilder();

     return $builder->build($type, $parserOptions);
 }