/**
 * Renders this object as "<origin name>:<original url>".
 *
 * The origin name is read from the scraper's origin; the URL is the value
 * held in the originalUrl property.
 *
 * @return string
 */
public function __toString()
{
    $originName = $this->scraper->getOrigin()->getName();
    $url = $this->originalUrl;

    return sprintf('%s:%s', $originName, $url);
}
/**
 * Returns the scraper for the given entity, building it through the factory
 * the first time it is requested and reusing that instance on later calls.
 *
 * Instances are cached in $this->scrapers, keyed by the entity id.
 *
 * @param ScraperEntity $scraperEntity Entity describing the scraper to create
 * @param bool          $disableLimit  When true, and the crawler's rate limit
 *                                     implements EnablingRateLimitInterface,
 *                                     the limit is disabled. Only applied when
 *                                     the scraper is first created; an already
 *                                     cached scraper is returned untouched.
 *
 * @return ScraperInterface
 */
protected function createScraper(ScraperEntity $scraperEntity, $disableLimit = false)
{
    $id = $scraperEntity->getId();

    // Cache hit: hand back the instance built earlier.
    if (array_key_exists($id, $this->scrapers)) {
        return $this->scrapers[$id];
    }

    $created = $this->factory->createScraper($scraperEntity);

    if ($disableLimit) {
        $rateLimit = $created->getCrawler()->getRateLimit();

        // Only rate limits that support toggling can be switched off.
        if ($rateLimit instanceof EnablingRateLimitInterface) {
            $rateLimit->disable();
        }
    }

    $this->scrapers[$id] = $created;

    return $created;
}
/**
 * Builds the hash used to identify a scraper.
 *
 * The hash is the md5 digest of the literal prefix "scraper" followed by
 * the scraper's id.
 *
 * @param Scraper $scraper
 *
 * @return string
 */
protected function getScraperHash(Scraper $scraper)
{
    $seed = 'scraper' . $scraper->getId();

    return md5($seed);
}
/**
 * @inheritdoc
 */
public function findSourceByScraper(Scraper $scraper, $originalId)
{
    // Match on both the scraper id and the given original id.
    $criteria = [
        'scraper'    => $scraper->getId(),
        'originalId' => $originalId,
    ];

    $repository = $this->getRepository();

    return $repository->findOneBy($criteria);
}
/**
 * Builds the parser configured for a scraper entity.
 *
 * The build options consist of the entity itself under the "scraper" key,
 * merged with the entity's own parser options. The entity's options are
 * merged last, so a "scraper" key in them would take precedence.
 *
 * @param ScraperEntity $scraper
 *
 * @return ParserInterface
 */
protected function createParser(ScraperEntity $scraper)
{
    $defaults = ['scraper' => $scraper];
    $buildOptions = array_merge($defaults, $scraper->getParserOptions());

    // Resolve the parser type from the parser configured on the entity.
    $type = $this->getParserType($scraper->getParser());

    $parserBuilder = new ParserBuilder($this->eventDispatcher);

    return $parserBuilder->build($type, $buildOptions);
}
/**
 * Builds the parser for the given scraper.
 *
 * The parser type is resolved through the object returned by
 * $this->get('tree_house.io.scrape.scraper_factory'). The build options are
 * the scraper itself under the "scraper" key merged with the scraper's own
 * parser options, which are merged last and therefore win on key clashes.
 *
 * @param Scraper $scraper
 *
 * @return ParserInterface
 */
protected function getParser(Scraper $scraper)
{
    $scraperFactory = $this->get('tree_house.io.scrape.scraper_factory');
    $type = $scraperFactory->getParserType($scraper->getParser());

    $defaults = ['scraper' => $scraper];
    $buildOptions = array_merge($defaults, $scraper->getParserOptions());

    $parserBuilder = new ParserBuilder();

    return $parserBuilder->build($type, $buildOptions);
}