/**
 * Runs every registered modifier against the scraped item, in registration order.
 *
 * A single modifier may implement several roles; each role it implements is
 * applied in the fixed order filter, map, transform, validate.
 *
 * @inheritdoc
 *
 * @throws FilterException       rethrown as-is, listeners are not consulted
 * @throws ValidationException   rethrown as-is, listeners are not consulted
 * @throws ModificationException when the dispatched failure event ends with "continue" disabled
 */
public function parse(ScrapedItemBag $item)
{
    $domCrawler = $this->getDomCrawler($item->getOriginalData(), $item->getOriginalUrl());

    foreach ($this->modifiers as $index => $step) {
        // crawler-aware modifiers receive the crawler built from this item before they run
        if ($step instanceof CrawlerAwareInterface) {
            $step->setCrawler($domCrawler);
        }

        try {
            if ($step instanceof FilterInterface) {
                $step->filter($item);
            }

            // the mapper's return value replaces $item for all following roles and modifiers
            // NOTE(review): nothing is returned from parse(), so a mapper that hands back a
            // new instance is only visible inside this loop - confirm mappers mutate in place
            if ($step instanceof MapperInterface) {
                $item = $step->map($item);
            }

            if ($step instanceof TransformerInterface) {
                $step->transform($item);
            }

            if ($step instanceof ValidatorInterface) {
                $step->validate($item);
            }
        } catch (FilterException $filterFailure) {
            // caught ahead of the ModificationException clause so it never reaches the listeners
            throw $filterFailure;
        } catch (ValidationException $validationFailure) {
            // same as above: always propagates
            throw $validationFailure;
        } catch (ModificationException $failure) {
            // seed the event with the "continue" flag configured for this position,
            // then let listeners decide whether parsing may go on
            $failedEvent = new FailedItemModificationEvent($item, $step, $failure);
            $failedEvent->setContinue($this->continues[$index]);

            $this->eventDispatcher->dispatch(FeedEvents::ITEM_MODIFICATION_FAILED, $failedEvent);

            if ($failedEvent->getContinue()) {
                continue;
            }

            throw $failure;
        }
    }
}
/**
 * Delegates to the source manager's findSourceByScraperOrCreate(), passing the
 * item's scraper, original id and original url.
 *
 * @param ScrapedItemBag $item
 *
 * @return SourceInterface
 */
protected function findSourceOrCreate(ScrapedItemBag $item)
{
    $scraper     = $item->getScraper();
    $originalId  = $item->getOriginalId();
    $originalUrl = $item->getOriginalUrl();

    return $this->sourceManager->findSourceByScraperOrCreate($scraper, $originalId, $originalUrl);
}
/**
 * Builds the expected item for a fixture.
 *
 * The fixture is a PHP file at "fixtures/<parserType>/<fixtureName>.php", resolved
 * relative to the directory of the file that declares the current class. Including
 * it must yield an array with an "item" key; the "id", "url" and "date" keys are
 * optional and, when set, override the corresponding values on the item.
 *
 * @param Scraper $scraper
 * @param string  $parserType
 * @param string  $fixtureName
 *
 * @return ScrapedItemBag
 */
protected function getExpectedItemFixture(Scraper $scraper, $parserType, $fixtureName)
{
    // locate the fixtures directory next to the concrete class' source file
    $reflection = new \ReflectionClass($this);
    $baseDir    = dirname($reflection->getFileName());
    $fixture    = sprintf('%s/fixtures/%s/%s.php', $baseDir, $parserType, $fixtureName);

    /** @var array $data */
    $data = (include $fixture);

    // the bag starts out with the class-level url and empty original data
    $bag = new ScrapedItemBag($scraper, static::$url, '');
    $bag->add($data['item']);

    if (isset($data['id'])) {
        $bag->setOriginalId($data['id']);
    }

    if (isset($data['url'])) {
        $bag->setOriginalUrl($data['url']);
    }

    if (isset($data['date'])) {
        $bag->setDatetimeModified($data['date']);
    }

    return $bag;
}