Example #1
0
 /**
  * {@inheritdoc}
  */
 public function parse(FeedInterface $feed, FetcherResultInterface $fetcher_result, StateInterface $state)
 {
     // Set time zone to GMT for parsing dates with strtotime().
     $tz = date_default_timezone_get();
     date_default_timezone_set('GMT');
     try {
         $raw = trim($fetcher_result->getRaw());
         if (!strlen($raw)) {
             throw new EmptyFeedException();
         }
         // Yes, using a DOM parser is a bit inefficient, but will do for now.
         // @todo XML error handling.
         $this->startXmlErrorHandling();
         try {
             // The constructor throws on malformed XML, so error handling has
             // to be switched back off in a finally block.
             $xml = new \SimpleXMLElement($raw);
         } finally {
             $this->stopXmlErrorHandling();
         }
         $result = new ParserResult();
         // Build one item per <url> element; only 'url' is always set, the
         // remaining fields are copied when the element is present.
         foreach ($xml->url as $url) {
             $item = new SitemapItem();
             $item->set('url', (string) $url->loc);
             if ($url->lastmod) {
                 // strtotime() expects a string, not a SimpleXMLElement.
                 $item->set('lastmod', strtotime((string) $url->lastmod));
             }
             if ($url->changefreq) {
                 $item->set('changefreq', (string) $url->changefreq);
             }
             if ($url->priority) {
                 $item->set('priority', (string) $url->priority);
             }
             $result->addItem($item);
         }
         return $result;
     } finally {
         // Always restore the caller's time zone, including when an exception
         // (empty feed, invalid XML) aborts parsing.
         date_default_timezone_set($tz);
     }
 }
Example #2
0
 /**
  * {@inheritdoc}
  */
 public function parse(FeedInterface $feed, FetcherResultInterface $fetcher_result)
 {
     $settings = $feed->getConfigurationFor($this);
     $state = $feed->getState(StateInterface::PARSE);
     // Build the CSV parser; the literal value 'TAB' stands for a tab character.
     $parser = new ParserCSV();
     if ($settings['delimiter'] == 'TAB') {
         $separator = "\t";
     } else {
         $separator = $settings['delimiter'];
     }
     $parser->setDelimiter($separator);
     $iterator = new ParserCSVIterator($fetcher_result->getFilePath());
     // Unless headers are disabled, take the column names from parseHeader()
     // and stop early (returning nothing) when none are found.
     if (empty($settings['no_headers'])) {
         $column_names = $this->parseHeader($parser, $iterator);
         if (!$column_names) {
             return;
         }
         $parser->setColumnNames($column_names);
     }
     // Resume from the stored pointer, or from the parser's last line position
     // when no pointer has been recorded yet.
     if ($state->pointer) {
         $offset = $state->pointer;
     } else {
         $offset = $parser->lastLinePos();
     }
     $max_rows = $this->importer->getLimit();
     $rows = $this->parseItems($parser, $iterator, $offset, $max_rows);
     // Record how far we got; a falsy last line position is reported as the
     // full file size.
     $state->total = filesize($fetcher_result->getFilePath());
     $state->pointer = $parser->lastLinePos();
     if ($parser->lastLinePos()) {
         $done = $parser->lastLinePos();
     } else {
         $done = $state->total;
     }
     $state->progress($state->total, $done);
     // Hand the parsed rows back wrapped in a result object.
     return new ParserResult($rows, $feed->id());
 }
Example #3
0
 /**
  * {@inheritdoc}
  */
 public function parse(FeedInterface $feed, FetcherResultInterface $fetcher_result, StateInterface $state)
 {
     $feed_config = $feed->getConfigurationFor($this);
     $file_path = $fetcher_result->getFilePath();
     if (!filesize($file_path)) {
         throw new EmptyFeedException();
     }
     // Load and configure parser. The literal value 'TAB' stands for a tab
     // character; parsing resumes at the byte offset stored in $state.
     $parser = CsvFileParser::createFromFilePath($file_path)
         ->setDelimiter($feed_config['delimiter'] === 'TAB' ? "\t" : $feed_config['delimiter'])
         ->setHasHeader(!$feed_config['no_headers'])
         ->setStartByte((int) $state->pointer);
     // Wrap parser in a limit iterator. Keep the concrete parser in its own
     // variable: \LimitIterator does not itself define getHeader() or
     // lastLinePos(), so those calls must go to the parser directly.
     $rows = new \LimitIterator($parser, 0, $this->configuration['line_limit']);
     $header = !$feed_config['no_headers'] ? $parser->getHeader() : [];
     $result = new ParserResult();
     foreach ($rows as $row) {
         $item = new DynamicItem();
         foreach ($row as $delta => $cell) {
             // Use the header name for the column when there is one, otherwise
             // fall back to the column position.
             $key = isset($header[$delta]) ? $header[$delta] : $delta;
             $item->set($key, $cell);
         }
         $result->addItem($item);
     }
     // Report progress.
     $state->total = filesize($file_path);
     $state->pointer = $parser->lastLinePos();
     $state->progress($state->total, $state->pointer);
     return $result;
 }
Example #4
0
 /**
  * {@inheritdoc}
  */
 public function parse(FeedInterface $feed, FetcherResultInterface $fetcher_result)
 {
     // Run the raw fetcher output through the generic OPML parser.
     $opml_parser = new GenericOPMLParser($fetcher_result->getRaw());
     $document = $opml_parser->parse(TRUE);
     // Copy the outlines (as items) and the head title onto the result.
     $parsed = new ParserResult();
     $outlines = $document['outlines'];
     $parsed->items = $this->getItems($outlines, []);
     $parsed->title = $document['head']['#title'];
     return $parsed;
 }
Example #5
0
 /**
  * {@inheritdoc}
  */
 public function parse(FeedInterface $feed, FetcherResultInterface $fetcher_result, StateInterface $state)
 {
     // Refuse feeds whose raw content is empty or whitespace only.
     $raw = $fetcher_result->getRaw();
     if (!strlen(trim($raw))) {
         throw new EmptyFeedException();
     }
     $result = new ParserResult();
     // Reuse the content fetched above instead of calling getRaw() again.
     $parser = new GenericOpmlParser($raw);
     $opml = $parser->parse(TRUE);
     // Every item built from the outlines also carries the title from the
     // OPML head.
     foreach ($this->getItems($opml['outlines']) as $item) {
         $item->set('feed_title', $opml['head']['#title']);
         $result->addItem($item);
     }
     return $result;
 }
Example #6
0
 /**
  * {@inheritdoc}
  */
 public function parse(FeedInterface $feed, FetcherResultInterface $fetcher_result, StateInterface $state)
 {
     $result = new ParserResult();
     Reader::setExtensionManager(\Drupal::service('feed.bridge.reader'));
     Reader::registerExtension('GeoRSS');
     // Refuse feeds whose raw content is empty or whitespace only.
     $raw = $fetcher_result->getRaw();
     if (!strlen(trim($raw))) {
         throw new EmptyFeedException();
     }
     try {
         $channel = Reader::importString($raw);
     } catch (ExceptionInterface $e) {
         $args = ['%site' => $feed->label(), '%error' => trim($e->getMessage())];
         // Chain the reader exception so the original trace is not lost.
         throw new \RuntimeException($this->t('The feed from %site seems to be broken because of error "%error".', $args), 0, $e);
     }
     foreach ($channel as $entry) {
         $item = new SyndicationItem();
         // Move the values to an item as expected by processors. Each key is
         // set exactly once.
         $item->set('title', $entry->getTitle())
             ->set('guid', $entry->getId())
             ->set('url', $entry->getLink())
             ->set('description', $entry->getDescription())
             ->set('tags', $entry->getCategories()->getValues())
             ->set('feed_title', $channel->getTitle())
             ->set('feed_description', $channel->getDescription())
             ->set('feed_url', $channel->getLink());
         // The remaining fields are optional and only set when present.
         if ($image = $channel->getImage()) {
             $item->set('feed_image_uri', $image['uri']);
         }
         if ($enclosure = $entry->getEnclosure()) {
             $item->set('enclosures', [rawurldecode($enclosure->url)]);
         }
         if ($author = $entry->getAuthor()) {
             // Fill in missing author keys so the lookups below are safe.
             $author += ['name' => '', 'email' => ''];
             $item->set('author_name', $author['name'])->set('author_email', $author['email']);
         }
         if ($date = $entry->getDateModified()) {
             $item->set('timestamp', $date->getTimestamp());
         }
         if ($point = $entry->getGeoPoint()) {
             $item->set('georss_lat', $point['lat'])->set('georss_lon', $point['lon']);
         }
         $result->addItem($item);
     }
     return $result;
 }
Example #7
0
 /**
  * {@inheritdoc}
  */
 public function parse(FeedInterface $feed, FetcherResultInterface $fetcher_result, StateInterface $state)
 {
     // Set time zone to GMT for parsing dates with strtotime().
     $tz = date_default_timezone_get();
     date_default_timezone_set('GMT');
     try {
         // Reset item counter.
         $this->items_count = 0;
         // Get raw data.
         $raw = trim($fetcher_result->getRaw());
         if (!strlen($raw)) {
             throw new EmptyFeedException();
         }
         $data = Json::decode($raw);
         $result = new ParserResult();
         // Only hand over items when the payload decoded to an array that
         // actually contains some.
         if (is_array($data) && !empty($data['items'])) {
             $this->processItems($data['items'], $result);
         }
         // Read the paging info defensively: the keys may be missing and
         // $data may be null when decoding failed.
         $total_results = isset($data['pageInfo']['totalResults']) ? $data['pageInfo']['totalResults'] : 0;
         $results_per_page = isset($data['pageInfo']['resultsPerPage']) ? $data['pageInfo']['resultsPerPage'] : 0;
         if ($total_results && $results_per_page && $total_results > $results_per_page) {
             $number_of_pages = $total_results / $results_per_page;
             if ($number_of_pages > 1) {
                 $feed_type = $feed->getType();
                 $fetcher_configuration = $feed_type->getFetcher()->getConfiguration();
                 $yt_state = ['channel_id' => $feed->getSource(), 'api_key' => $fetcher_configuration['api_key'], 'import_limit' => $fetcher_configuration['import_limit'], 'page_limit' => $fetcher_configuration['page_limit'], 'pageToken' => ''];
                 for ($i = 0; $i <= $number_of_pages; $i++) {
                     if (!$data) {
                         throw new EmptyFeedException();
                     }
                     // Without a next page token there is nothing left to
                     // fetch, so stop instead of idling through the loop.
                     if (empty($data['nextPageToken'])) {
                         break;
                     }
                     $yt_state['pageToken'] = $data['nextPageToken'];
                     $data = Json::decode($this->fetchInternal($feed, $yt_state));
                     // A page that failed to decode is caught by the check at
                     // the top of the next iteration; do not index into it.
                     if (is_array($data) && !empty($data['items'])) {
                         $this->processItems($data['items'], $result);
                     }
                 }
             }
         }
         return $result;
     } finally {
         // Always restore the caller's time zone, including when an
         // exception aborts parsing.
         date_default_timezone_set($tz);
     }
 }
Example #8
0
 /**
  * {@inheritdoc}
  */
 public function parse(FeedInterface $feed, FetcherResultInterface $fetcher_result)
 {
     // Set time zone to GMT for parsing dates with strtotime().
     $tz = date_default_timezone_get();
     date_default_timezone_set('GMT');
     try {
         // Yes, using a DOM parser is a bit inefficient, but will do for now.
         // The constructor throws on malformed XML.
         $xml = new \SimpleXMLElement($fetcher_result->getRaw());
         $result = new ParserResult();
         // Build one array per <url> element; only 'url' is always set, the
         // remaining keys are added when the element is present.
         foreach ($xml->url as $url) {
             $item = array('url' => (string) $url->loc);
             if ($url->lastmod) {
                 // strtotime() expects a string, not a SimpleXMLElement.
                 $item['lastmod'] = strtotime((string) $url->lastmod);
             }
             if ($url->changefreq) {
                 $item['changefreq'] = (string) $url->changefreq;
             }
             if ($url->priority) {
                 $item['priority'] = (string) $url->priority;
             }
             $result->items[] = $item;
         }
         return $result;
     } finally {
         // Always restore the caller's time zone, including when the XML
         // could not be parsed.
         date_default_timezone_set($tz);
     }
 }
Example #9
0
 /**
  * {@inheritdoc}
  */
 public function parse(FeedInterface $feed, FetcherResultInterface $fetcher_result)
 {
     $result = new ParserResult();
     Reader::setExtensionManager(\Drupal::service('feed.bridge.reader'));
     try {
         $channel = Reader::importString($fetcher_result->getRaw());
     } catch (ExceptionInterface $e) {
         // A broken feed is logged and reported to the user; the (empty)
         // result is still returned rather than propagating the exception.
         watchdog_exception('feeds', $e);
         drupal_set_message($this->t('The feed from %site seems to be broken because of error "%error".', array('%site' => $feed->label(), '%error' => $e->getMessage())), 'error');
         return $result;
     }
     $result->title = $channel->getTitle();
     $result->description = $channel->getDescription();
     $result->link = $channel->getLink();
     foreach ($channel as $item) {
         // Reset the parsed item.
         $parsed_item = array();
         // Move the values to an array as expected by processors.
         $parsed_item['title'] = $item->getTitle();
         $parsed_item['guid'] = $item->getId();
         $parsed_item['url'] = $item->getLink();
         $parsed_item['description'] = $item->getDescription();
         if ($enclosure = $item->getEnclosure()) {
             // rawurldecode() leaves a literal "+" intact, whereas
             // urldecode() would turn it into a space and corrupt the URL.
             $parsed_item['enclosures'][] = rawurldecode($enclosure->url);
         }
         if ($author = $item->getAuthor()) {
             // The author data may lack a name; fall back to an empty string
             // instead of reading an undefined index.
             $parsed_item['author_name'] = isset($author['name']) ? $author['name'] : '';
         }
         if ($date = $item->getDateModified()) {
             $parsed_item['timestamp'] = $date->getTimestamp();
         }
         $parsed_item['tags'] = $item->getCategories()->getValues();
         $result->items[] = $parsed_item;
     }
     return $result;
 }
Example #10
0
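 /**
  * Finds the URL for a relation (such as a hub) from a fetcher result.
  *
  * @param \Drupal\feeds\Result\FetcherResultInterface $fetcher_result
  *   The fetcher result.
  * @param string $relation
  *   The relation to look for. Presumably a link relation name; confirm
  *   against callers.
  *
  * @return string|null
  *   The hub URL or null if one wasn't found.
  */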
 protected function findRelation(FetcherResultInterface $fetcher_result, $relation)
 {
     // For HTTP results, a matching Link header takes precedence.
     $from_header = NULL;
     if ($fetcher_result instanceof HttpFetcherResultInterface) {
         $from_header = HttpHelpers::findLinkHeader($fetcher_result->getHeaders(), $relation);
     }
     if ($from_header) {
         return $from_header;
     }
     // Otherwise look the relation up in the raw document itself.
     $raw = $fetcher_result->getRaw();
     return HttpHelpers::findRelationFromXml($raw, $relation);
 }