/**
 * {@inheritdoc}
 */
public function parse(FeedInterface $feed, FetcherResultInterface $fetcher_result, StateInterface $state) {
  // Turns every <url> element of the fetched XML into one SitemapItem holding
  // 'url' and, when the element has them, 'lastmod', 'changefreq' and
  // 'priority'.
  //
  // Set time zone to GMT for parsing dates with strtotime(). The previous
  // zone is restored in the finally block, so an empty feed or malformed XML
  // can no longer leave the whole process switched to GMT.
  $tz = date_default_timezone_get();
  date_default_timezone_set('GMT');

  try {
    $raw = trim($fetcher_result->getRaw());
    if (!strlen($raw)) {
      throw new EmptyFeedException();
    }

    // Yes, using a DOM parser is a bit inefficient, but will do for now.
    // @todo XML error handling.
    $this->startXmlErrorHandling();
    try {
      $xml = new \SimpleXMLElement($raw);
    }
    finally {
      // Always pair start with stop, even when the constructor throws.
      $this->stopXmlErrorHandling();
    }

    $result = new ParserResult();

    foreach ($xml->url as $url) {
      $item = new SitemapItem();
      $item->set('url', (string) $url->loc);

      if ($url->lastmod) {
        // strtotime() expects a string, not a SimpleXMLElement.
        $item->set('lastmod', strtotime((string) $url->lastmod));
      }
      if ($url->changefreq) {
        $item->set('changefreq', (string) $url->changefreq);
      }
      if ($url->priority) {
        $item->set('priority', (string) $url->priority);
      }

      $result->addItem($item);
    }

    return $result;
  }
  finally {
    date_default_timezone_set($tz);
  }
}
/**
 * {@inheritdoc}
 */
public function parse(FeedInterface $feed, FetcherResultInterface $fetcher_result) {
  // Parses one batch of rows from the fetched CSV file, continuing from the
  // pointer kept in the feed's parse state, and reports progress on that
  // state.
  $config = $feed->getConfigurationFor($this);
  $state = $feed->getState(StateInterface::PARSE);

  // Configure the parser; the configured value 'TAB' stands for a tab.
  $delimiter = $config['delimiter'];
  if ($delimiter == 'TAB') {
    $delimiter = "\t";
  }
  $parser = new ParserCSV();
  $parser->setDelimiter($delimiter);

  $iterator = new ParserCSVIterator($fetcher_result->getFilePath());

  // Unless the feed is configured as header-less, take the column names from
  // parseHeader(); bail out without a result when no header is returned.
  if (empty($config['no_headers'])) {
    $header = $this->parseHeader($parser, $iterator);
    if (!$header) {
      return;
    }
    $parser->setColumnNames($header);
  }

  // Start at the stored pointer, or at the parser's last line position when
  // no pointer has been stored yet.
  $start = $state->pointer ?: $parser->lastLinePos();
  $rows = $this->parseItems($parser, $iterator, $start, $this->importer->getLimit());

  // Report progress against the file size; when the parser reports no
  // position, the total is used as the progress value.
  $state->total = filesize($fetcher_result->getFilePath());
  $state->pointer = $parser->lastLinePos();
  $state->progress($state->total, $parser->lastLinePos() ?: $state->total);

  return new ParserResult($rows, $feed->id());
}
/**
 * {@inheritdoc}
 */
public function parse(FeedInterface $feed, FetcherResultInterface $fetcher_result, StateInterface $state) {
  // Parses up to 'line_limit' CSV rows starting at the byte offset stored in
  // $state->pointer, turning each row into a DynamicItem keyed by header name
  // (or by column index when no header name exists for that column).
  $feed_config = $feed->getConfigurationFor($this);
  $file_path = $fetcher_result->getFilePath();
  $file_size = filesize($file_path);

  if (!$file_size) {
    throw new EmptyFeedException();
  }

  // empty() keeps the original truthiness test but does not raise a notice
  // when the 'no_headers' key is missing from the configuration.
  $has_header = empty($feed_config['no_headers']);
  $delimiter = $feed_config['delimiter'] === 'TAB' ? "\t" : $feed_config['delimiter'];

  // Load and configure parser.
  $parser = CsvFileParser::createFromFilePath($file_path)
    ->setDelimiter($delimiter)
    ->setHasHeader($has_header)
    ->setStartByte((int) $state->pointer);

  // getHeader() and lastLinePos() belong to the CSV parser, not to
  // \LimitIterator, so they are called on the parser itself rather than
  // depending on the wrapper passing unknown method calls through.
  $header = $has_header ? $parser->getHeader() : [];

  $result = new ParserResult();

  // Wrap parser in a limit iterator so a single call handles one batch.
  $batch = new \LimitIterator($parser, 0, $this->configuration['line_limit']);
  foreach ($batch as $row) {
    $item = new DynamicItem();
    foreach ($row as $delta => $cell) {
      $key = isset($header[$delta]) ? $header[$delta] : $delta;
      $item->set($key, $cell);
    }
    $result->addItem($item);
  }

  // Report progress.
  $state->total = $file_size;
  $state->pointer = $parser->lastLinePos();
  $state->progress($state->total, $state->pointer);

  return $result;
}
/**
 * {@inheritdoc}
 */
public function parse(FeedInterface $feed, FetcherResultInterface $fetcher_result) {
  // Parse the raw OPML document, then expose its outlines as the result's
  // items and its head title as the result's title.
  $opml_parser = new GenericOPMLParser($fetcher_result->getRaw());
  $document = $opml_parser->parse(TRUE);

  $parsed = new ParserResult();
  $parsed->items = $this->getItems($document['outlines'], []);
  $parsed->title = $document['head']['#title'];

  return $parsed;
}
/**
 * {@inheritdoc}
 */
public function parse(FeedInterface $feed, FetcherResultInterface $fetcher_result, StateInterface $state) {
  // Parses an OPML document into items, stamping each item with the
  // document's head title as 'feed_title'.
  $raw = $fetcher_result->getRaw();
  if (!strlen(trim($raw))) {
    throw new EmptyFeedException();
  }

  // Reuse the buffer fetched above instead of asking the fetcher result for
  // the raw contents a second time.
  $parser = new GenericOpmlParser($raw);
  $opml = $parser->parse(TRUE);

  // The title is the same for every item, so look it up once. A missing
  // title yields NULL without raising an undefined-index notice.
  $feed_title = isset($opml['head']['#title']) ? $opml['head']['#title'] : NULL;

  $result = new ParserResult();
  foreach ($this->getItems($opml['outlines']) as $item) {
    $item->set('feed_title', $feed_title);
    $result->addItem($item);
  }

  return $result;
}
/**
 * {@inheritdoc}
 */
public function parse(FeedInterface $feed, FetcherResultInterface $fetcher_result, StateInterface $state) {
  // Imports the raw feed through Reader and converts every entry into a
  // SyndicationItem carrying both entry-level and channel-level values.
  $result = new ParserResult();
  Reader::setExtensionManager(\Drupal::service('feed.bridge.reader'));
  Reader::registerExtension('GeoRSS');

  $raw = $fetcher_result->getRaw();
  if (!strlen(trim($raw))) {
    throw new EmptyFeedException();
  }

  try {
    $channel = Reader::importString($raw);
  }
  catch (ExceptionInterface $e) {
    $args = ['%site' => $feed->label(), '%error' => trim($e->getMessage())];
    // Cast the translated message to a plain string for the exception
    // constructor and keep the original exception as the previous one, so
    // the underlying cause stays available to whoever handles this.
    $message = (string) $this->t('The feed from %site seems to be broken because of error "%error".', $args);
    throw new \RuntimeException($message, 0, $e);
  }

  // Channel-level values are identical for every entry; read them once.
  $feed_title = $channel->getTitle();
  $feed_description = $channel->getDescription();
  $feed_url = $channel->getLink();
  $feed_image = $channel->getImage();

  foreach ($channel as $entry) {
    $item = new SyndicationItem();

    // Move the values to an array as expected by processors.
    $item
      ->set('title', $entry->getTitle())
      ->set('guid', $entry->getId())
      ->set('url', $entry->getLink())
      ->set('description', $entry->getDescription())
      ->set('tags', $entry->getCategories()->getValues())
      ->set('feed_title', $feed_title)
      ->set('feed_description', $feed_description)
      ->set('feed_url', $feed_url);

    if ($feed_image) {
      $item->set('feed_image_uri', $feed_image['uri']);
    }

    if ($enclosure = $entry->getEnclosure()) {
      $item->set('enclosures', [rawurldecode($enclosure->url)]);
    }

    if ($author = $entry->getAuthor()) {
      // Fill in defaults so a partial author record does not raise notices.
      $author += ['name' => '', 'email' => ''];
      $item
        ->set('author_name', $author['name'])
        ->set('author_email', $author['email']);
    }

    if ($date = $entry->getDateModified()) {
      $item->set('timestamp', $date->getTimestamp());
    }

    if ($point = $entry->getGeoPoint()) {
      $item
        ->set('georss_lat', $point['lat'])
        ->set('georss_lon', $point['lon']);
    }

    $result->addItem($item);
  }

  return $result;
}
/**
 * {@inheritdoc}
 */
public function parse(FeedInterface $feed, FetcherResultInterface $fetcher_result, StateInterface $state) {
  // Decodes the fetched JSON, hands its 'items' to processItems(), and, when
  // 'pageInfo' reports more results than fit on one page, keeps fetching the
  // following pages through fetchInternal() using each 'nextPageToken'.
  //
  // Set time zone to GMT for parsing dates with strtotime(). The previous
  // zone is restored in the finally block so that an exception thrown below
  // cannot leave the whole process switched to GMT.
  $tz = date_default_timezone_get();
  date_default_timezone_set('GMT');

  try {
    // Reset item counter.
    $this->items_count = 0;

    // Get raw data.
    $raw = trim($fetcher_result->getRaw());
    if (!strlen($raw)) {
      throw new EmptyFeedException();
    }

    $data = Json::decode($raw);
    $result = new ParserResult();

    // !empty() also covers a response that decoded to NULL or has no 'items'
    // key, where count() would warn (or throw on PHP 8).
    if (!empty($data['items'])) {
      $this->processItems($data['items'], $result);
    }

    $total_results = isset($data['pageInfo']['totalResults']) ? $data['pageInfo']['totalResults'] : 0;
    $results_per_page = isset($data['pageInfo']['resultsPerPage']) ? $data['pageInfo']['resultsPerPage'] : 0;

    if ($total_results && $results_per_page && $total_results > $results_per_page) {
      $number_of_pages = $total_results / $results_per_page;
      $fetcher_configuration = $feed->getType()->getFetcher()->getConfiguration();
      $yt_state = [
        'channel_id' => $feed->getSource(),
        'api_key' => $fetcher_configuration['api_key'],
        'import_limit' => $fetcher_configuration['import_limit'],
        'page_limit' => $fetcher_configuration['page_limit'],
        'pageToken' => '',
      ];

      // The page count only bounds the loop; what actually drives it is the
      // presence of a next-page token in the most recent response.
      for ($i = 0; $i <= $number_of_pages; $i++) {
        if (empty($data['nextPageToken'])) {
          // Last page reached: nothing further to fetch.
          break;
        }

        $yt_state['pageToken'] = $data['nextPageToken'];
        $data = Json::decode($this->fetchInternal($feed, $yt_state));

        // Check the new page before using it, rather than one iteration
        // later after its (missing) items were already passed on.
        if (!$data) {
          throw new EmptyFeedException();
        }
        if (isset($data['items'])) {
          $this->processItems($data['items'], $result);
        }
      }
    }

    return $result;
  }
  finally {
    date_default_timezone_set($tz);
  }
}
/**
 * {@inheritdoc}
 */
public function parse(FeedInterface $feed, FetcherResultInterface $fetcher_result) {
  // Turns every <url> element of the fetched XML into an array item holding
  // 'url' and, when the element has them, 'lastmod', 'changefreq' and
  // 'priority'.
  //
  // Set time zone to GMT for parsing dates with strtotime(). The previous
  // zone is restored in the finally block, so malformed XML (which makes the
  // SimpleXMLElement constructor throw) cannot leave the process in GMT.
  $tz = date_default_timezone_get();
  date_default_timezone_set('GMT');

  try {
    // Yes, using a DOM parser is a bit inefficient, but will do for now.
    $xml = new \SimpleXMLElement($fetcher_result->getRaw());
    $result = new ParserResult();

    foreach ($xml->url as $url) {
      $item = ['url' => (string) $url->loc];

      if ($url->lastmod) {
        // strtotime() expects a string, not a SimpleXMLElement.
        $item['lastmod'] = strtotime((string) $url->lastmod);
      }
      if ($url->changefreq) {
        $item['changefreq'] = (string) $url->changefreq;
      }
      if ($url->priority) {
        $item['priority'] = (string) $url->priority;
      }

      $result->items[] = $item;
    }

    return $result;
  }
  finally {
    date_default_timezone_set($tz);
  }
}
/**
 * {@inheritdoc}
 */
public function parse(FeedInterface $feed, FetcherResultInterface $fetcher_result) {
  // Imports the raw feed through Reader, copies the channel title,
  // description and link onto the result, and converts every entry into an
  // array item. A feed that cannot be imported is logged, reported to the
  // user, and yields an empty result.
  $result = new ParserResult();
  Reader::setExtensionManager(\Drupal::service('feed.bridge.reader'));

  try {
    $channel = Reader::importString($fetcher_result->getRaw());
  }
  catch (ExceptionInterface $e) {
    watchdog_exception('feeds', $e);
    $args = ['%site' => $feed->label(), '%error' => $e->getMessage()];
    drupal_set_message($this->t('The feed from %site seems to be broken because of error "%error".', $args), 'error');
    return $result;
  }

  $result->title = $channel->getTitle();
  $result->description = $channel->getDescription();
  $result->link = $channel->getLink();

  foreach ($channel as $item) {
    // Move the values to an array as expected by processors.
    $parsed_item = [
      'title' => $item->getTitle(),
      'guid' => $item->getId(),
      'url' => $item->getLink(),
      'description' => $item->getDescription(),
    ];

    if ($enclosure = $item->getEnclosure()) {
      // rawurldecode() leaves a literal '+' alone; urldecode() would turn it
      // into a space and corrupt enclosure URLs that contain one.
      $parsed_item['enclosures'][] = rawurldecode($enclosure->url);
    }

    if ($author = $item->getAuthor()) {
      // An author record may lack a name; avoid an undefined-index notice.
      $parsed_item['author_name'] = isset($author['name']) ? $author['name'] : '';
    }

    if ($date = $item->getDateModified()) {
      $parsed_item['timestamp'] = $date->getTimestamp();
    }

    $parsed_item['tags'] = $item->getCategories()->getValues();

    $result->items[] = $parsed_item;
  }

  return $result;
}
/**
 * Finds a link relation (for example a hub) from a fetcher result.
 *
 * For HTTP fetcher results the response headers are searched first via
 * HttpHelpers::findLinkHeader(). If that yields nothing, or the result is
 * not an HTTP result, the raw contents are searched via
 * HttpHelpers::findRelationFromXml().
 *
 * @param \Drupal\feeds\Result\FetcherResultInterface $fetcher_result
 *   The fetcher result.
 * @param string $relation
 *   The relation to look for; passed unchanged to both helpers.
 *
 * @return string|null
 *   The URL for the relation or null if one wasn't found.
 */
protected function findRelation(FetcherResultInterface $fetcher_result, $relation) {
  // Only HTTP results carry headers that can be inspected.
  $from_header = $fetcher_result instanceof HttpFetcherResultInterface
    ? HttpHelpers::findLinkHeader($fetcher_result->getHeaders(), $relation)
    : NULL;

  if ($from_header) {
    return $from_header;
  }

  // Fall back to looking inside the document itself.
  return HttpHelpers::findRelationFromXml($fetcher_result->getRaw(), $relation);
}