/**
 * Subscribes the current user to a feed, importing the feed first when the
 * system does not know it yet.
 *
 * Lookup order: an exact match via Storage::getFeed(), then the first match
 * returned by Storage::findFeedFromLinks(). Only when both fail is the URL
 * validated, fetched, parsed and imported together with its links.
 *
 * @param string $feedUrl        Address of the feed (or of a page linking to it).
 * @param mixed  $parentFolderId Passed through to Storage::subscribeToFeed().
 *
 * @return array "feed" holds the feed title, "allItems" holds the result of
 *               Storage::getUserFeeds() for the current user.
 *
 * @throws JsonError When the URL is invalid, the feed cannot be read, typed,
 *                   parsed or imported, or the subscription itself fails.
 */
function subscribeRoute($feedUrl, $parentFolderId)
{
    $storage = Storage::getInstance();

    // Reuse a feed the system already has, either by URL or by a known link.
    $feed = $storage->getFeed($feedUrl);
    if (!$feed) {
        $candidates = $storage->findFeedFromLinks($feedUrl);
        if (count($candidates) > 0) {
            // TODO: allow selection if > 1
            $feed = $candidates[0];
        }
    }

    if (!$feed) {
        // Unknown feed: it has to be fetched from the web, so validate the URL first.
        $urlIsValid = filter_var($feedUrl, FILTER_VALIDATE_URL);
        if (!$urlIsValid) {
            throw new JsonError(l("Incorrect or unrecognized URL"));
        }

        // Download the document and pick a parser for it.
        try {
            $parser = FeedParser::create($feedUrl, true);
        } catch (Exception $downloadFailure) {
            throw new JsonError(l("Could not read contents of feed"));
        }

        if (!$parser) {
            throw new JsonError(l("Could not determine type of feed"));
        }

        try {
            $feed = $parser->parse();
        } catch (Exception $parseFailure) {
            throw new JsonError(l("Could not parse the contents of the feed"));
        }

        // Persist the feed contents; importFeed() signals failure with false.
        $feed->id = $storage->importFeed($this->user->id, $feed);
        if ($feed->id === false) {
            throw new JsonError(l("An error occurred while adding feed"));
        }

        // Persist the links collected by the parser, making sure the feed's
        // own link is among them.
        $feedLinks = $parser->getLinks();
        $ownLinkIsListed = in_array($feed->link, $feedLinks);
        if (!$ownLinkIsListed) {
            $feedLinks[] = $feed->link;
        }
        $storage->addLinks($feed->id, $feedLinks);
    }

    // Attach the feed to the user's folder.
    $subscribed = $storage->subscribeToFeed($this->user->id, $feed->id, $parentFolderId);
    if (!$subscribed) {
        throw new JsonError(l("Could not subscribe to feed"));
    }

    $feedSummary = array("title" => $feed->title);
    $userFeeds = $storage->getUserFeeds($this->user);

    return array(
        "feed" => $feedSummary,
        "allItems" => $userFeeds,
    );
}
/**
 * Downloads and parses a single feed, printing progress and accumulating
 * timing statistics on this object.
 *
 * Time spent in FeedParser::create() is added to $secondsSpentDownloading;
 * time spent in the parser's parse() call (including the progress echo that
 * precedes it) is added to $secondsSpentParsing.
 *
 * @param string $feedUrl Address of the feed to fetch.
 *
 * @return mixed Whatever the selected parser's parse() returns.
 *
 * @throws Exception When no parser could be selected for the document.
 */
private function parseFeed($feedUrl)
{
    // Download phase.
    $downloadStartedAt = microtime(true);
    $parser = FeedParser::create($feedUrl);
    $this->secondsSpentDownloading += microtime(true) - $downloadStartedAt;

    if (!$parser) {
        throw new Exception("Could not determine type of feed from content");
    }

    // Parse phase.
    $parseStartedAt = microtime(true);
    $parserClass = get_class($parser);
    echo " (-) parsing (" . $parserClass . ") ... ";

    $feed = $parser->parse();

    $parseDuration = microtime(true) - $parseStartedAt;
    $this->secondsSpentParsing += $parseDuration;
    printf("done! (%.04fs)\n", $parseDuration);

    return $feed;
}
/**
 * Fetches the document at a URL and returns a parser suited to its content.
 *
 * The document is parsed as XML and the root element name selects the parser:
 * "feed" yields an AtomParser, "rss" or "RDF" yields an RssParser. When
 * $followHtml is set and the document either is not XML or has an "html"
 * root, the first link returned by FeedParser::extractLinks() is followed
 * instead, and the URL(s) of the page are appended to the resulting parser's
 * $links.
 *
 * @param string $url        Address of the document to inspect.
 * @param bool   $followHtml Whether to follow a feed link discovered in a
 *                           document that is not itself a feed.
 * @param int    $recursions Number of links already followed; external
 *                           callers normally leave this at its default.
 *
 * @return AtomParser|RssParser|null A parser with url, document and xml set,
 *                                   or null when the content type could not
 *                                   be determined.
 *
 * @throws Exception With code ERROR_TOO_MANY_REDIRECTS once more than two
 *                   links have been followed, or ERROR_EMPTY_DOCUMENT when
 *                   the fetch yields nothing.
 */
public static function create($url, $followHtml = false, $recursions = 0)
{
    if ($recursions > 2) {
        throw new Exception("Download error", FeedParser::ERROR_TOO_MANY_REDIRECTS);
    }

    // NOTE(review): $url is compared with $originalUrl further down, so
    // fetchDocument() presumably updates $url by reference (e.g. after a
    // redirect) - confirm against its signature.
    $originalUrl = $url;
    $document = FeedParser::fetchDocument($url);
    if (!$document) {
        throw new Exception("Document is empty", FeedParser::ERROR_EMPTY_DOCUMENT);
    }

    // Collect libxml errors instead of emitting warnings. The buffer is
    // emptied first so that only errors belonging to THIS document are
    // inspected below; otherwise errors left over from an earlier document
    // could trigger the byte-stripping fallback.
    libxml_use_internal_errors(true);
    libxml_clear_errors();

    // First pass - parse it as valid XML
    $xmlDocument = null;
    try {
        $xmlDocument = @new SimpleXMLElement($document);
    } catch (Exception $e) {
        // Not well-formed XML; handled by the fallbacks below.
    }

    if ($xmlDocument === null) {
        // Document didn't parse as valid XML
        foreach (libxml_get_errors() as $error) {
            if ($error->code == 9) {
                // PCDATA Invalid char value: drop the offending bytes and retry once.
                // NOTE(review): this pattern removes every byte in 0x00-0x1f
                // (including tab and newline) and every byte >= 0x80, so all
                // non-ASCII text is lost as well. Kept as-is because it also
                // rescues documents with a wrong declared encoding.
                $document = preg_replace('/[\\x00-\\x1f\\x80-\\xff]/', '', $document);

                // Reparse the document
                try {
                    $xmlDocument = @new SimpleXMLElement($document);
                } catch (Exception $e) {
                    // Still not XML; fall through.
                }
                break;
            }
        }
    }

    // Do not let this document's errors leak into later libxml consumers.
    libxml_clear_errors();

    $parser = null;
    $treatAsHtml = false;

    if ($xmlDocument === null) {
        // Not sure if this is ideal, but let's just blindly assume this is
        // an HTML document and try to parse any rel=alternate links
        $treatAsHtml = true;
    } elseif ($xmlDocument) {
        // Valid XML. See if we can determine the type of content
        $rootName = $xmlDocument->getName();
        if ($rootName == 'feed') {
            $parser = new AtomParser();
        } elseif ($rootName == 'rss' || $rootName == 'RDF') {
            $parser = new RssParser();
        } elseif (strcasecmp($rootName, 'html') === 0) {
            // HTML document that happens to be well-formed XML.
            $treatAsHtml = true;
        }
    }

    if ($treatAsHtml && $followHtml) {
        // See if we can find a feed by parsing the HTML
        $links = FeedParser::extractLinks($url, $document);
        if (count($links) > 0) {
            // Follow the discovered link, not the page itself: recursing on
            // $url would re-fetch the same HTML until the recursion limit
            // is hit.
            $parser = FeedParser::create($links[0]->url, $followHtml, $recursions + 1);
            if ($parser) {
                // Remember which page(s) led to the feed.
                $parser->links[] = $url;
                if ($url != $originalUrl) {
                    $parser->links[] = $originalUrl;
                }
            }

            return $parser;
        }
    }

    if ($parser) {
        $parser->url = $url;
        $parser->document = $document;
        $parser->xml = $xmlDocument;
    }

    return $parser;
}