Example #1
  /**
   * Construct the final URL from location headers.
   *
   * Every "Location" header found in $headers is resolved, in list order,
   * against the current value of $this->url and the result is stored back
   * into $this->url. All other headers are ignored.
   *
   * @param array $headers List of HTTP response header lines ("Name: value")
   */
  private function setEffectiveUrl($headers)
  {
      $prefix = 'Location:';

      foreach ($headers as $header) {
          // Compare the field name including its colon (case-insensitively) so
          // that unrelated fields merely starting with "Location" are skipped.
          if (stripos($header, $prefix) !== 0) {
              continue;
          }

          // Keep everything after the first colon: the value may itself contain
          // ": ", and the space after the colon is optional.
          $value = trim(substr($header, strlen($prefix)));

          // A header without a value gives no redirect target to resolve.
          if ($value !== '') {
              $this->url = Url::resolve($value, $this->url);
          }
      }
  }
Example #2
  /**
   * Find feed urls inside a HTML document.
   *
   * Looks for <link> elements typed as RSS or Atom (RSS matches first, then
   * Atom) and collects their non-empty href values as absolute urls. The
   * discovered links are also written to the logger.
   *
   * @param string $url  Website url
   * @param string $html HTML content
   *
   * @return array List of feed links
   */
  public function find($url, $html)
  {
      Logger::setMessage(get_called_class() . ': Try to discover subscriptions');

      $xpath = new DOMXPath(XmlParser::getHtmlDocument($html));
      $feedLinks = array();
      $expressions = array(
          '//link[@type="application/rss+xml"]',
          '//link[@type="application/atom+xml"]',
      );

      foreach ($expressions as $expression) {
          foreach ($xpath->query($expression) as $element) {
              $href = $element->getAttribute('href');

              if (empty($href)) {
                  continue;
              }

              $feedUrl = new Url($href);
              $siteUrl = new Url($url);

              // Only relative feed links need the website base url.
              $base = $feedUrl->isRelativeUrl() ? $siteUrl->getBaseUrl() : '';

              $feedLinks[] = $feedUrl->getAbsoluteUrl($base);
          }
      }

      Logger::setMessage(get_called_class() . ': ' . implode(', ', $feedLinks));

      return $feedLinks;
  }
Example #3
  /**
   * Check if the item url is correct.
   *
   * Replaces $item->url with the item url resolved against the feed's
   * site url.
   *
   * @param Feed $feed Feed object
   * @param Item $item Item object
   */
  public function checkItemUrl(Feed $feed, Item $item)
  {
      $itemUrl = $item->getUrl();
      $siteUrl = $feed->getSiteUrl();

      $item->url = Url::resolve($itemUrl, $siteUrl);
  }
Example #4
  /**
   * Handle manually redirections when there is an open base dir restriction.
   *
   * Follows the redirect chain starting at $location: before every request
   * $this->url is resolved against the new location and the response state
   * (body, body length, response headers) is cleared. The loop stops at the
   * first response whose status is not a redirection.
   *
   * Every redirect about to be followed is counted, including the initial one
   * to $location; once the count reaches $this->max_redirects no further
   * request is made.
   *
   * @param string $location Redirected URL
   *
   * @return array Result of the last doRequest() call
   *
   * @throws MaxRedirectException When the number of redirections reaches $this->max_redirects
   */
  private function handleRedirection($location)
  {
      $nb_redirects = 0;

      while (true) {
          // Point the client at the redirect target and drop whatever the
          // previous response left behind.
          $this->url = Url::resolve($location, $this->url);
          $this->body = '';
          $this->body_length = 0;
          $this->response_headers = array();
          $this->response_headers_count = 0;

          // Without this increment the limit below could never trigger and a
          // redirect loop on the remote side would never terminate.
          ++$nb_redirects;

          if ($nb_redirects >= $this->max_redirects) {
              throw new MaxRedirectException('Maximum number of redirections reached');
          }

          $result = $this->doRequest(false);

          // A non-redirect response ends the chain.
          if (!$this->isRedirection($result['status'])) {
              return $result;
          }

          $location = $result['headers']['Location'];
      }
  }
Example #5
  /**
   * Find the item enclosure.
   *
   * Uses the entry's "enclosure" link, when findLink() returns one, to fill
   * $item->enclosure_url (resolved against the feed's site url) and
   * $item->enclosure_type. The item is left untouched otherwise.
   *
   * @param SimpleXMLElement      $entry Feed item
   * @param \AsteFeed\Parser\Item $item  Item object
   * @param \AsteFeed\Parser\Feed $feed  Feed object
   */
  public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed)
  {
      $link = $this->findLink($entry, 'enclosure');

      if (!$link) {
          return;
      }

      $href = (string) $link['href'];

      $item->enclosure_url = Url::resolve($href, $feed->getSiteUrl());
      $item->enclosure_type = (string) $link['type'];
  }
Example #6
  /**
   * Get the parser.
   *
   * When grabber rules exist for $this->url, the first rule whose pattern
   * matches the url's full path yields a RuleParser. The candidate parser is
   * only considered when there are no grabber rules at all and
   * $this->enableCandidateParser is set.
   *
   * @return ParserInterface|null Parser to use, or null when none applies
   */
  public function getParser()
  {
      $ruleLoader = new RuleLoader($this->config);
      $rules = $ruleLoader->getRules($this->url);

      if (!empty($rules['grabber'])) {
          Logger::setMessage(get_called_class() . ': Parse content with rules');

          // The path does not depend on the rule being tested, so compute it
          // once instead of once per pattern.
          $url = new Url($this->url);
          $sub_url = $url->getFullPath();

          foreach ($rules['grabber'] as $pattern => $rule) {
              if (preg_match($pattern, $sub_url)) {
                  Logger::setMessage(get_called_class() . ': Matched url ' . $sub_url);

                  return new RuleParser($this->html, $rule);
              }
          }
      } elseif ($this->enableCandidateParser) {
          Logger::setMessage(get_called_class() . ': Parse content with candidates');

          return new CandidateParser($this->html);
      }

      // No rule matched, or no rules and the candidate parser is disabled.
      return null;
  }
Example #7
  /**
   * Get the icon link for a website.
   *
   * With an explicit $favicon_link only that link is tried; if its download
   * is empty the search is repeated without it. Otherwise the candidates are
   * the icons extracted from the website's base page followed by
   * "/favicon.ico". Each download attempt overwrites $this->content and
   * $this->content_type.
   *
   * @param string $website_link URL
   * @param string $favicon_link optional URL
   *
   * @return string Resolved link of the first icon with non-empty content, '' if none
   */
  public function find($website_link, $favicon_link = '')
  {
      $website = new Url($website_link);

      if ($favicon_link === '') {
          $homepage = $this->download($website->getBaseUrl('/'));
          $candidates = $this->extract($homepage->getContent());
          $candidates[] = $website->getBaseUrl('/favicon.ico');
      } else {
          $candidates = array($favicon_link);
      }

      foreach ($candidates as $candidate) {
          $resolved = Url::resolve($candidate, $website);
          $resource = $this->download($resolved);

          $this->content = $resource->getContent();
          $this->content_type = $resource->getContentType();

          if ($this->content !== '') {
              return $resolved;
          }

          // The caller-supplied icon was unusable: retry with discovery.
          if ($favicon_link !== '') {
              return $this->find($website_link);
          }
      }

      return '';
  }
Example #8
  /**
   * Find the item enclosure.
   *
   * Only acts when the entry has an <enclosure> element. The url comes from
   * "feedburner:origEnclosureLink" when that lookup yields a result, else
   * from the enclosure's "url" attribute, and is resolved against the feed's
   * site url. The type comes from the enclosure's "type" attribute.
   *
   * @param SimpleXMLElement      $entry Feed item
   * @param \AsteFeed\Parser\Item $item  Item object
   * @param \AsteFeed\Parser\Feed $feed  Feed object
   */
  public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed)
  {
      if (!isset($entry->enclosure)) {
          return;
      }

      $urlNodes = XmlParser::getXPathResult($entry, 'feedburner:origEnclosureLink', $this->namespaces);

      if (!$urlNodes) {
          $urlNodes = XmlParser::getXPathResult($entry, 'enclosure/@url');
      }

      $typeNodes = XmlParser::getXPathResult($entry, 'enclosure/@type');

      $rawUrl = (string) current($urlNodes);

      $item->enclosure_url = Url::resolve($rawUrl, $feed->getSiteUrl());
      $item->enclosure_type = (string) current($typeNodes);
  }
Example #9
  /**
   * Convert all relative links to absolute url.
   *
   * When isResource() accepts the attribute, $value is replaced in place by
   * the value resolved against $this->website. $tag is not used.
   *
   * @param string $tag       Tag name
   * @param string $attribute Attribute name
   * @param string $value     Attribute value (modified by reference)
   *
   * @return bool Always true
   */
  public function rewriteAbsoluteUrl($tag, $attribute, &$value)
  {
      if (!$this->isResource($attribute)) {
          return true;
      }

      $value = Url::resolve($value, $this->website);

      return true;
  }
Example #10
  /**
   * Called after XML parsing.
   *
   * Loads the rules for $this->website and, for every "filter" entry whose
   * pattern matches the website's full path, applies each search/replace
   * pair to the content with preg_replace().
   *
   * @param string $content the content that should be filtered
   *
   * @return string The filtered content
   */
  public function filterRules($content)
  {
      // the constructor should require a config, then this fallback can be removed
      $config = $this->config === null ? new Config() : $this->config;

      $loader = new RuleLoader($config);
      $rules = $loader->getRules($this->website);

      if (!isset($rules['filter'])) {
          return $content;
      }

      $url = new Url($this->website);
      $sub_url = $url->getFullPath();

      foreach ($rules['filter'] as $pattern => $rule) {
          if (!preg_match($pattern, $sub_url)) {
              continue;
          }

          foreach ($rule as $search => $replace) {
              $filtered = preg_replace($search, $replace, $content);

              // preg_replace() returns null on error (e.g. an invalid rule
              // pattern); keep the content as it was rather than wiping it.
              if ($filtered !== null) {
                  $content = $filtered;
              }
          }
      }

      return $content;
  }