/**
 * Discover feed links advertised by a HTML document.
 *
 * The document is searched for <link> elements whose "type" attribute is
 * exactly "application/rss+xml" and then for those typed
 * "application/atom+xml"; results are returned in that order (all RSS
 * matches first, then all Atom matches). Elements with an empty "href"
 * are skipped. A relative href is passed through Url::getAbsoluteUrl()
 * together with the base URL of the website; any other href is passed
 * with an empty base.
 *
 * @param string $url  Website url
 * @param string $html HTML content
 *
 * @return array List of feed links
 */
public function find($url, $html)
{
    Logger::setMessage(get_called_class() . ': Try to discover subscriptions');

    $document = XmlParser::getHtmlDocument($html);
    $finder = new DOMXPath($document);
    $feeds = array();

    // Order matters: RSS candidates are collected before Atom candidates.
    $expressions = array(
        '//link[@type="application/rss+xml"]',
        '//link[@type="application/atom+xml"]',
    );

    foreach ($expressions as $expression) {
        foreach ($finder->query($expression) as $element) {
            $href = $element->getAttribute('href');

            if (empty($href)) {
                continue;
            }

            $feedUrl = new Url($href);
            $siteUrl = new Url($url);

            // Only relative links need the website base; others get an empty one.
            if ($feedUrl->isRelativeUrl()) {
                $base = $siteUrl->getBaseUrl();
            } else {
                $base = '';
            }

            $feeds[] = $feedUrl->getAbsoluteUrl($base);
        }
    }

    Logger::setMessage(get_called_class() . ': ' . implode(', ', $feeds));

    return $feeds;
}
/**
 * Find a downloadable icon for a website and return its link.
 *
 * When $favicon_link is given, it is the only candidate. Otherwise the
 * candidates are whatever extract() returns for the downloaded base page
 * of the website, followed by "/favicon.ico" on the website base URL.
 *
 * Each candidate is resolved against the website URL and downloaded; the
 * downloaded content and content type are stored on this object (even when
 * the content is empty). The first candidate with non-empty content wins.
 * If an explicitly supplied $favicon_link yields empty content, the lookup
 * is restarted without it so the website itself is searched.
 *
 * @param string $website_link URL
 * @param string $favicon_link optional URL
 *
 * @return string Resolved icon link, or an empty string when nothing was found
 */
public function find($website_link, $favicon_link = '')
{
    $website = new Url($website_link);
    $has_explicit_icon = $favicon_link !== '';

    if ($has_explicit_icon) {
        $candidates = array($favicon_link);
    } else {
        $homepage = $this->download($website->getBaseUrl('/'));
        $candidates = $this->extract($homepage->getContent());
        $candidates[] = $website->getBaseUrl('/favicon.ico');
    }

    foreach ($candidates as $candidate) {
        $resolved = Url::resolve($candidate, $website);
        $response = $this->download($resolved);

        $this->content = $response->getContent();
        $this->content_type = $response->getContentType();

        if ($this->content !== '') {
            return $resolved;
        }

        // The caller-provided icon was unusable: fall back to discovery.
        if ($has_explicit_icon) {
            return $this->find($website_link);
        }
    }

    return '';
}