Exemplo n.º 1
0
 /**
  * Update the current URL from the Location headers of a response.
  *
  * Every header line whose field name is "Location" (case-insensitive) is
  * resolved against the current URL, in the order the lines appear, so each
  * Location is resolved relative to the result of the previous one.
  *
  * @access private
  * @param  array $headers List of raw HTTP response header lines
  */
 private function setEffectiveUrl($headers)
 {
     $prefix = 'Location:';

     foreach ($headers as $header) {
         // Match the field name together with its colon so that unrelated
         // headers that merely start with "Location" are not picked up.
         if (stripos($header, $prefix) !== 0) {
             continue;
         }

         // Keep everything after the first colon: the value itself may
         // contain ": ", and the whitespace after the colon is optional.
         $value = trim((string) substr($header, strlen($prefix)));

         // An empty Location carries no target; leave the URL untouched.
         if ($value !== '') {
             $this->url = Url::resolve($value, $this->url);
         }
     }
 }
Exemplo n.º 2
0
 /**
  * Resolve the item URL against the feed's site URL.
  *
  * The resolved value is written back to the item's url property.
  *
  * @access public
  * @param  Feed    $feed          Feed object providing the site URL
  * @param  Item    $item          Item object whose URL is rewritten
  */
 public function checkItemUrl(Feed $feed, Item $item)
 {
     $itemUrl = $item->getUrl();
     $siteUrl = $feed->getSiteUrl();

     $item->url = Url::resolve($itemUrl, $siteUrl);
 }
Exemplo n.º 3
0
 /**
  * Find the item enclosure.
  *
  * Does nothing when the entry has no enclosure element. Otherwise stores
  * the enclosure URL (resolved against the feed's site URL) and the
  * enclosure type on the item.
  *
  * @param SimpleXMLElement      $entry Feed item
  * @param \PicoFeed\Parser\Item $item  Item object
  * @param \PicoFeed\Parser\Feed $feed  Feed object
  */
 public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed)
 {
     if (!isset($entry->enclosure)) {
         return;
     }

     // Prefer feedburner:origEnclosureLink; fall back to the enclosure's url attribute.
     $urlNodes = XmlParser::getXPathResult($entry, 'feedburner:origEnclosureLink', $this->namespaces);
     if (!$urlNodes) {
         $urlNodes = XmlParser::getXPathResult($entry, 'enclosure/@url');
     }

     $typeNodes = XmlParser::getXPathResult($entry, 'enclosure/@type');

     $item->enclosure_url = Url::resolve((string) current($urlNodes), $feed->getSiteUrl());
     $item->enclosure_type = (string) current($typeNodes);
 }
Exemplo n.º 4
0
 /**
  * Handle HTTP redirects
  *
  * Follows the given location, then keeps following the Location header of
  * each response for as long as its status is a redirection. Before every
  * request the current URL is re-resolved and the buffered body and
  * response headers are cleared.
  *
  * @param string $location Redirected URL
  *
  * @return array Result of the last request performed
  *
  * @throws MaxRedirectException When the redirect counter reaches max_redirects
  */
 private function handleRedirection($location)
 {
     $redirectCount = 0;
     $response = array();
     $target = $location;

     do {
         // Point at the next hop and drop whatever the previous response left behind.
         $this->url = Url::resolve($target, $this->url);
         $this->body = '';
         $this->body_length = 0;
         $this->response_headers = array();
         $this->response_headers_count = 0;

         if (++$redirectCount >= $this->max_redirects) {
             throw new MaxRedirectException('Maximum number of redirections reached');
         }

         $response = $this->doRequest();
         $followAgain = $this->isRedirection($response['status']);

         if ($followAgain) {
             $target = $response['headers']['Location'];
         }
     } while ($followAgain);

     return $response;
 }
Exemplo n.º 5
0
 /**
  * Find the item enclosure
  *
  * Does nothing when the entry has no enclosure element. The URL is taken
  * from the namespaced origEnclosureLink value when that is non-empty,
  * otherwise from the enclosure's url attribute, and is then resolved
  * against the feed's site URL.
  *
  * @access public
  * @param  SimpleXMLElement          $entry   Feed item
  * @param  \PicoFeed\Parser\Item     $item    Item object
  * @param  \PicoFeed\Parser\Feed     $feed    Feed object
  */
 public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed)
 {
     if (!isset($entry->enclosure)) {
         return;
     }

     $enclosureUrl = XmlParser::getNamespaceValue($entry->enclosure, $this->namespaces, 'origEnclosureLink');

     if (empty($enclosureUrl)) {
         $enclosureUrl = '';
         if (isset($entry->enclosure['url'])) {
             $enclosureUrl = (string) $entry->enclosure['url'];
         }
     }

     $enclosureType = '';
     if (isset($entry->enclosure['type'])) {
         $enclosureType = (string) $entry->enclosure['type'];
     }

     $item->enclosure_type = $enclosureType;
     $item->enclosure_url = Url::resolve($enclosureUrl, $feed->getSiteUrl());
 }
Exemplo n.º 6
0
 /**
  * Find the item enclosure
  *
  * Looks up the entry's "enclosure" link and, when one is found, stores
  * its href (resolved against the feed's site URL) and its type on the item.
  *
  * @access public
  * @param  SimpleXMLElement          $entry   Feed item
  * @param  \PicoFeed\Parser\Item     $item    Item object
  * @param  \PicoFeed\Parser\Feed     $feed    Feed object
  */
 public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed)
 {
     $link = $this->findLink($entry, 'enclosure');

     if (!$link) {
         return;
     }

     $href = (string) $link['href'];

     $item->enclosure_url = Url::resolve($href, $feed->getSiteUrl());
     $item->enclosure_type = (string) $link['type'];
 }
Exemplo n.º 7
0
 /**
  * Convert all relative links to absolute url
  *
  * When the attribute is a resource attribute, its value is replaced in
  * place by the value resolved against the website URL.
  *
  * @access public
  * @param  string    $tag           Tag name
  * @param  string    $attribute     Attribute name
  * @param  string    $value         Attribute value (by reference, may be rewritten)
  * @return boolean                  Always true
  */
 public function rewriteAbsoluteUrl($tag, $attribute, &$value)
 {
     if (!$this->isResource($attribute)) {
         return true;
     }

     $value = Url::resolve($value, $this->website);

     return true;
 }
Exemplo n.º 8
0
 /**
  * Execute the scraper.
  *
  * Processes the current URL and appends the parser output to the content
  * accumulated so far. While the parser reports a next-page link and the
  * recursion limit has not been reached, the method points itself at that
  * link and recurses; the final accumulated content ends up in
  * $this->content.
  *
  * @param string $pageContent    Content accumulated from previously processed pages
  * @param int    $recursionDepth Number of next-page links already followed
  */
 public function execute($pageContent = '', $recursionDepth = 0)
 {
     $this->html = '';
     $this->encoding = '';
     $this->content = '';
     $this->download();
     $this->prepareHtml();
     $parser = $this->getParser();

     if ($parser === null) {
         // No parser for this page: keep what earlier pages produced instead
         // of discarding it. On the first page this is still an empty string.
         $this->content = $pageContent;

         return;
     }

     // The page limit comes from the configuration; 25 is only the fallback.
     $maxRecursions = $this->config->getMaxRecursions();
     if (!isset($maxRecursions)) {
         $maxRecursions = 25;
     }

     $pageContent .= $parser->execute();

     // Follow the link to the next page, if any, until the limit is reached.
     if (($nextLink = $parser->findNextLink()) !== null && $recursionDepth < $maxRecursions) {
         $nextLink = Url::resolve($nextLink, $this->url);
         $this->setUrl($nextLink);
         $this->execute($pageContent, $recursionDepth + 1);
     } else {
         $this->content = $pageContent;
     }

     Logger::setMessage(get_called_class() . ': Content length: ' . strlen($this->content) . ' bytes');
 }
Exemplo n.º 9
0
 public function testResolve()
 {
     // Each case is: expected result, link to resolve, website to resolve against.
     $cases = array(
         // relative link
         array('http://miniflux.net/assets/img/favicon.png', 'assets/img/favicon.png', 'http://miniflux.net'),
         // relative link + HTTPS
         array('https://miniflux.net/assets/img/favicon.png', 'assets/img/favicon.png', 'https://miniflux.net'),
         // absolute link
         array('http://miniflux.net/assets/img/favicon.png', '/assets/img/favicon.png', 'http://miniflux.net'),
         // absolute link + HTTPS
         array('https://miniflux.net/assets/img/favicon.png', '/assets/img/favicon.png', 'https://miniflux.net'),
         // Protocol relative link
         array('http://google.com/assets/img/favicon.png', '//google.com/assets/img/favicon.png', 'http://miniflux.net'),
         // Protocol relative link + HTTPS
         array('https://google.com/assets/img/favicon.png', '//google.com/assets/img/favicon.png', 'https://miniflux.net'),
         // URL same fqdn
         array('http://miniflux.net/assets/img/favicon.png', 'http://miniflux.net/assets/img/favicon.png', 'https://miniflux.net'),
         // URL different fqdn
         array('https://www.google.com/assets/img/favicon.png', 'https://www.google.com/assets/img/favicon.png', 'https://miniflux.net'),
         // HTTPS URL
         array('https://miniflux.net/assets/img/favicon.png', 'https://miniflux.net/assets/img/favicon.png', 'https://miniflux.net'),
         // empty string on missing website parameter
         array('', 'favicon.png', ''),
         // website only on missing icon parameter
         array('https://miniflux.net/', '', 'https://miniflux.net'),
         // empty string on missing website and icon parameter
         array('', '', ''),
     );

     foreach ($cases as $case) {
         list($expected, $link, $website) = $case;
         $this->assertEquals($expected, Url::resolve($link, $website));
     }
 }
Exemplo n.º 10
0
 /**
  * Get the icon link for a website.
  *
  * When a favicon link is given, only that link is tried; if it yields no
  * content the search is restarted without it. Otherwise the candidates are
  * the icons extracted from the website's base page followed by
  * /favicon.ico. The first candidate that downloads with non-empty content
  * is returned, and its content and content type are kept on the instance.
  *
  * @param string $website_link URL
  * @param string $favicon_link optional URL
  *
  * @return string Resolved icon URL, or an empty string when none has content
  */
 public function find($website_link, $favicon_link = '')
 {
     $site = new Url($website_link);
     $hasExplicitIcon = $favicon_link !== '';

     if ($hasExplicitIcon) {
         $candidates = array($favicon_link);
     } else {
         $homepage = $this->download($site->getBaseUrl('/'));
         $candidates = $this->extract($homepage->getContent());
         $candidates[] = $site->getBaseUrl('/favicon.ico');
     }

     foreach ($candidates as $candidate) {
         $resolved = Url::resolve($candidate, $site);
         $resource = $this->download($resolved);

         $this->content = $resource->getContent();
         $this->content_type = $resource->getContentType();

         if ($this->content !== '') {
             return $resolved;
         }

         // The caller-supplied icon was empty: fall back to discovery.
         if ($hasExplicitIcon) {
             return $this->find($website_link);
         }
     }

     return '';
 }
Exemplo n.º 11
0
 /**
  * Find the item enclosure.
  *
  * Does nothing when the entry has no enclosure element. Otherwise sets
  * the enclosure URL (resolved against the feed's site URL) and the
  * enclosure type on the item through its setters.
  *
  * @param SimpleXMLElement      $entry Feed item
  * @param \PicoFeed\Parser\Item $item  Item object
  * @param \PicoFeed\Parser\Feed $feed  Feed object
  */
 public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed)
 {
     if (!isset($entry->enclosure)) {
         return;
     }

     $typeNodes = XmlParser::getXPathResult($entry, 'enclosure/@type');

     // Prefer feedburner:origEnclosureLink; fall back to the enclosure's url attribute.
     $urlNodes = XmlParser::getXPathResult($entry, 'feedburner:origEnclosureLink', $this->namespaces);
     if (!$urlNodes) {
         $urlNodes = XmlParser::getXPathResult($entry, 'enclosure/@url');
     }

     $resolvedUrl = Url::resolve(XmlParser::getValue($urlNodes), $feed->getSiteUrl());

     $item->setEnclosureUrl($resolvedUrl);
     $item->setEnclosureType(XmlParser::getValue($typeNodes));
 }
Exemplo n.º 12
0
 public function testResolve()
 {
     // Each case is: expected result, link to resolve, website to resolve against.
     $cases = array(
         // relative link
         array('http://miniflux.net/assets/img/favicon.png', 'assets/img/favicon.png', 'http://miniflux.net'),
         // relative link + HTTPS
         array('https://miniflux.net/assets/img/favicon.png', 'assets/img/favicon.png', 'https://miniflux.net'),
         // absolute link
         array('http://miniflux.net/assets/img/favicon.png', '/assets/img/favicon.png', 'http://miniflux.net'),
         // absolute link + HTTPS
         array('https://miniflux.net/assets/img/favicon.png', '/assets/img/favicon.png', 'https://miniflux.net'),
         // Protocol relative link
         array('http://google.com/assets/img/favicon.png', '//google.com/assets/img/favicon.png', 'http://miniflux.net'),
         // Protocol relative link + HTTPS
         array('https://google.com/assets/img/favicon.png', '//google.com/assets/img/favicon.png', 'https://miniflux.net'),
         // URL same fqdn
         array('http://miniflux.net/assets/img/favicon.png', 'http://miniflux.net/assets/img/favicon.png', 'https://miniflux.net'),
         // URL different fqdn
         array('https://www.google.com/assets/img/favicon.png', 'https://www.google.com/assets/img/favicon.png', 'https://miniflux.net'),
         // HTTPS URL
         array('https://miniflux.net/assets/img/favicon.png', 'https://miniflux.net/assets/img/favicon.png', 'https://miniflux.net'),
         // empty string on missing website parameter
         array('', 'favicon.png', ''),
         // website only on missing icon parameter
         array('https://miniflux.net/', '', 'https://miniflux.net'),
         // empty string on missing website and icon parameter
         array('', '', ''),
         // Test no-ascii paths
         array('http://lesjoiesducode.fr/post/125336534020/quand-la-page-doit-%C3%AAtre-pixel-perfect', 'http://lesjoiesducode.fr/post/125336534020/quand-la-page-doit-être-pixel-perfect', 'http://lesjoiesducode.fr/post/125336534020'),
     );

     foreach ($cases as $case) {
         list($expected, $link, $website) = $case;
         $this->assertEquals($expected, Url::resolve($link, $website));
     }
 }