Example #1
0
 /**
  * Extract the relevant content of the page using the list of candidate attributes
  *
  * Lookup order, the first step that yields content wins:
  *  1. the first element whose class contains (or whose id equals) one of
  *     $this->candidatesAttributes, ignoring elements whose class contains
  *     "nav" or "page";
  *  2. the first <article/> element;
  *  3. the <body/> element (or the root element when there is no body) if
  *     less than 50 bytes were collected so far.
  *
  * The serialized markup is stored in $this->content and stripGarbage() is
  * called afterwards.
  *
  * @access public
  * @return void
  */
 public function parseContentWithCandidates()
 {
     // NOTE(review): the XML prolog prefix presumably forces libxml to read the
     // markup as UTF-8 - confirm against XmlParser::getHtmlDocument()
     $dom = XmlParser::getHtmlDocument('<?xml version="1.0" encoding="UTF-8">' . $this->html);
     $xpath = new DOMXPath($dom);
     // Try to lookup in each tag
     foreach ($this->candidatesAttributes as $candidate) {
         Logging::setMessage(get_called_class() . ' Try this candidate: "' . $candidate . '"');
         // The candidate is concatenated into the expression as-is: a value
         // containing a double quote makes the query invalid (query() returns false)
         $nodes = $xpath->query('//*[(contains(@class, "' . $candidate . '") or @id="' . $candidate . '") and not (contains(@class, "nav") or contains(@class, "page"))]');
         if ($nodes !== false && $nodes->length > 0) {
             $this->content = $dom->saveXML($nodes->item(0));
             Logging::setMessage(get_called_class() . ' Find candidate "' . $candidate . '" (' . strlen($this->content) . ' bytes)');
             break;
         }
     }
     // Try to fetch <article/>
     if (!$this->content) {
         $nodes = $xpath->query('//article');
         if ($nodes !== false && $nodes->length > 0) {
             $this->content = $dom->saveXML($nodes->item(0));
             Logging::setMessage(get_called_class() . ' Find <article/> tag (' . strlen($this->content) . ' bytes)');
         }
     }
     // Cast to string: the content may never have been assigned at this point
     // and passing null to strlen() is deprecated since PHP 8.1
     if (strlen((string) $this->content) < 50) {
         Logging::setMessage(get_called_class() . ' No enought content fetched, get the full body');
         // The first child of the document can be a processing instruction or
         // a doctype node instead of an element, so look up <body/> explicitly
         $nodes = $xpath->query('//body');
         if ($nodes !== false && $nodes->length > 0) {
             $this->content = $dom->saveXML($nodes->item(0));
         } else {
             // No <body/>: serialize the root element (saveXML() dumps the
             // whole document when the document has no root element)
             $this->content = $dom->saveXML($dom->documentElement);
         }
     }
     Logging::setMessage(get_called_class() . ' Strip garbage');
     $this->stripGarbage();
 }
Example #2
0
 /**
  * Discover the feed url inside a HTML document and download the feed
  *
  * Searches $this->content for <link/> elements typed as RSS, then Atom.
  * The first non-empty href found is made absolute against $this->url and
  * handed to download().
  *
  * @access public
  * @return boolean  True when a feed link was found and passed to download(), false otherwise
  */
 public function discover()
 {
     if (!$this->content) {
         return false;
     }
     Logging::setMessage(get_called_class() . ': Try to discover a subscription');
     $dom = XmlParser::getHtmlDocument($this->content);
     $xpath = new DOMXPath($dom);
     $queries = array('//link[@type="application/rss+xml"]', '//link[@type="application/atom+xml"]');
     foreach ($queries as $query) {
         $nodes = $xpath->query($query);
         // query() returns false when the expression cannot be evaluated
         if ($nodes === false) {
             continue;
         }
         // Walk every match so an empty href on the first <link/> does not
         // hide a usable one further down
         foreach ($nodes as $node) {
             $link = trim($node->getAttribute('href'));
             if ($link === '') {
                 continue;
             }
             $link = $this->resolveDiscoveredLink($link);
             Logging::setMessage(get_called_class() . ': Find subscription link: ' . $link);
             $this->download($link);
             return true;
         }
     }
     return false;
 }

 /**
  * Make a href found by discover() absolute, using $this->url as the base
  *
  * - "http://" / "https://" links (any letter case) are returned untouched;
  * - protocol-relative links ("//host/path") get the scheme of the base url;
  * - root-relative links ("/path") are attached to the origin of the base url;
  * - anything else is appended to the base url, which is treated as a directory.
  *
  * $this->url itself is not modified.
  *
  * @access private
  * @param  string  $link  Non-empty href value
  * @return string
  */
 private function resolveDiscoveredLink($link)
 {
     if (preg_match('#^https?://#i', $link) === 1) {
         return $link;
     }
     $base = (string) $this->url;
     // parse_url() returns false on seriously malformed urls
     $parts = parse_url($base);
     if (!is_array($parts)) {
         $parts = array();
     }
     $scheme = isset($parts['scheme']) ? $parts['scheme'] : 'http';
     // Protocol-relative link: only the scheme is missing
     if (substr($link, 0, 2) === '//') {
         return $scheme . ':' . $link;
     }
     // Root-relative link: resolve against the origin, not the page path
     if ($link[0] === '/' && isset($parts['host'])) {
         $origin = $scheme . '://';
         if (isset($parts['user'])) {
             $origin .= $parts['user'];
             if (isset($parts['pass'])) {
                 $origin .= ':' . $parts['pass'];
             }
             $origin .= '@';
         }
         $origin .= $parts['host'];
         if (isset($parts['port'])) {
             $origin .= ':' . $parts['port'];
         }
         return $origin . $link;
     }
     // Path-relative link (or base url without a host): append to the base
     // url with exactly one slash in between
     if ($link[0] === '/') {
         $link = substr($link, 1);
     }
     if ($base === '' || substr($base, -1) !== '/') {
         $base .= '/';
     }
     return $base . $link;
 }