Ejemplo n.º 1
0
 /**
  * Build the parser from raw feed data.
  *
  * Detects the encoding declared in the XML tag, strips that tag, converts
  * the remaining content and registers the item post-processors.
  *
  * @param string $content       Feed content
  * @param string $http_encoding HTTP encoding (headers), used only when the XML tag yields no encoding
  * @param string $fallback_url  Fallback url when the feed provides a relative or broken url
  */
 public function __construct($content, $http_encoding = '', $fallback_url = '')
 {
     $caller = get_called_class();

     $this->fallback_url = $fallback_url;

     // The declared encoding must be read before the XML tag is removed.
     $xml_encoding = XmlParser::getEncodingFromXmlTag($content);

     // Dropping the XML tag avoids multiple encoding/decoding passes later on.
     $payload = Filter::stripXmlTag($content);

     Logger::setMessage($caller . ': HTTP Encoding "' . $http_encoding . '" ; XML Encoding "' . $xml_encoding . '"');

     // The XML declaration wins; the HTTP header is only the fallback.
     $source_encoding = $xml_encoding ?: $http_encoding;
     $this->content = Encoding::convert($payload, $source_encoding);

     // Item post-processing chain: content generators first, then content filters.
     $this->itemPostProcessor = new ItemPostProcessor($this->config);
     $processors = $this->itemPostProcessor;
     $processors->register(new ContentGeneratorProcessor($this->config));
     $processors->register(new ContentFilterProcessor($this->config));
 }
Ejemplo n.º 2
0
 /**
  * Build the parser from raw feed data.
  *
  * Creates the date parser, detects the encoding declared in the XML tag,
  * strips that tag, converts the remaining content and normalizes it.
  *
  * @access public
  * @param  string  $content          Feed content
  * @param  string  $http_encoding    HTTP encoding (headers), used only when the XML tag yields no encoding
  * @param  string  $fallback_url     Fallback url when the feed provides a relative or broken url
  */
 public function __construct($content, $http_encoding = '', $fallback_url = '')
 {
     $caller = get_called_class();

     $this->date = new DateParser();
     $this->fallback_url = $fallback_url;

     // The declared encoding must be read before the XML tag is removed.
     $xml_encoding = XmlParser::getEncodingFromXmlTag($content);

     // Dropping the XML tag avoids multiple encoding/decoding passes later on.
     $payload = Filter::stripXmlTag($content);

     Logger::setMessage($caller . ': HTTP Encoding "' . $http_encoding . '" ; XML Encoding "' . $xml_encoding . '"');

     // The XML declaration wins; the HTTP header is only the fallback.
     $source_encoding = $xml_encoding ?: $http_encoding;
     $payload = Encoding::convert($payload, $source_encoding);

     // Workarounds are applied last, on the converted content.
     $this->content = Filter::normalizeData($payload);
 }
Ejemplo n.º 3
0
 /**
  * Convert the stored HTML using its detected encoding and strip the head tags.
  *
  * The encoding found in the meta tag is preferred; the encoding property
  * is used when the meta tag yields none.
  */
 public function prepareHtml()
 {
     $meta_encoding = XmlParser::getEncodingFromMetaTag($this->html);
     $source_encoding = $meta_encoding ?: $this->encoding;

     $converted = Encoding::convert($this->html, $source_encoding);
     $this->html = Filter::stripHeadTags($converted);

     $caller = get_called_class();
     Logger::setMessage($caller . ': HTTP Encoding "' . $this->encoding . '" ; HTML Encoding "' . $meta_encoding . '"');
 }
Ejemplo n.º 4
0
 /**
  * Parse the HTML content.
  *
  * When processing is not skipped and HTML is available, the HTML is
  * converted, its head tags are stripped, and the content is extracted either
  * with the rules returned by getRules() or, when those are not an array,
  * with the candidates strategy.
  *
  * @access public
  * @return bool True when processing is skipped; otherwise whether the content property is not an empty string
  */
 public function parse()
 {
     if ($this->skip_processing) {
         return true;
     }

     $caller = get_called_class();

     if (! $this->html) {
         Logger::setMessage($caller . ': No content fetched');
     } else {
         $meta_encoding = XmlParser::getEncodingFromMetaTag($this->html);

         Logger::setMessage($caller . ': HTTP Encoding "' . $this->encoding . '" ; HTML Encoding "' . $meta_encoding . '"');

         // The meta tag encoding wins; the encoding property is only the fallback.
         $source_encoding = $meta_encoding ?: $this->encoding;
         $converted = Encoding::convert($this->html, $source_encoding);
         $this->html = Filter::stripHeadTags($converted);

         Logger::setMessage($caller . ': Content length: ' . strlen($this->html) . ' bytes');

         $rules = $this->getRules();

         if (! is_array($rules)) {
             Logger::setMessage($caller . ': Parse content with candidates');
             $this->parseContentWithCandidates();
         } else {
             Logger::setMessage($caller . ': Parse content with rules');
             $this->parseContentWithRules($rules);
         }
     }

     Logger::setMessage($caller . ': Content length: ' . strlen($this->content) . ' bytes');
     Logger::setMessage($caller . ': Grabber done');

     return '' !== $this->content;
 }