Exemple #1
0
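 /**
  * Parse the feed content and build the corresponding Feed object
  * (feed-level metadata plus one Item per entry found in the document).
  *
  * @return \PicoFeed\Parser\Feed
  * @throws MalformedXmlException When the content cannot be parsed as XML, even after normalization
  */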
 public function execute()
 {
     // Resolved once: every log line below is prefixed with the concrete parser class.
     $parser = get_called_class();
     Logger::setMessage($parser . ': begin parsing');

     // First attempt on the content as-is; on failure, normalize the content and retry once.
     $document = XmlParser::getSimpleXml($this->content);
     if (false === $document) {
         Logger::setMessage($parser . ': Applying XML workarounds');
         $this->content = Filter::normalizeData($this->content);
         $document = XmlParser::getSimpleXml($this->content);
     }

     // Only reachable with false when the retry above failed as well.
     if (false === $document) {
         Logger::setMessage($parser . ': XML parsing error');
         Logger::setMessage(XmlParser::getErrors());
         throw new MalformedXmlException('XML parsing error');
     }

     // Namespaces are collected before registerSupportedNamespaces() replaces the document handle.
     $this->used_namespaces = $document->getNamespaces(true);
     $document = $this->registerSupportedNamespaces($document);

     $parsed = new Feed();

     // Feed-level fields; each check*() runs immediately after its matching find*().
     $this->findFeedUrl($document, $parsed);
     $this->checkFeedUrl($parsed);
     $this->findSiteUrl($document, $parsed);
     $this->checkSiteUrl($parsed);
     $this->findFeedTitle($document, $parsed);
     $this->findFeedDescription($document, $parsed);
     $this->findFeedLanguage($document, $parsed);
     $this->findFeedId($document, $parsed);
     $this->findFeedDate($document, $parsed);
     $this->findFeedLogo($document, $parsed);
     $this->findFeedIcon($document, $parsed);

     // Item-level fields, one Item per node returned by getItemsTree().
     foreach ($this->getItemsTree($document) as $node) {
         $node = $this->registerSupportedNamespaces($node);

         $article = new Item();
         $article->xml = $node;
         $article->namespaces = $this->used_namespaces;

         $this->findItemAuthor($document, $node, $article);
         $this->findItemUrl($node, $article);
         $this->checkItemUrl($parsed, $article);
         $this->findItemTitle($node, $article);
         $this->findItemContent($node, $article);

         // Keep findItemId() after the url/title/content finders: per the original
         // author's note, id generation can use those values (order is important).
         $this->findItemId($node, $article, $parsed);
         $this->findItemDate($node, $article, $parsed);
         $this->findItemEnclosure($node, $article, $parsed);
         $this->findItemLanguage($node, $article, $parsed);

         $this->itemPostProcessor->execute($parsed, $article);

         $parsed->items[] = $article;
     }

     // The feed is concatenated into the log message, i.e. cast to string.
     Logger::setMessage($parser . PHP_EOL . $parsed);

     return $parsed;
 }
Exemple #2
0
 /**
  * Constructor
  *
  * @access public
  * @param  string  $content          Feed content
  * @param  string  $http_encoding    HTTP encoding (headers)
  * @param  string  $fallback_url     Fallback URL when the feed provides a relative or broken URL
  */
 public function __construct($content, $http_encoding = '', $fallback_url = '')
 {
     $this->date = new DateParser();
     $this->fallback_url = $fallback_url;

     // Must be read from the untouched $content: the XML tag is stripped just below.
     $declared_encoding = XmlParser::getEncodingFromXmlTag($content);

     // Strip XML tag to avoid multiple encoding/decoding in the next XML processing
     $this->content = Filter::stripXmlTag($content);

     Logger::setMessage(
         get_called_class()
         . ': HTTP Encoding "' . $http_encoding
         . '" ; XML Encoding "' . $declared_encoding . '"'
     );

     // Encode everything in UTF-8. The encoding found in the XML tag wins;
     // the HTTP one is used only when the former is empty/falsy.
     $source_encoding = $declared_encoding ? $declared_encoding : $http_encoding;
     $this->content = Encoding::convert($this->content, $source_encoding);

     // Workarounds
     $this->content = Filter::normalizeData($this->content);
 }
Exemple #3
0
 /**
  * Filter::normalizeData() must remove the control characters U+0010, U+001F
  * and U+0008 whether they appear as raw bytes or as hex/decimal numeric
  * entities, and must leave valid characters and valid entities unchanged.
  *
  * Raw control bytes are written as explicit "\x.." escapes inside
  * double-quoted strings so that they are visible in the source and cannot be
  * lost by an editor or a copy/paste.
  */
 public function testNormalizeData()
 {
     // invalid data link escape control character (raw byte, hex entity, decimal entity)
     $this->assertEquals('<xml>random text</xml>', Filter::normalizeData("<xml>random\x10 text</xml>"));
     $this->assertEquals('<xml>random text</xml>', Filter::normalizeData("<xml>random&#x10; text</xml>"));
     $this->assertEquals('<xml>random text</xml>', Filter::normalizeData("<xml>random&#16; text</xml>"));
     // invalid unit separator control character (lower and upper case)
     // NOTE: "\x1f" and "\x1F" yield the same byte; the case only matters for the entity forms.
     $this->assertEquals('<xml>random text</xml>', Filter::normalizeData("<xml>random\x1f text</xml>"));
     $this->assertEquals('<xml>random text</xml>', Filter::normalizeData("<xml>random\x1F text</xml>"));
     $this->assertEquals('<xml>random text</xml>', Filter::normalizeData("<xml>random&#x1f; text</xml>"));
     $this->assertEquals('<xml>random text</xml>', Filter::normalizeData("<xml>random&#x1F; text</xml>"));
     $this->assertEquals('<xml>random text</xml>', Filter::normalizeData("<xml>random&#31; text</xml>"));
     /*
      * Do not test invalid multibyte characters. The output depends on php
      * version and character.
      *
      * php 5.3: always null
      * php >5.3: sometimes null, sometimes the stripped string
      */
     // invalid backspace control character + valid multibyte character
     $this->assertEquals('<xml>“random“ text</xml>', Filter::normalizeData("<xml>“random“\x08 text</xml>"));
     $this->assertEquals('<xml>&#x201C;random&#x201C; text</xml>', Filter::normalizeData("<xml>&#x201C;random&#x201C;&#x08; text</xml>"));
     $this->assertEquals('<xml>&#8220;random&#8220; text</xml>', Filter::normalizeData("<xml>&#8220;random&#8220;&#08; text</xml>"));
     // do not convert valid entities to utf-8 character
     $this->assertEquals('<xml attribute="&#34;value&#34;">random text</xml>', Filter::normalizeData('<xml attribute="&#34;value&#34;">random text</xml>'));
     $this->assertEquals('<xml attribute="&#x22;value&#x22;">random text</xml>', Filter::normalizeData('<xml attribute="&#x22;value&#x22;">random text</xml>'));
 }