/**
  * Run the HTML content filter over a feed item.
  *
  * When content filtering is enabled in the configuration, the item content
  * is handed to the HTML filter together with the feed's site URL and the
  * filtered result is written back to the item. Otherwise only a log
  * message is recorded and the item is left untouched.
  *
  * @access public
  * @param  Feed $feed Feed the item belongs to (supplies the site URL)
  * @param  Item $item Item whose content is replaced in place
  * @return void  No value is returned explicitly
  */
 public function execute(Feed $feed, Item $item)
 {
     if (! $this->config->getContentFiltering(true)) {
         Logger::setMessage(get_called_class() . ': Content filtering disabled');

         return;
     }

     $htmlFilter = Filter::html($item->getContent(), $feed->getSiteUrl());
     $htmlFilter->setConfig($this->config);

     $item->setContent($htmlFilter->execute());
 }
Ejemplo n.º 2
0
/**
 * Run an HTML fragment through the PicoFeed HTML filter with an image proxy
 * URL configured, depending on the proxy/cloaking flags.
 *
 * The HTML is returned unchanged when it is empty, when neither flag is
 * enabled, or when only image proxying is enabled and the current
 * connection is not secure.
 *
 * @param  string $html           HTML fragment to process
 * @param  string $website        Website URL passed to the HTML filter
 * @param  mixed  $proxy_images   Truthy to enable image proxying
 * @param  mixed  $cloak_referrer Truthy to enable referrer cloaking
 * @return string Filtered HTML, or the input when no rewriting applies
 */
function addProxyToTags($html, $website, $proxy_images, $cloak_referrer)
{
    // Nothing to rewrite.
    if ($html === '') {
        return $html;
    }

    // Neither feature is enabled.
    if (!$cloak_referrer && !$proxy_images) {
        return $html;
    }

    $proxy_only = !$cloak_referrer && $proxy_images;
    $cloak_only = $cloak_referrer && !$proxy_images;

    // only proxy enabled, but not connected via HTTPS
    if ($proxy_only && !Helper\isSecureConnection()) {
        return $html;
    }

    $config = new PicoFeedConfig();
    $config->setFilterImageProxyUrl('?action=proxy&url=%s');

    if ($proxy_only) {
        // image proxy mode only: https links do not need to be proxied, since
        // they do not trigger mixed content warnings.
        $config->setFilterImageProxyProtocol('http');
    } elseif ($cloak_only && Helper\isSecureConnection()) {
        // cloaking mode only: a request from an HTTPS page to an HTTP
        // resource has its referrer omitted by the browser, so only the
        // referrer for HTTPS to HTTPS requests needs to be cloaked.
        $config->setFilterImageProxyProtocol('https');
    }

    $html_filter = Filter::html($html, $website);
    $html_filter->setConfig($config);

    return $html_filter->execute();
}
Ejemplo n.º 3
0
 /**
  * Find the item title.
  *
  * Uses the first result of the 'title' XPath query, cast to string and
  * passed through Filter::stripWhiteSpace(). When that yields a falsy value
  * the item URL is used as the title instead.
  *
  * @param SimpleXMLElement      $entry Feed item
  * @param \PicoFeed\Parser\Item $item  Item object
  */
 public function findItemTitle(SimpleXMLElement $entry, Item $item)
 {
     $nodes = XmlParser::getXPathResult($entry, 'title');
     $cleaned = Filter::stripWhiteSpace((string) current($nodes));

     if ($cleaned) {
         $item->title = $cleaned;
     } else {
         $item->title = $item->url;
     }
 }
Ejemplo n.º 4
0
 /**
  * Parse the document.
  *
  * Loads the raw content as XML (retrying once after normalizing the data),
  * then builds a Feed object and appends one Item per entry returned by
  * getItemsTree(). Each item is handed to the item post-processor before it
  * is added to the feed.
  *
  * @return \PicoFeed\Parser\Feed
  * @throws MalformedXmlException When the content cannot be parsed as XML,
  *                               even after normalization
  */
 public function execute()
 {
     Logger::setMessage(get_called_class() . ': begin parsing');
     $xml = XmlParser::getSimpleXml($this->content);
     if ($xml === false) {
         // First attempt failed: normalize the raw content and try once more.
         Logger::setMessage(get_called_class() . ': Applying XML workarounds');
         $this->content = Filter::normalizeData($this->content);
         $xml = XmlParser::getSimpleXml($this->content);
         if ($xml === false) {
             // Still unparsable: log the parser errors and give up.
             Logger::setMessage(get_called_class() . ': XML parsing error');
             Logger::setMessage(XmlParser::getErrors());
             throw new MalformedXmlException('XML parsing error');
         }
     }
     // Remember every namespace used in the document (recursive lookup);
     // the same list is attached to each item below.
     $this->used_namespaces = $xml->getNamespaces(true);
     $xml = $this->registerSupportedNamespaces($xml);
     // Feed-level fields: each helper receives the document and the feed
     // object (helpers are defined elsewhere).
     $feed = new Feed();
     $this->findFeedUrl($xml, $feed);
     $this->checkFeedUrl($feed);
     $this->findSiteUrl($xml, $feed);
     $this->checkSiteUrl($feed);
     $this->findFeedTitle($xml, $feed);
     $this->findFeedDescription($xml, $feed);
     $this->findFeedLanguage($xml, $feed);
     $this->findFeedId($xml, $feed);
     $this->findFeedDate($xml, $feed);
     $this->findFeedLogo($xml, $feed);
     $this->findFeedIcon($xml, $feed);
     // Item-level fields: one Item per entry of the items tree.
     foreach ($this->getItemsTree($xml) as $entry) {
         $entry = $this->registerSupportedNamespaces($entry);
         $item = new Item();
         // Keep the raw entry and the document namespaces on the item.
         $item->xml = $entry;
         $item->namespaces = $this->used_namespaces;
         $this->findItemAuthor($xml, $entry, $item);
         $this->findItemUrl($entry, $item);
         $this->checkItemUrl($feed, $item);
         $this->findItemTitle($entry, $item);
         $this->findItemContent($entry, $item);
         // Id generation can use the item url/title/content (order is important)
         $this->findItemId($entry, $item, $feed);
         $this->findItemDate($entry, $item, $feed);
         $this->findItemEnclosure($entry, $item, $feed);
         $this->findItemLanguage($entry, $item, $feed);
         // Post-processing runs last, once all item fields have been looked up.
         $this->itemPostProcessor->execute($feed, $item);
         $feed->items[] = $item;
     }
     // The feed is concatenated into the log message, i.e. cast to string.
     Logger::setMessage(get_called_class() . PHP_EOL . $feed);
     return $feed;
 }
Ejemplo n.º 5
0
 /**
  * Find the item title
  *
  * Takes the entry's <title> child as a string, passes it through
  * Filter::stripWhiteSpace() and stores it on the item. An empty result
  * falls back to the item URL.
  *
  * @access public
  * @param  SimpleXMLElement          $entry   Feed item
  * @param  \PicoFeed\Parser\Item     $item    Item object
  */
 public function findItemTitle(SimpleXMLElement $entry, Item $item)
 {
     $title = Filter::stripWhiteSpace((string) $entry->title);

     // Same truthiness rule as empty(): '', '0' and null fall back to the URL.
     $item->title = $title ?: $item->url;
 }
Ejemplo n.º 6
0
 /**
  * Filter::normalizeData() must remove invalid control characters, whether
  * they appear as raw bytes or as numeric entities, and must leave valid
  * entities and multibyte characters untouched.
  *
  * Raw control characters are written as explicit "\x.." escapes so that
  * they stay visible in the source and cannot be silently dropped by an
  * editor or formatter.
  */
 public function testNormalizeData()
 {
     // invalid data link escape control character
     $this->assertEquals('<xml>random text</xml>', Filter::normalizeData("<xml>random\x10 text</xml>"));
     $this->assertEquals('<xml>random text</xml>', Filter::normalizeData("<xml>random&#x10; text</xml>"));
     $this->assertEquals('<xml>random text</xml>', Filter::normalizeData("<xml>random&#16; text</xml>"));
     // invalid unit separator control character (lower and upper case)
     $this->assertEquals('<xml>random text</xml>', Filter::normalizeData("<xml>random\x1f text</xml>"));
     $this->assertEquals('<xml>random text</xml>', Filter::normalizeData("<xml>random\x1F text</xml>"));
     $this->assertEquals('<xml>random text</xml>', Filter::normalizeData("<xml>random&#x1f; text</xml>"));
     $this->assertEquals('<xml>random text</xml>', Filter::normalizeData("<xml>random&#x1F; text</xml>"));
     $this->assertEquals('<xml>random text</xml>', Filter::normalizeData("<xml>random&#31; text</xml>"));
     /*
      * Do not test invalid multibyte characters. The output depends on php
      * version and character.
      *
      * php 5.3: always null
      * php >5.3: sometimes null, sometimes the stripped string
      */
     // invalid backspace control character + valid multibyte character
     $this->assertEquals('<xml>“random“ text</xml>', Filter::normalizeData("<xml>“random“\x08 text</xml>"));
     $this->assertEquals('<xml>&#x201C;random&#x201C; text</xml>', Filter::normalizeData("<xml>&#x201C;random&#x201C;&#x08; text</xml>"));
     $this->assertEquals('<xml>&#8220;random&#8220; text</xml>', Filter::normalizeData("<xml>&#8220;random&#8220;&#08; text</xml>"));
     // do not convert valid entities to utf-8 character
     $this->assertEquals('<xml attribute="&#34;value&#34;">random text</xml>', Filter::normalizeData('<xml attribute="&#34;value&#34;">random text</xml>'));
     $this->assertEquals('<xml attribute="&#x22;value&#x22;">random text</xml>', Filter::normalizeData('<xml attribute="&#x22;value&#x22;">random text</xml>'));
 }
Ejemplo n.º 7
0
 /**
  * Convert the attribute list to html
  *
  * Each entry is rendered as name="value", with the value passed through
  * Filter::escape(); the attribute name is emitted as-is. Entries are
  * joined with a single space.
  *
  * @access public
  * @param  array     $attributes    Attributes (name => value)
  * @return string
  */
 public function toHtml(array $attributes)
 {
     $pairs = array_map(
         function ($name, $value) {
             return $name . '="' . Filter::escape($value) . '"';
         },
         array_keys($attributes),
         $attributes
     );

     return implode(' ', $pairs);
 }
Ejemplo n.º 8
0
 /**
  * Find the item title.
  *
  * Lookup order: the 'rss:title' XPath query (using the parser namespaces),
  * then the plain 'title' XPath query, then the entry's title child. The
  * value is passed through Filter::stripWhiteSpace(); when the result is
  * falsy the item URL is used as the title.
  *
  * @param SimpleXMLElement      $entry Feed item
  * @param \PicoFeed\Parser\Item $item  Item object
  */
 public function findItemTitle(SimpleXMLElement $entry, Item $item)
 {
     $node = XmlParser::getXPathResult($entry, 'rss:title', $this->namespaces);

     if (! $node) {
         $node = XmlParser::getXPathResult($entry, 'title');
     }

     if (! $node) {
         $node = $entry->title;
     }

     $text = Filter::stripWhiteSpace(XmlParser::getValue($node));

     $item->setTitle($text ? $text : $item->getUrl());
 }
 /**
  * An image proxy callback set on the config must be used to rewrite the
  * image source, receiving the absolute image URL.
  */
 public function testsetFilterImageProxyCallback()
 {
     // Proxy URL: HMAC-SHA1 key of the image URL followed by the encoded URL.
     $proxy = function ($image_url) {
         return 'https://mypublicproxy/' . hash_hmac('sha1', $image_url, 'secret') . '/' . rawurlencode($image_url);
     };

     $config = new Config();
     $config->setFilterImageProxyCallback($proxy);

     $filter = Filter::html('<p>Image <img src="/image.png" alt="My Image"/></p>', 'http://foo');
     $filter->setConfig($config);

     $expected = '<p>Image <img src="https://mypublicproxy/4924964043f3119b3cf2b07b1922d491bcc20092/' . rawurlencode('http://foo/image.png') . '" alt="My Image"/></p>';

     $this->assertEquals($expected, $filter->execute());
 }
Ejemplo n.º 10
0
 /**
  * Normalize encoding and strip head tag.
  *
  * The encoding found in the HTML meta tag takes precedence over the
  * encoding stored on this object. Both encodings are logged once the
  * HTML has been converted and stripped.
  */
 public function prepareHtml()
 {
     $meta_encoding = XmlParser::getEncodingFromMetaTag($this->html);

     if ($meta_encoding) {
         $source_encoding = $meta_encoding;
     } else {
         $source_encoding = $this->encoding;
     }

     $this->html = Encoding::convert($this->html, $source_encoding);
     $this->html = Filter::stripHeadTags($this->html);

     Logger::setMessage(get_called_class() . ': HTTP Encoding "' . $this->encoding . '" ; HTML Encoding "' . $meta_encoding . '"');
 }
Ejemplo n.º 11
0
 /**
  * Parse the HTML content
  *
  * @access public
  * @return bool
  */
 public function parse()
 {
     if ($this->skip_processing) {
         return true;
     }
     if ($this->html) {
         $html_encoding = XmlParser::getEncodingFromMetaTag($this->html);
         // Encode everything in UTF-8
         Logger::setMessage(get_called_class() . ': HTTP Encoding "' . $this->encoding . '" ; HTML Encoding "' . $html_encoding . '"');
         $this->html = Encoding::convert($this->html, $html_encoding ?: $this->encoding);
         $this->html = Filter::stripHeadTags($this->html);
         Logger::setMessage(get_called_class() . ': Content length: ' . strlen($this->html) . ' bytes');
         $rules = $this->getRules();
         if (is_array($rules)) {
             Logger::setMessage(get_called_class() . ': Parse content with rules');
             $this->parseContentWithRules($rules);
         } else {
             Logger::setMessage(get_called_class() . ': Parse content with candidates');
             $this->parseContentWithCandidates();
         }
     } else {
         Logger::setMessage(get_called_class() . ': No content fetched');
     }
     Logger::setMessage(get_called_class() . ': Content length: ' . strlen($this->content) . ' bytes');
     Logger::setMessage(get_called_class() . ': Grabber done');
     return $this->content !== '';
 }
Ejemplo n.º 12
0
 /**
  * Find the item title.
  *
  * Reads the value of the 'title' XPath query, passes it through
  * Filter::stripWhiteSpace() and sets it on the item. A falsy result falls
  * back to the item URL.
  *
  * @param SimpleXMLElement      $entry Feed item
  * @param \PicoFeed\Parser\Item $item  Item object
  */
 public function findItemTitle(SimpleXMLElement $entry, Item $item)
 {
     $nodes = XmlParser::getXPathResult($entry, 'title');
     $title = Filter::stripWhiteSpace(XmlParser::getValue($nodes));

     if (! $title) {
         $title = $item->getUrl();
     }

     $item->setTitle($title);
 }
Ejemplo n.º 13
0
 /**
  * Parse tag content.
  *
  * Replaces non-breaking spaces with normal spaces, escapes the text with
  * Filter::escape() and appends it to the output buffer.
  *
  * @param resource $parser  XML parser
  * @param string   $content Tag content
  */
 public function dataTag($parser, $content)
 {
     // Replace &nbsp; with normal space. The non-breaking space (U+00A0,
     // UTF-8 bytes 0xC2 0xA0) is written as an explicit escape so it cannot
     // be confused with, or silently turned into, a regular space.
     $content = str_replace("\xc2\xa0", ' ', $content);
     $this->output .= Filter::escape($content);
 }