/**
 * Check the filter output when a custom iframe whitelist is set:
 * an iframe whose source matches the whitelist must come out unchanged,
 * an iframe whose source does not match must produce an empty result.
 */
public function testOverrideFilters()
{
    $whitelist = array('http://www.kickstarter.com');

    // Source matches the whitelist entry: output must equal the input
    $allowed = '<iframe src="http://www.kickstarter.com/projects/lefnire/habitrpg-mobile/widget/video.html" height="480" width="640" frameborder="0"></iframe>';

    $filter = new Filter($allowed, 'http://blabla');
    $filter->setIframeWhitelist($whitelist);

    $this->assertEquals($allowed, $filter->execute());

    // Source is not in the whitelist: nothing must be left
    $rejected = '<iframe src="http://www.youtube.com/bla" height="480" width="640" frameborder="0"></iframe>';

    $filter = new Filter($rejected, 'http://blabla');
    $filter->setIframeWhitelist($whitelist);

    $this->assertEmpty($filter->execute());
}
/**
 * Find the item enclosure
 *
 * Only the first <link> element whose "rel" attribute equals "enclosure"
 * is used; its "href" and "type" attributes are copied to the item.
 * A relative href is resolved against the feed url.
 *
 * @access public
 * @param SimpleXMLElement $entry Feed item
 * @param \PicoFeed\Item $item Item object
 * @param \PicoFeed\Feed $feed Feed object
 */
public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed)
{
    foreach ($entry->link as $candidate) {

        // Skip every link that is not an enclosure
        if ((string) $candidate['rel'] !== 'enclosure') {
            continue;
        }

        $href = (string) $candidate['href'];

        $item->enclosure_url = $href;
        $item->enclosure_type = (string) $candidate['type'];

        if (Filter::isRelativePath($href)) {
            $item->enclosure_url = Filter::getAbsoluteUrl($href, $feed->url);
        }

        // The first enclosure wins, later ones are ignored
        return;
    }
}
/**
 * Parse the HTML content
 *
 * When some HTML is available, the head tags are stripped, the document is
 * converted using the stored encoding, and the content is extracted either
 * with the rules returned by getRules() (when that is an array) or with the
 * candidates strategy. Each step is written to the log.
 *
 * @access public
 * @return bool   True when the resulting content is not an empty string
 */
public function parse()
{
    $caller = get_called_class();

    if (! $this->html) {
        Logging::setMessage($caller . ' No content fetched');
    } else {
        Logging::setMessage($caller . ' Fix encoding');
        Logging::setMessage($caller . ': HTTP Encoding "' . $this->encoding . '"');

        // Head tags are removed before the encoding conversion
        $without_head = Filter::stripHeadTags($this->html);
        $this->html = $without_head;
        $this->html = Encoding::convert($this->html, $this->encoding);

        Logging::setMessage($caller . ' Content length: ' . strlen($this->html) . ' bytes');

        $rules = $this->getRules();

        if (! is_array($rules)) {
            Logging::setMessage($caller . ' Parse content with candidates');
            $this->parseContentWithCandidates();
        } else {
            Logging::setMessage($caller . ' Parse content with rules');
            $this->parseContentWithRules($rules);
        }
    }

    Logging::setMessage($caller . ' Content length: ' . strlen($this->content) . ' bytes');
    Logging::setMessage($caller . ' Grabber done');

    return $this->content !== '';
}
/**
 * Find the item enclosure
 *
 * The url is first looked up through getNamespaceValue() with the
 * "origEnclosureLink" name; when that yields an empty value, the "url"
 * attribute of <enclosure> is used instead. A relative url is resolved
 * against the feed url. Nothing is changed when the entry has no
 * <enclosure> element.
 *
 * @access public
 * @param SimpleXMLElement $entry Feed item
 * @param \PicoFeed\Item $item Item object
 * @param \PicoFeed\Feed $feed Feed object
 */
public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed)
{
    if (! isset($entry->enclosure)) {
        return;
    }

    $item->enclosure_url = $this->getNamespaceValue($entry->enclosure, $this->namespaces, 'origEnclosureLink');

    // Fall back to the plain "url" attribute
    if (empty($item->enclosure_url)) {
        if (isset($entry->enclosure['url'])) {
            $item->enclosure_url = (string) $entry->enclosure['url'];
        } else {
            $item->enclosure_url = '';
        }
    }

    if (isset($entry->enclosure['type'])) {
        $item->enclosure_type = (string) $entry->enclosure['type'];
    } else {
        $item->enclosure_type = '';
    }

    if (Filter::isRelativePath($item->enclosure_url)) {
        $item->enclosure_url = Filter::getAbsoluteUrl($item->enclosure_url, $feed->url);
    }
}
/**
 * Get the first XML tag
 *
 * HTML comments, a leading doctype and the XML declaration are removed
 * first, then the text of the first remaining tag is returned: everything
 * from its "<" up to, but not including, the matching ">".
 *
 * @access public
 * @param  string  $data  Feed content
 * @return string         First tag without its closing ">", or an empty string when no complete tag is found
 */
public function getFirstTag($data)
{
    // Strip HTML comments (max of 5,000 characters long to prevent crashing).
    // The "U" modifier already makes every quantifier lazy; adding "?" after
    // the quantifier would switch it back to greedy and remove everything
    // between the first "<!--" and the last "-->", real tags included.
    $stripped = preg_replace('/<!--(.{0,5000})-->/Uis', '', $data);

    // preg_replace() returns null on a PCRE error: keep the previous content
    if ($stripped !== null) {
        $data = $stripped;
    }

    /* Strip Doctype:
     * Doctype needs to be within the first 100 characters. (Ideally the first!)
     * If it's not found by then, we need to stop looking to prevent PREG
     * from reaching max backtrack depth and crashing.
     */
    $stripped = preg_replace('/^.{0,100}<!DOCTYPE([^>]*)>/Uis', '', $data);

    if ($stripped !== null) {
        $data = $stripped;
    }

    // Strip the XML declaration
    $data = Filter::stripXmlTag($data);

    // Find the first tag
    $open_tag = strpos($data, '<');

    if ($open_tag === false) {
        return '';
    }

    // Search from the opening bracket so a stray ">" located before it is ignored
    $close_tag = strpos($data, '>', $open_tag);

    if ($close_tag === false) {
        return '';
    }

    // The third argument of substr() is a length, not an end offset
    return substr($data, $open_tag, $close_tag - $open_tag);
}