/**
 * Runs feed autodiscovery on one fixture document and compares the files the
 * locator reports with the <link> elements found in the document body.
 *
 * The <link> elements are mapped through map_url_attrib() and narrowed with
 * filter_success(); the locator results are mapped through map_url_file().
 * Both lists must be equal.
 *
 * @dataProvider firefoxtests
 * @param object $data Fixture document; its body property is read here.
 *                     Presumably a MockSimplePie_File - confirm against the provider.
 */
public function test_from_file($data)
{
    $locator = new SimplePie_Locator($data, 0, null, 'MockSimplePie_File', false);

    $expected = SimplePie_Misc::get_element('link', $data->body);

    // $all is not assigned before this call; presumably find() fills it by
    // reference with every discovered feed - verify against SimplePie_Locator::find().
    $feed = $locator->find(SIMPLEPIE_LOCATOR_ALL, $all);

    $this->assertFalse($locator->is_feed($data), 'HTML document not be a feed itself');
    $this->assertInstanceOf('MockSimplePie_File', $feed);

    // __CLASS__ replaces the argument-less get_class() call, which is
    // deprecated as of PHP 8.3; both name the class this method is declared in.
    $expected = array_map(array(__CLASS__, 'map_url_attrib'), $expected);
    $success = array_filter($expected, array(__CLASS__, 'filter_success'));

    $found = array_map(array(__CLASS__, 'map_url_file'), $all);
    $this->assertEquals($success, $found);
}
/**
 * Rewrites URL-carrying attributes of every $tag element in $data so they are
 * absolute relative to $this->base.
 *
 * Nothing is rewritten when $tag is listed in $this->strip_htmltags.
 *
 * NOTE(review): a comment left by an earlier maintainer questioned whether this
 * method has any effect at all; that has not been verified here.
 *
 * @param string       $data       Markup to process.
 * @param string       $tag        Element name passed to SimplePie_Misc::get_element().
 * @param string|array $attributes One attribute name, or a list of attribute names.
 * @return string The markup with the matching attribute values replaced.
 */
function replace_urls($data, $tag, $attributes)
{
    // Elements that are on the strip list are left untouched.
    if (is_array($this->strip_htmltags) && in_array($tag, $this->strip_htmltags)) {
        return $data;
    }

    // Treat a single attribute name as a one-item list so both cases share a loop.
    $names = is_array($attributes) ? $attributes : array($attributes);

    foreach (SimplePie_Misc::get_element($tag, $data) as $element) {
        foreach ($names as $name) {
            if (!isset($element['attribs'][$name]['data'])) {
                continue;
            }

            $element['attribs'][$name]['data'] = SimplePie_Misc::absolutize_url($element['attribs'][$name]['data'], $this->base);
            $rebuilt = SimplePie_Misc::element_implode($element);
            $data = str_replace($element['full'], $rebuilt, $data);

            // Keep the search string in step with $data so a later attribute
            // of the same element can still be located.
            $element['full'] = $rebuilt;
        }
    }

    return $data;
}
/**
 * Records the result for the current file, then follows the first unvisited
 * <link rel="next"> found in that file's body.
 *
 * parent::result() is skipped when the current file is the autodiscovery index
 * page. URLs already followed are remembered in a function-static list, so each
 * "next" target is taken at most once. run() is called whenever
 * $this->data['url'] is truthy after the scan.
 */
function result()
{
    if ($this->data['file']->url != 'http://diveintomark.org/tests/client/autodiscovery/') {
        parent::result();
    }

    // Shared across calls: every "next" URL that has already been followed.
    static $done = array();

    $page = $this->data['file'];
    foreach (SimplePie_Misc::get_element('link', $page->body) as $link) {
        // Both href and rel must be present and non-empty.
        if (empty($link['attribs']['href']['data']) || empty($link['attribs']['rel']['data'])) {
            continue;
        }

        $rel = array_unique(SimplePie_Misc::space_seperated_tokens(strtolower($link['attribs']['rel']['data'])));
        $href = SimplePie_Misc::absolutize_url(trim($link['attribs']['href']['data']), $page->url);

        if (in_array('next', $rel) && !in_array($href, $done)) {
            $this->data['url'] = $href;
            $done[] = $href;
            break;
        }
    }

    if ($this->data['url']) {
        $this->run();
    }
}
/**
 * Collects the targets of all <a href> elements in the fetched document and
 * sorts them into $this->local and $this->elsewhere.
 *
 * Only links with no scheme, or with an http, https or feed scheme, are kept.
 * A link is resolved against $this->base when it appears after
 * $this->base_location in the document, and against $this->http_base otherwise.
 * A link counts as local when it has no authority or the same authority as the
 * document's own URL. Both lists are de-duplicated with array_unique().
 *
 * @return true|null true when at least one link was collected, null otherwise.
 */
public function get_links()
{
    $links = SimplePie_Misc::get_element('a', $this->file->body);

    // The document's own URL does not change inside the loop, so parse it once.
    $current = SimplePie_Misc::parse_url($this->file->url);

    foreach ($links as $link) {
        if (!isset($link['attribs']['href']['data'])) {
            continue;
        }

        $raw = trim($link['attribs']['href']['data']);
        $parsed = SimplePie_Misc::parse_url($raw);

        // The previous pattern, /^(http(s)|feed)?$/i, required the "s" and so
        // rejected plain "http" links; "https?" accepts both http and https.
        if ($parsed['scheme'] !== '' && !preg_match('/^(https?|feed)$/i', $parsed['scheme'])) {
            continue;
        }

        if ($this->base_location < $link['offset']) {
            $href = SimplePie_Misc::absolutize_url($raw, $this->base);
        } else {
            $href = SimplePie_Misc::absolutize_url($raw, $this->http_base);
        }

        // The authority check uses the link as written, so relative links
        // (empty authority) always count as local.
        if ($parsed['authority'] === '' || $parsed['authority'] === $current['authority']) {
            $this->local[] = $href;
        } else {
            $this->elsewhere[] = $href;
        }
    }

    $this->local = array_unique($this->local);
    $this->elsewhere = array_unique($this->elsewhere);

    if (!empty($this->local) || !empty($this->elsewhere)) {
        return true;
    }

    return null;
}
/**
 * Cleans one piece of feed content and converts it to the output encoding.
 *
 * Steps, in the order the code performs them:
 *  1. Stores $attribs on $this->attribs.
 *  2. When $this->feedinfo['type'] is 'Atom': base64-decodes the data if MODE is
 *     'base64' or TYPE is 'application/octet-stream'; otherwise entity-decodes
 *     it if MODE is 'escaped' or TYPE is 'html' / 'text/html'. For TYPE 'xhtml'
 *     or 'application/xhtml+xml' the first opening <div ...> is either removed
 *     together with the last closing </div> (when $this->remove_div is set) or
 *     reduced to a bare <div>, and convert_entities() is applied.
 *     For every other feed type only convert_entities() is applied.
 *  3. Entity-decodes again when $force_decode is true, trims, and removes HTML
 *     comments.
 *  4. When $this->strip_ads is set, removes markup matching the Pheedo,
 *     Doubleclick and Google AdSense patterns.
 *  5. When $this->replace_headers is set, turns h1-h3 tags into h4 tags.
 *  6. Passes the data to replace_urls(): directly when $is_url is true, or as a
 *     preg_replace_callback() handler for each matched element otherwise.
 *  7. When $this->bypass_image_hotlink is set, rewrites each img SRC value to
 *     "<bypass_image_hotlink_page>?<bypass_image_hotlink>=<encoded original>".
 *  8. Removes every tag listed in $this->strip_htmltags (via do_strip_htmltags)
 *     and every attribute listed in $this->strip_attributes.
 *  9. Converts from $this->input_encoding to $this->output_encoding.
 *
 * NOTE(review): the attribute-stripping patterns in step 8 contain identical
 * alternatives such as ("|") and an unescaped single quote inside a
 * single-quoted string. This looks like HTML entities that were decoded when
 * this copy was made - confirm the literals against the upstream source before
 * relying on them.
 *
 * NOTE(review): replace_urls() is called here with two arguments and also used
 * as a regex callback (which receives a single matches array) - verify this
 * against the replace_urls() signature defined on this class.
 *
 * @param string $data         Raw content to clean.
 * @param array  $attribs      Attributes of the source element; the MODE and TYPE keys are read.
 * @param bool   $is_url       When true, $data is handed straight to replace_urls().
 * @param bool   $force_decode When true, an extra entities_decode() pass is applied.
 * @return string The cleaned content as returned by SimplePie_Misc::change_encoding().
 */
function sanitize($data, $attribs, $is_url = false, $force_decode = false) { $this->attribs = $attribs; if (isset($this->feedinfo['type']) && $this->feedinfo['type'] == 'Atom') { if (!empty($attribs['MODE']) && $attribs['MODE'] == 'base64' || !empty($attribs['TYPE']) && $attribs['TYPE'] == 'application/octet-stream') { $data = trim($data); $data = base64_decode($data); } else { if (!empty($attribs['MODE']) && $attribs['MODE'] == 'escaped' || !empty($attribs['TYPE']) && ($attribs['TYPE'] == 'html' || $attribs['TYPE'] == 'text/html')) { $data = $this->entities_decode($data); } } if (!empty($attribs['TYPE']) && ($attribs['TYPE'] == 'xhtml' || $attribs['TYPE'] == 'application/xhtml+xml')) { if ($this->remove_div) { $data = preg_replace('/<div( .*)?>/msiU', '', strrev(preg_replace('/>vid\\/</i', '', strrev($data), 1)), 1); } else { $data = preg_replace('/<div( .*)?>/msiU', '<div>', $data, 1); } $data = $this->convert_entities($data); } } else { $data = $this->convert_entities($data); } if ($force_decode) { $data = $this->entities_decode($data); } $data = trim($data); $data = preg_replace('/<\\!--([^-]|-[^-])*-->/msiU', '', $data); // If Strip Ads is enabled, strip them. if ($this->strip_ads) { $data = preg_replace('/<a (.*)href=(.*)click\\.phdo\\?s=(.*)<\\/a>/msiU', '', $data); // Pheedo links (tested with Dooce.com) $data = preg_replace('/<p(.*)>(.*)<a href="http:\\/\\/ad.doubleclick.net\\/jump\\/(.*)<\\/p>/msiU', '', $data); // Doubleclick links (tested with InfoWorld.com) $data = preg_replace('/<p><map (.*)name=(.*)google_ad_map(.*)<\\/p>/msiU', '', $data); // Google AdSense for Feeds (tested with tuaw.com). // Feedflare, from Feedburner } // Replace H1, H2, and H3 tags with the less important H4 tags. // This is because on a site, the more important headers might make sense, // but it most likely doesn't fit in the context of RSS-in-a-webpage. 
if ($this->replace_headers) { $data = preg_replace('/<h[1-3]((\\s*((\\w+:)?\\w+)\\s*=\\s*("([^"]*)"|\'([^\']*)\'|(.*)))*)\\s*>/msiU', '<h4\\1>', $data); $data = preg_replace('/<\\/h[1-3]>/i', '</h4>', $data); } if ($is_url) { $data = $this->replace_urls($data, true); } else { $data = preg_replace_callback('/<(\\S+)((\\s*((\\w+:)?\\w+)\\s*=\\s*("([^"]*)"|\'([^\']*)\'|(.*)))*)\\s*(\\/>|>(.*)<\\/\\S+>)/msiU', array(&$this, 'replace_urls'), $data); } // If Bypass Image Hotlink is enabled, rewrite all the image tags. if ($this->bypass_image_hotlink) { $images = SimplePie_Misc::get_element('img', $data); foreach ($images as $img) { if (!empty($img['attribs']['SRC']['data'])) { $pre = ''; if ($this->bypass_image_hotlink_page) { $pre = $this->bypass_image_hotlink_page; } $pre .= "?{$this->bypass_image_hotlink}="; $img['attribs']['SRC']['data'] = $pre . rawurlencode(strtr($img['attribs']['SRC']['data'], array_flip(get_html_translation_table(HTML_SPECIALCHARS, ENT_QUOTES)))); $data = str_replace($img['full'], SimplePie_Misc::element_implode($img), $data); } } } // Strip out HTML tags and attributes that might cause various security problems. // Based on recommendations by Mark Pilgrim at: // http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely if ($this->strip_htmltags) { foreach ($this->strip_htmltags as $tag) { $data = preg_replace_callback("/<({$tag})((\\s*((\\w+:)?\\w+)(\\s*=\\s*(\"([^\"]*)\"|'([^']*)'|(.*)))?)*)\\s*(\\/>|>(.*)<\\/({$tag})((\\s*((\\w+:)?\\w+)(\\s*=\\s*(\"([^\"]*)\"|'([^']*)'|(.*)))?)*)\\s*>)/msiU", array(&$this, 'do_strip_htmltags'), $data); } } if ($this->strip_attributes) { foreach ($this->strip_attributes as $attrib) { $data = preg_replace('/ ' . trim($attrib) . '=("|")(\\w|\\s|=|-|:|;|\\/|\\.|\\?|&|,|#|!|\\(|\\)|\'|'|<|>|\\+|{|})*("|")/i', '', $data); $data = preg_replace('/ ' . trim($attrib) . '=(\'|')(\\w|\\s|=|-|:|;|\\/|\\.|\\?|&|,|#|!|\\(|\\)|"|"|<|>|\\+|{|})*(\'|')/i', '', $data); $data = preg_replace('/ ' . trim($attrib) . 
'=(\\w|\\s|=|-|:|;|\\/|\\.|\\?|&|,|#|!|\\(|\\)|\\+|{|})*/i', '', $data); } } // Convert encoding $data = SimplePie_Misc::change_encoding($data, $this->input_encoding, $this->output_encoding); return $data; }
/**
 * Makes one attribute of every $tag element in $data an absolute URL,
 * resolved against $this->base.
 *
 * @param string $data      Markup to process.
 * @param string $tag       Element name passed to SimplePie_Misc::get_element().
 * @param string $attribute Name of the attribute whose value is a URL.
 * @return string The markup with each matching element re-serialised.
 */
function replace_urls($data, $tag, $attribute)
{
    foreach (SimplePie_Misc::get_element($tag, $data) as $node) {
        // Elements without the attribute are left as they are.
        if (!isset($node['attribs'][$attribute]['data'])) {
            continue;
        }

        $absolute = SimplePie_Misc::absolutize_url($node['attribs'][$attribute]['data'], $this->base);
        $node['attribs'][$attribute]['data'] = $absolute;

        // Swap the element's original text for its re-serialised form.
        $data = str_replace($node['full'], SimplePie_Misc::element_implode($node), $data);
    }

    return $data;
}