get_element() public method

public get_element ( $realname, $string )
 /**
  * Checks that feed discovery on an HTML fixture returns the expected feed files.
  *
  * The expected list is built from the fixture's <link> elements (extracted
  * with SimplePie_Misc::get_element), mapped through this class's
  * map_url_attrib callback and filtered with filter_success. It is compared
  * with the list collected by SimplePie_Locator::find(), mapped through
  * map_url_file.
  *
  * @dataProvider firefoxtests
  * @param object $data Fixture file object; its ->body is read here.
  *                     NOTE(review): presumably a MockSimplePie_File - confirm against the provider.
  */
 public function test_from_file($data)
 {
     // __CLASS__ is equivalent to calling get_class() with no argument inside
     // a class, but the argument-less call is deprecated as of PHP 8.3.
     $self = __CLASS__;

     $locator = new SimplePie_Locator($data, 0, null, 'MockSimplePie_File', false);
     $expected = SimplePie_Misc::get_element('link', $data->body);
     // $all is not set beforehand; find() is expected to populate it by reference.
     $feed = $locator->find(SIMPLEPIE_LOCATOR_ALL, $all);
     $this->assertFalse($locator->is_feed($data), 'HTML document not be a feed itself');
     $this->assertInstanceOf('MockSimplePie_File', $feed);
     $expected = array_map(array($self, 'map_url_attrib'), $expected);
     $success = array_filter($expected, array($self, 'filter_success'));
     $found = array_map(array($self, 'map_url_file'), $all);
     $this->assertEquals($success, $found);
 }
 /**
  * Makes the given attribute(s) of every $tag element in $data absolute.
  *
  * Each matching attribute value is resolved against $this->base with
  * SimplePie_Misc::absolutize_url(), the element is rebuilt with
  * SimplePie_Misc::element_implode(), and the rebuilt element is substituted
  * for the original text in $data. Nothing is changed when $tag is listed in
  * $this->strip_htmltags.
  *
  * @param string       $data       Markup to process.
  * @param string       $tag        Element name handed to SimplePie_Misc::get_element().
  * @param string|array $attributes A single attribute name, or a list of attribute names.
  * @return string The markup after all substitutions.
  */
 function replace_urls($data, $tag, $attributes)
 {
     // Note kept from the original author: "This seems to do nothing at all!?"
     if (is_array($this->strip_htmltags) && in_array($tag, $this->strip_htmltags)) {
         return $data;
     }

     // Treat a single attribute name as a one-item list so one loop serves both cases.
     $names = is_array($attributes) ? $attributes : array($attributes);

     foreach (SimplePie_Misc::get_element($tag, $data) as $node) {
         foreach ($names as $name) {
             if (!isset($node['attribs'][$name]['data'])) {
                 continue;
             }
             $node['attribs'][$name]['data'] = SimplePie_Misc::absolutize_url($node['attribs'][$name]['data'], $this->base);
             $rebuilt = SimplePie_Misc::element_implode($node);
             $data = str_replace($node['full'], $rebuilt, $data);
             // Track the rewritten text so a later attribute of the same
             // element is replaced against what is now in $data.
             $node['full'] = $rebuilt;
         }
     }

     return $data;
 }
Example #3
0
 /**
  * Processes the fetched file and then follows its first unvisited rel="next" link.
  *
  * parent::result() is invoked for every file except the autodiscovery index
  * page itself. The file body is then scanned for <link> elements; the first
  * one whose rel tokens include "next" and whose absolutized href has not been
  * visited yet becomes $this->data['url']. Finally run() is called whenever
  * $this->data['url'] is truthy.
  */
 function result()
 {
     if ($this->data['file']->url != 'http://diveintomark.org/tests/client/autodiscovery/') {
         parent::result();
     }

     // URLs already followed; static, so the list persists across calls.
     static $done = array();

     foreach (SimplePie_Misc::get_element('link', $this->data['file']->body) as $candidate) {
         if (empty($candidate['attribs']['href']['data']) || empty($candidate['attribs']['rel']['data'])) {
             continue;
         }

         $tokens = array_unique(SimplePie_Misc::space_seperated_tokens(strtolower($candidate['attribs']['rel']['data'])));
         $target = SimplePie_Misc::absolutize_url(trim($candidate['attribs']['href']['data']), $this->data['file']->url);

         if (in_array($target, $done) || !in_array('next', $tokens)) {
             continue;
         }

         // Remember the target and stop at the first usable "next" link.
         $this->data['url'] = $target;
         $done[] = $target;
         break;
     }

     if ($this->data['url']) {
         $this->run();
     }
 }
 /**
  * Collects the hyperlinks of the current file into $this->local and $this->elsewhere.
  *
  * Every <a> element with an href is considered when its scheme is empty
  * (relative URL), http, https or feed. The href is made absolute and then
  * appended to $this->local when its authority is empty or equals the
  * authority of $this->file->url, and to $this->elsewhere otherwise. Both
  * lists are de-duplicated afterwards.
  *
  * @return true|null true when at least one link is held in either list, null otherwise.
  */
 public function get_links()
 {
     $links = SimplePie_Misc::get_element('a', $this->file->body);
     foreach ($links as $link) {
         if (!isset($link['attribs']['href']['data'])) {
             continue;
         }

         $href = trim($link['attribs']['href']['data']);
         $parsed = SimplePie_Misc::parse_url($href);

         // The "s" must be optional: the previous pattern, http(s), only
         // matched "https", so absolute http:// links were silently skipped.
         if ($parsed['scheme'] !== '' && !preg_match('/^(https?|feed)?$/i', $parsed['scheme'])) {
             continue;
         }

         // Links located after base_location resolve against $this->base,
         // earlier ones against $this->http_base.
         // NOTE(review): presumably base_location is the offset of a <base> element - confirm.
         $base = ($this->base_location < $link['offset']) ? $this->base : $this->http_base;
         $href = SimplePie_Misc::absolutize_url($href, $base);

         $current = SimplePie_Misc::parse_url($this->file->url);
         if ($parsed['authority'] === '' || $parsed['authority'] === $current['authority']) {
             $this->local[] = $href;
         } else {
             $this->elsewhere[] = $href;
         }
     }
     $this->local = array_unique($this->local);
     $this->elsewhere = array_unique($this->elsewhere);
     if (!empty($this->local) || !empty($this->elsewhere)) {
         return true;
     }
     return null;
 }
 /**
  * Cleans up a piece of feed content according to the options set on this object.
  *
  * Steps, in order: decode the content (base64 / entity decoding, depending
  * on the feed type and the element's MODE/TYPE attributes), strip HTML
  * comments, optionally strip known ad markup, optionally demote <h1>-<h3>
  * to <h4>, run replace_urls(), optionally rewrite <img> SRC attributes to go
  * through the hotlink-bypass page, strip the configured tags and attributes,
  * and finally convert the character encoding.
  *
  * @param string $data         Raw content to sanitize.
  * @param array  $attribs      Attributes of the source element; the upper-case MODE and TYPE
  *                             keys are read here. The array is also stored in $this->attribs.
  * @param bool   $is_url       When true, $data is handed to replace_urls($data, true) instead of
  *                             being run through the tag-matching callback.
  * @param bool   $force_decode When true, entities_decode() is applied regardless of feed type.
  * @return mixed The sanitized content as returned by SimplePie_Misc::change_encoding().
  */
 function sanitize($data, $attribs, $is_url = false, $force_decode = false)
 {
     $this->attribs = $attribs;
     if (isset($this->feedinfo['type']) && $this->feedinfo['type'] == 'Atom') {
         // && binds tighter than ||, so this reads:
         // (MODE is base64) OR (TYPE is application/octet-stream).
         if (!empty($attribs['MODE']) && $attribs['MODE'] == 'base64' || !empty($attribs['TYPE']) && $attribs['TYPE'] == 'application/octet-stream') {
             $data = trim($data);
             $data = base64_decode($data);
         } else {
             // Same precedence: (MODE is escaped) OR (TYPE is html or text/html).
             if (!empty($attribs['MODE']) && $attribs['MODE'] == 'escaped' || !empty($attribs['TYPE']) && ($attribs['TYPE'] == 'html' || $attribs['TYPE'] == 'text/html')) {
                 $data = $this->entities_decode($data);
             }
         }
         if (!empty($attribs['TYPE']) && ($attribs['TYPE'] == 'xhtml' || $attribs['TYPE'] == 'application/xhtml+xml')) {
             if ($this->remove_div) {
                 // Remove the LAST closing </div> by reversing the string and deleting the
                 // first ">vid/<", then remove the FIRST opening <div ...> tag.
                 $data = preg_replace('/<div( .*)?>/msiU', '', strrev(preg_replace('/>vid\\/</i', '', strrev($data), 1)), 1);
             } else {
                 // Keep the wrapper but drop any attributes from the first opening <div>.
                 $data = preg_replace('/<div( .*)?>/msiU', '<div>', $data, 1);
             }
             $data = $this->convert_entities($data);
         }
     } else {
         // Anything that is not flagged as Atom only gets its entities converted.
         $data = $this->convert_entities($data);
     }
     if ($force_decode) {
         $data = $this->entities_decode($data);
     }
     $data = trim($data);
     // Strip HTML comments (<!-- ... -->).
     $data = preg_replace('/<\\!--([^-]|-[^-])*-->/msiU', '', $data);
     // If Strip Ads is enabled, strip them.
     if ($this->strip_ads) {
         // Pheedo links (tested with Dooce.com)
         $data = preg_replace('/<a (.*)href=(.*)click\\.phdo\\?s=(.*)<\\/a>/msiU', '', $data);
         // Doubleclick links (tested with InfoWorld.com)
         $data = preg_replace('/<p(.*)>(.*)<a href="http:\\/\\/ad.doubleclick.net\\/jump\\/(.*)<\\/p>/msiU', '', $data);
         // Google AdSense for Feeds (tested with tuaw.com).
         $data = preg_replace('/<p><map (.*)name=(.*)google_ad_map(.*)<\\/p>/msiU', '', $data);
         // Feedflare, from Feedburner
         // NOTE(review): no pattern follows this heading in the code shown here.
     }
     // Replace H1, H2, and H3 tags with the less important H4 tags.
     // This is because on a site, the more important headers might make sense,
     // but it most likely doesn't fit in the context of RSS-in-a-webpage.
     if ($this->replace_headers) {
         // Opening tags: the captured attribute list (\1) is carried over to the <h4>.
         $data = preg_replace('/<h[1-3]((\\s*((\\w+:)?\\w+)\\s*=\\s*("([^"]*)"|\'([^\']*)\'|(.*)))*)\\s*>/msiU', '<h4\\1>', $data);
         // Closing tags.
         $data = preg_replace('/<\\/h[1-3]>/i', '</h4>', $data);
     }
     // NOTE(review): replace_urls() is called with two arguments in the first branch and used
     // as a preg callback in the second; its definition is not part of this block - confirm
     // that its signature supports both call forms.
     if ($is_url) {
         $data = $this->replace_urls($data, true);
     } else {
         // Run replace_urls() as a callback on every element that has attributes and is
         // either self-closing or followed by some closing tag.
         $data = preg_replace_callback('/<(\\S+)((\\s*((\\w+:)?\\w+)\\s*=\\s*("([^"]*)"|\'([^\']*)\'|(.*)))*)\\s*(\\/>|>(.*)<\\/\\S+>)/msiU', array(&$this, 'replace_urls'), $data);
     }
     // If Bypass Image Hotlink is enabled, rewrite all the image tags.
     if ($this->bypass_image_hotlink) {
         $images = SimplePie_Misc::get_element('img', $data);
         foreach ($images as $img) {
             // Only the upper-case 'SRC' key is looked at here.
             if (!empty($img['attribs']['SRC']['data'])) {
                 // Build "<page>?<param>=" where <page> is optional and <param> is the
                 // value of bypass_image_hotlink.
                 $pre = '';
                 if ($this->bypass_image_hotlink_page) {
                     $pre = $this->bypass_image_hotlink_page;
                 }
                 $pre .= "?{$this->bypass_image_hotlink}=";
                 // Turn HTML special-character entities back into literal characters
                 // (flipped translation table), then URL-encode the original image URL.
                 $img['attribs']['SRC']['data'] = $pre . rawurlencode(strtr($img['attribs']['SRC']['data'], array_flip(get_html_translation_table(HTML_SPECIALCHARS, ENT_QUOTES))));
                 $data = str_replace($img['full'], SimplePie_Misc::element_implode($img), $data);
             }
         }
     }
     // Strip out HTML tags and attributes that might cause various security problems.
     // Based on recommendations by Mark Pilgrim at:
     // http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely
     if ($this->strip_htmltags) {
         foreach ($this->strip_htmltags as $tag) {
             // Matches a self-closing <tag .../> or a full <tag ...>...</tag ...> pair and
             // hands each match to do_strip_htmltags().
             $data = preg_replace_callback("/<({$tag})((\\s*((\\w+:)?\\w+)(\\s*=\\s*(\"([^\"]*)\"|'([^']*)'|(.*)))?)*)\\s*(\\/>|>(.*)<\\/({$tag})((\\s*((\\w+:)?\\w+)(\\s*=\\s*(\"([^\"]*)\"|'([^']*)'|(.*)))?)*)\\s*>)/msiU", array(&$this, 'do_strip_htmltags'), $data);
         }
     }
     if ($this->strip_attributes) {
         // NOTE(review): $attrib is inserted into the patterns without preg_quote(); confirm
         // that configured attribute names never contain regex metacharacters.
         foreach ($this->strip_attributes as $attrib) {
             // Attribute with a double-quoted (or &quot;-delimited) value.
             $data = preg_replace('/ ' . trim($attrib) . '=("|&quot;)(\\w|\\s|=|-|:|;|\\/|\\.|\\?|&|,|#|!|\\(|\\)|\'|&apos;|<|>|\\+|{|})*("|&quot;)/i', '', $data);
             // Attribute with a single-quoted (or &apos;-delimited) value.
             $data = preg_replace('/ ' . trim($attrib) . '=(\'|&apos;)(\\w|\\s|=|-|:|;|\\/|\\.|\\?|&|,|#|!|\\(|\\)|"|&quot;|<|>|\\+|{|})*(\'|&apos;)/i', '', $data);
             // Attribute with an unquoted value.
             $data = preg_replace('/ ' . trim($attrib) . '=(\\w|\\s|=|-|:|;|\\/|\\.|\\?|&|,|#|!|\\(|\\)|\\+|{|})*/i', '', $data);
         }
     }
     // Convert encoding
     $data = SimplePie_Misc::change_encoding($data, $this->input_encoding, $this->output_encoding);
     return $data;
 }
Example #6
0
 /**
  * Makes one attribute of every $tag element in $data absolute.
  *
  * For each element returned by SimplePie_Misc::get_element() that carries
  * $attribute, the value is resolved against $this->base and the rebuilt
  * element replaces the original element text in $data.
  *
  * @param string $data      Markup to process.
  * @param string $tag       Element name handed to SimplePie_Misc::get_element().
  * @param string $attribute Name of the attribute holding the URL.
  * @return string The markup after all substitutions.
  */
 function replace_urls($data, $tag, $attribute)
 {
     foreach (SimplePie_Misc::get_element($tag, $data) as $node) {
         if (!isset($node['attribs'][$attribute]['data'])) {
             continue;
         }

         $relative = $node['attribs'][$attribute]['data'];
         $node['attribs'][$attribute]['data'] = SimplePie_Misc::absolutize_url($relative, $this->base);

         // $node['full'] still holds the element's original text, which is what gets replaced.
         $data = str_replace($node['full'], SimplePie_Misc::element_implode($node), $data);
     }

     return $data;
 }