/**
 * Clean up a piece of feed content before it is handed back to the caller.
 *
 * Steps, in order: Atom-specific decoding (base64 / escaped / XHTML wrapper
 * handling) or plain entity conversion, optional forced entity decoding,
 * HTML comment removal, optional ad stripping, optional H1-H3 -> H4
 * demotion, URL rewriting via replace_urls(), optional image hotlink
 * bypass, removal of the configured tags and attributes, and finally
 * conversion from the input encoding to the output encoding.
 *
 * Side effect: stores $attribs in $this->attribs.
 *
 * @param string $data         Raw content of the element.
 * @param array  $attribs      Attributes of the element the content came from;
 *                             only the 'MODE' and 'TYPE' keys are read here.
 * @param bool   $is_url       When true, $data as a whole is passed to
 *                             replace_urls(); otherwise replace_urls() is run
 *                             as a callback over every element found in $data.
 * @param bool   $force_decode When true, entities_decode() is applied after
 *                             the feed-type specific handling.
 * @return mixed Whatever SimplePie_Misc::change_encoding() returns for the
 *               sanitized data (presumably a string).
 */
function sanitize($data, $attribs, $is_url = false, $force_decode = false)
{
    $this->attribs = $attribs;

    if (isset($this->feedinfo['type']) && $this->feedinfo['type'] == 'Atom') {
        // Missing or empty attributes are treated as '' so the comparisons
        // below behave exactly like "!empty(...) && ... == ...".
        $mode = !empty($attribs['MODE']) ? $attribs['MODE'] : '';
        $type = !empty($attribs['TYPE']) ? $attribs['TYPE'] : '';

        if ($mode == 'base64' || $type == 'application/octet-stream') {
            $data = trim($data);
            $data = base64_decode($data);
        } elseif ($mode == 'escaped' || $type == 'html' || $type == 'text/html') {
            $data = $this->entities_decode($data);
        }

        if ($type == 'xhtml' || $type == 'application/xhtml+xml') {
            if ($this->remove_div) {
                // Drop the LAST closing </div>: reverse the string, remove the
                // first ">vid/<" and reverse it back ...
                $reversed = preg_replace('/>vid\\/</i', '', strrev($data), 1);
                // ... then drop the FIRST opening <div ...>.
                $data = preg_replace('/<div( .*)?>/msiU', '', strrev($reversed), 1);
            } else {
                // Keep the wrapper but strip the attributes of the first <div>.
                $data = preg_replace('/<div( .*)?>/msiU', '<div>', $data, 1);
            }
            $data = $this->convert_entities($data);
        }
    } else {
        $data = $this->convert_entities($data);
    }

    if ($force_decode) {
        $data = $this->entities_decode($data);
    }

    $data = trim($data);

    // Remove HTML comments.
    $data = preg_replace('/<\\!--([^-]|-[^-])*-->/msiU', '', $data);

    // If Strip Ads is enabled, strip them.
    if ($this->strip_ads) {
        // Pheedo links (tested with Dooce.com)
        $data = preg_replace('/<a (.*)href=(.*)click\\.phdo\\?s=(.*)<\\/a>/msiU', '', $data);
        // Doubleclick links (tested with InfoWorld.com)
        $data = preg_replace('/<p(.*)>(.*)<a href="http:\\/\\/ad.doubleclick.net\\/jump\\/(.*)<\\/p>/msiU', '', $data);
        // Google AdSense for Feeds (tested with tuaw.com).
        $data = preg_replace('/<p><map (.*)name=(.*)google_ad_map(.*)<\\/p>/msiU', '', $data);
        // Feedflare, from Feedburner
        // (no rule is implemented for this one)
    }

    // Replace H1, H2, and H3 tags with the less important H4 tags.
    // This is because on a site, the more important headers might make sense,
    // but it most likely doesn't fit in the context of RSS-in-a-webpage.
    if ($this->replace_headers) {
        $data = preg_replace('/<h[1-3]((\\s*((\\w+:)?\\w+)\\s*=\\s*("([^"]*)"|\'([^\']*)\'|(.*)))*)\\s*>/msiU', '<h4\\1>', $data);
        $data = preg_replace('/<\\/h[1-3]>/i', '</h4>', $data);
    }

    // NOTE(review): the replace_urls() definitions that sit next to this
    // method take ($data, $tag, $attribute[s]); neither call below matches
    // that signature. Confirm which replace_urls() this method is paired with.
    if ($is_url) {
        $data = $this->replace_urls($data, true);
    } else {
        $data = preg_replace_callback('/<(\\S+)((\\s*((\\w+:)?\\w+)\\s*=\\s*("([^"]*)"|\'([^\']*)\'|(.*)))*)\\s*(\\/>|>(.*)<\\/\\S+>)/msiU', array(&$this, 'replace_urls'), $data);
    }

    // If Bypass Image Hotlink is enabled, rewrite all the image tags.
    if ($this->bypass_image_hotlink) {
        // Both values are the same for every image, so build them once.
        $entity_to_char = array_flip(get_html_translation_table(HTML_SPECIALCHARS, ENT_QUOTES));
        $prefix = '';
        if ($this->bypass_image_hotlink_page) {
            $prefix = $this->bypass_image_hotlink_page;
        }
        $prefix .= "?{$this->bypass_image_hotlink}=";

        $images = SimplePie_Misc::get_element('img', $data);
        foreach ($images as $img) {
            if (!empty($img['attribs']['SRC']['data'])) {
                // Undo HTML escaping of the original URL before URL-encoding
                // it into the query string of the proxy page.
                $img['attribs']['SRC']['data'] = $prefix . rawurlencode(strtr($img['attribs']['SRC']['data'], $entity_to_char));
                $data = str_replace($img['full'], SimplePie_Misc::element_implode($img), $data);
            }
        }
    }

    // Strip out HTML tags and attributes that might cause various security problems.
    // Based on recommendations by Mark Pilgrim at:
    // http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely
    if ($this->strip_htmltags) {
        foreach ($this->strip_htmltags as $tag) {
            // Quote the name so it is always matched literally inside the pattern.
            $tag = preg_quote($tag, '/');
            $data = preg_replace_callback("/<({$tag})((\\s*((\\w+:)?\\w+)(\\s*=\\s*(\"([^\"]*)\"|'([^']*)'|(.*)))?)*)\\s*(\\/>|>(.*)<\\/({$tag})((\\s*((\\w+:)?\\w+)(\\s*=\\s*(\"([^\"]*)\"|'([^']*)'|(.*)))?)*)\\s*>)/msiU", array(&$this, 'do_strip_htmltags'), $data);
        }
    }

    if ($this->strip_attributes) {
        foreach ($this->strip_attributes as $attrib) {
            $name = preg_quote(trim($attrib), '/');
            // Value delimited by double quotes (literal or entity-encoded).
            $data = preg_replace('/ ' . $name . '=("|&quot;)(\\w|\\s|=|-|:|;|\\/|\\.|\\?|&|,|#|!|\\(|\\)|\'|&#039;|<|>|\\+|{|})*("|&quot;)/i', '', $data);
            // Value delimited by single quotes (literal or entity-encoded).
            $data = preg_replace('/ ' . $name . '=(\'|&#039;)(\\w|\\s|=|-|:|;|\\/|\\.|\\?|&|,|#|!|\\(|\\)|"|&quot;|<|>|\\+|{|})*(\'|&#039;)/i', '', $data);
            // Unquoted value.
            $data = preg_replace('/ ' . $name . '=(\\w|\\s|=|-|:|;|\\/|\\.|\\?|&|,|#|!|\\(|\\)|\\+|{|})*/i', '', $data);
        }
    }

    // Convert encoding
    $data = SimplePie_Misc::change_encoding($data, $this->input_encoding, $this->output_encoding);

    return $data;
}
/**
 * Rewrite URL-carrying attributes of every $tag element found in $data.
 *
 * Nothing is done when $tag is listed in $this->strip_htmltags. Otherwise,
 * for each element returned by SimplePie_Misc::get_element(), every
 * requested attribute that is present is passed through
 * SimplePie_Misc::absolutize_url() together with $this->base, and the
 * element's original markup in $data is replaced by the re-imploded element.
 *
 * NOTE(review): a second replace_urls() taking a single $attribute is
 * defined right after this one; PHP cannot declare both in the same scope.
 * Confirm which definition is the live one.
 *
 * @param string       $data       Markup to process.
 * @param string       $tag        Element name to look for.
 * @param array|string $attributes One attribute key or a list of keys.
 * @return string $data with the matching elements rewritten.
 */
function replace_urls($data, $tag, $attributes)
{
    // Tags that are going to be stripped anyway are left untouched.
    if (is_array($this->strip_htmltags) && in_array($tag, $this->strip_htmltags)) {
        return $data;
    }

    // Treat a single attribute as a one-item list so one loop serves both forms.
    $wanted = is_array($attributes) ? $attributes : array($attributes);

    foreach (SimplePie_Misc::get_element($tag, $data) as $node) {
        foreach ($wanted as $key) {
            if (!isset($node['attribs'][$key]['data'])) {
                continue;
            }

            $node['attribs'][$key]['data'] = SimplePie_Misc::absolutize_url($node['attribs'][$key]['data'], $this->base);

            $rebuilt = SimplePie_Misc::element_implode($node);
            $data = str_replace($node['full'], $rebuilt, $data);

            // Remember the new markup so a later attribute of the same
            // element is replaced against what is now in $data.
            $node['full'] = $rebuilt;
        }
    }

    return $data;
}
/**
 * Rewrite one URL-carrying attribute of every $tag element found in $data.
 *
 * For each element returned by SimplePie_Misc::get_element() that has the
 * attribute, its value is passed through SimplePie_Misc::absolutize_url()
 * together with $this->base, and the element's original markup in $data is
 * replaced by the re-imploded element.
 *
 * NOTE(review): another replace_urls() accepting a list of attributes is
 * defined right before this one; PHP cannot declare both in the same scope.
 * Confirm which definition is the live one.
 *
 * @param string $data      Markup to process.
 * @param string $tag       Element name to look for.
 * @param string $attribute Attribute key to rewrite.
 * @return string $data with the matching elements rewritten.
 */
function replace_urls($data, $tag, $attribute)
{
    foreach (SimplePie_Misc::get_element($tag, $data) as $node) {
        // Elements without the attribute are left as they are.
        if (!isset($node['attribs'][$attribute]['data'])) {
            continue;
        }

        $node['attribs'][$attribute]['data'] = SimplePie_Misc::absolutize_url($node['attribs'][$attribute]['data'], $this->base);

        $rebuilt = SimplePie_Misc::element_implode($node);
        $data = str_replace($node['full'], $rebuilt, $data);
    }

    return $data;
}