utf8_bad_replace() public method

The PCRE pattern used to locate bad bytes in a UTF-8 string comes from the W3C FAQ "Multilingual Forms" (modified to include the full ASCII range).
See also: http://www.w3.org/International/questions/qa-forms-utf-8
Author: Geoffrey Sneddon
public utf8_bad_replace ( string $str ) : string
$str string String to remove bad UTF-8 bytes from
return string UTF-8 string
Example 1
0
    /**
     * Initialise the object: optionally serve helper output, then load the
     * feed (from cache or from the network / raw data), convert it to UTF-8,
     * parse it and store the result in $this->data.
     *
     * Side effects visible in this method:
     *  - may emit HTTP headers and output, then exit (the ?js helper and
     *    the XML dump mode);
     *  - may read, write, touch or unlink the cache entry;
     *  - sets $this->error on failure.
     *
     * @return bool true when feed data is available (freshly parsed or
     *              taken from a still-valid cache), false on any failure
     *              or when neither a feed URL nor raw data was supplied.
     */
    function init()
    {
        // Environment check: PHP 4.3.2+ (4.x line) or 5.0.3+, with the xml and pcre extensions.
        if (!(function_exists('version_compare') && (version_compare(phpversion(), '4.3.2', '>=') && version_compare(phpversion(), '5', '<') || version_compare(phpversion(), '5.0.3', '>='))) || !extension_loaded('xml') || !extension_loaded('pcre')) {
            return false;
        }
        // Image hotlink bypass: hand the requested file to display_file().
        // SECURITY NOTE(review): the value comes straight from $_GET; what
        // display_file() does with it is not visible here - confirm it
        // cannot be abused to fetch arbitrary URLs.
        if ($this->sanitize->bypass_image_hotlink && !empty($_GET[$this->sanitize->bypass_image_hotlink])) {
            if (get_magic_quotes_gpc()) {
                $_GET[$this->sanitize->bypass_image_hotlink] = stripslashes($_GET[$this->sanitize->bypass_image_hotlink]);
            }
            SimplePie_Misc::display_file($_GET[$this->sanitize->bypass_image_hotlink], 10, $this->useragent);
        }
        // ?js: serve the JavaScript embed helpers and stop.
        if (isset($_GET['js'])) {
            $embed = <<<EOT
function embed_odeo(link) {
\tdocument.writeln('<embed src="http://odeo.com/flash/audio_player_fullsize.swf" pluginspage="http://www.macromedia.com/go/getflashplayer" type="application/x-shockwave-flash" quality="high" width="440" height="80" wmode="transparent" allowScriptAccess="any" flashvars="valid_sample_rate=true&external_url='+link+'"></embed>');
}

function embed_quicktime(type, bgcolor, width, height, link, placeholder, loop) {
\tif (placeholder != '') {
\t\tdocument.writeln('<embed type="'+type+'" style="cursor:hand; cursor:pointer;" href="'+link+'" src="'+placeholder+'" width="'+width+'" height="'+height+'" autoplay="false" target="myself" controller="false" loop="'+loop+'" scale="aspect" bgcolor="'+bgcolor+'" pluginspage="http://www.apple.com/quicktime/download/"></embed>');
\t}
\telse {
\t\tdocument.writeln('<embed type="'+type+'" style="cursor:hand; cursor:pointer;" src="'+link+'" width="'+width+'" height="'+height+'" autoplay="false" target="myself" controller="true" loop="'+loop+'" scale="aspect" bgcolor="'+bgcolor+'" pluginspage="http://www.apple.com/quicktime/download/"></embed>');
\t}
}

function embed_flash(bgcolor, width, height, link, loop, type) {
\tdocument.writeln('<embed src="'+link+'" pluginspage="http://www.macromedia.com/shockwave/download/index.cgi?P1_Prod_Version=ShockwaveFlash" type="'+type+'" quality="high" width="'+width+'" height="'+height+'" bgcolor="'+bgcolor+'" loop="'+loop+'"></embed>');
}

function embed_wmedia(width, height, link) {
\tdocument.writeln('<embed type="application/x-mplayer2" src="'+link+'" autosize="1" width="'+width+'" height="'+height+'" showcontrols="1" showstatusbar="0" showdisplay="0" autostart="0"></embed>');
}
EOT;
            if (function_exists('ob_gzhandler')) {
                ob_start('ob_gzhandler');
            }
            // Media-type parameters use "name=value"; the previous
            // "charset: UTF-8" was not a valid charset parameter.
            header('Content-type: text/javascript; charset=UTF-8');
            header('Cache-Control: must-revalidate');
            // Expires one day (86400 seconds) from now
            header('Expires: ' . gmdate('D, d M Y H:i:s', time() + 86400) . ' GMT');
            echo $embed;
            exit;
        }
        if (!empty($this->rss_url) || !empty($this->raw_data)) {
            $this->data = array();
            $cache = false;
            if (!empty($this->rss_url)) {
                // Decide whether to enable caching (only for http/https URLs)
                if ($this->enable_cache && preg_match('/^http(s)?:\\/\\//i', $this->rss_url)) {
                    $cache = new $this->cache_class($this->cache_location, call_user_func($this->cache_name_type, $this->rss_url), 'spc');
                }
                // If it's enabled and we don't want an XML dump, use the cache
                if ($cache && !$this->xml_dump) {
                    // Load the Cache
                    $this->data = $cache->load();
                    if (!empty($this->data)) {
                        // If we've hit a collision just rerun it with caching disabled
                        if (isset($this->data['url']) && $this->data['url'] != $this->rss_url) {
                            $cache = false;
                        } else {
                            if (!empty($this->data['feed_url'])) {
                                // The cache entry is an autodiscovery pointer to the real feed URL
                                if ($this->data['feed_url'] == $this->data['url']) {
                                    // Pointer refers to itself: drop it and fetch normally
                                    $cache->unlink();
                                } else {
                                    // Follow the pointer and start over with the real feed URL
                                    $this->feed_url($this->data['feed_url']);
                                    return $this->init();
                                }
                            } else {
                                // Cache entry older than max_minutes: try to revalidate it
                                if ($cache->mtime() + $this->max_minutes * 60 < time()) {
                                    // If we have last-modified and/or etag set
                                    if (!empty($this->data['last-modified']) || !empty($this->data['etag'])) {
                                        $headers = array();
                                        if (!empty($this->data['last-modified'])) {
                                            $headers['if-modified-since'] = $this->data['last-modified'];
                                        }
                                        if (!empty($this->data['etag'])) {
                                            $headers['if-none-match'] = $this->data['etag'];
                                        }
                                        // Conditional request, made with a tenth of the normal timeout
                                        $file = new $this->file_class($this->rss_url, $this->timeout / 10, 5, $headers, $this->useragent, $this->force_fsockopen);
                                        if ($file->success) {
                                            $headers = $file->headers();
                                            // 304 Not Modified: the cached copy is still good
                                            if ($headers['status']['code'] == 304) {
                                                $cache->touch();
                                                return true;
                                            }
                                        } else {
                                            // Forget the failed request so the file is re-opened below
                                            unset($file);
                                        }
                                    } else {
                                        // Nothing to revalidate with: discard the stale entry
                                        $cache->unlink();
                                    }
                                } else {
                                    // Cache is still fresh: $this->data is ready to use
                                    return true;
                                }
                            }
                        }
                    } else {
                        // Empty or unreadable cache entry
                        $cache->unlink();
                    }
                }
                $this->data = array();
                // If we don't already have the file (it'll only exist if we've opened it to check if the cache has been modified), open it.
                if (!isset($file)) {
                    if (is_a($this->file, 'SimplePie_File') && $this->file->url == $this->rss_url) {
                        $file =& $this->file;
                    } else {
                        $file = new $this->file_class($this->rss_url, $this->timeout, 5, null, $this->useragent, $this->force_fsockopen);
                    }
                }
                // If the file connection has an error, set SimplePie::error to that and quit
                if (!$file->success) {
                    $this->error = $file->error;
                    return false;
                }
                // Check if the supplied URL is a feed, if it isn't, look for it.
                $locate = new $this->locator_class($file, $this->timeout, $this->useragent);
                if (!$locate->is_feed($file)) {
                    $feed = $locate->find();
                    if ($feed) {
                        // Remember the discovered feed URL in the cache, then restart with it
                        if ($cache && !$cache->save(array('url' => $this->rss_url, 'feed_url' => $feed))) {
                            $this->error = "{$cache->name} is not writeable";
                            SimplePie_Misc::error($this->error, E_USER_WARNING, __FILE__, __LINE__);
                        }
                        $this->rss_url = $feed;
                        return $this->init();
                    } else {
                        $this->error = "A feed could not be found at {$this->rss_url}";
                        SimplePie_Misc::error($this->error, E_USER_WARNING, __FILE__, __LINE__);
                        return false;
                    }
                }
                $headers = $file->headers();
                $data = trim($file->body());
                $file->close();
                unset($file);
            } else {
                // No URL: work on the raw data supplied by the caller
                // ($headers stays undefined on this path; the empty() checks below tolerate that)
                $data = $this->raw_data;
            }
            // Work out the input encoding, in priority order:
            // explicit override, HTTP charset, XML prolog, byte-order mark.
            // First check to see if input has been overridden.
            if (!empty($this->input_encoding)) {
                $encoding = $this->input_encoding;
            } else {
                if (!empty($headers['content-type']) && preg_match('/charset\\s*=\\s*([^;]*)/i', $headers['content-type'], $charset)) {
                    $encoding = $charset[1];
                } else {
                    if (preg_match('/^<\\?xml(.*)?>/msiU', $data, $prolog) && preg_match('/encoding\\s*=\\s*("([^"]*)"|\'([^\']*)\')/Ui', $prolog[1], $encoding)) {
                        // Strip the surrounding quote characters from the matched value
                        $encoding = substr($encoding[1], 1, -1);
                    } else {
                        // BOM sniffing; the 4-byte marks are tested before the
                        // 2-byte ones because FF FE is a prefix of FF FE 00 00.
                        if (strpos($data, sprintf('%c%c%c%c', 0x0, 0x0, 0xfe, 0xff)) === 0) {
                            $encoding = 'UTF-32be';
                        } else {
                            if (strpos($data, sprintf('%c%c%c%c', 0xff, 0xfe, 0x0, 0x0)) === 0) {
                                $encoding = 'UTF-32';
                            } else {
                                if (strpos($data, sprintf('%c%c', 0xfe, 0xff)) === 0) {
                                    $encoding = 'UTF-16be';
                                } else {
                                    if (strpos($data, sprintf('%c%c', 0xff, 0xfe)) === 0) {
                                        $encoding = 'UTF-16le';
                                    } else {
                                        if (strpos($data, sprintf('%c%c%c', 0xef, 0xbb, 0xbf)) === 0) {
                                            $encoding = 'UTF-8';
                                        } else {
                                            // Unknown: left for change_encoding() to deal with
                                            $encoding = null;
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
            // Change the encoding to UTF-8 (as we always use UTF-8 internally)
            $data = SimplePie_Misc::change_encoding($data, $encoding, 'UTF-8');
            // Strip illegal characters (if on less  than PHP5, as on PHP5 expat can manage fine)
            if (version_compare(phpversion(), '5', '<')) {
                if (function_exists('iconv')) {
                    $data = iconv('UTF-8', 'UTF-8//IGNORE', $data);
                } else {
                    if (function_exists('mb_convert_encoding')) {
                        $data = mb_convert_encoding($data, 'UTF-8', 'UTF-8');
                    } else {
                        $data = SimplePie_Misc::utf8_bad_replace($data);
                    }
                }
            }
            // Start parsing ($data becomes the parser object from here on)
            $data = new $this->parser_class($data, 'UTF-8', $this->xml_dump);
            // If we want the XML, just output that and quit
            if ($this->xml_dump) {
                header('Content-type: text/xml; charset=UTF-8');
                echo $data->data;
                exit;
            } else {
                if (!$data->error_code) {
                    // Parse the data, and make it sane
                    $this->sanitize->parse_data_array($data->data, $this->rss_url);
                    unset($data);
                    // Get the sane data (moved out of the sanitizer, which is then cleared)
                    $this->data['feedinfo'] = $this->sanitize->feedinfo;
                    unset($this->sanitize->feedinfo);
                    $this->data['info'] = $this->sanitize->info;
                    unset($this->sanitize->info);
                    $this->data['items'] = $this->sanitize->items;
                    unset($this->sanitize->items);
                    $this->data['feedinfo']['encoding'] = $this->sanitize->output_encoding;
                    $this->data['url'] = $this->rss_url;
                    // Store the headers that we need
                    if (!empty($headers['last-modified'])) {
                        $this->data['last-modified'] = $headers['last-modified'];
                    }
                    if (!empty($headers['etag'])) {
                        $this->data['etag'] = $headers['etag'];
                    }
                    // If we want to order it by date, check if all items have a date, and then sort it
                    if ($this->order_by_date && !empty($this->data['items'])) {
                        $do_sort = true;
                        foreach ($this->data['items'] as $item) {
                            if (!$item->get_date('U')) {
                                $do_sort = false;
                                break;
                            }
                        }
                        if ($do_sort) {
                            // Newest first. Equal dates must compare as 0: the
                            // previous callback returned 1 for ties, which
                            // violates the usort() comparator contract.
                            usort($this->data['items'], create_function('$a, $b', 'if ($a->get_date(\'U\') == $b->get_date(\'U\')) return 0; return ($a->get_date(\'U\') < $b->get_date(\'U\')) ? 1 : -1;'));
                        }
                    }
                    // Cache the file if caching is enabled
                    if ($cache && !$cache->save($this->data)) {
                        $this->error = "{$cache->name} is not writeable";
                        SimplePie_Misc::error($this->error, E_USER_WARNING, __FILE__, __LINE__);
                    }
                    return true;
                } else {
                    $this->error = "XML error: {$data->error_string} at line {$data->current_line}, column {$data->current_column}";
                    SimplePie_Misc::error($this->error, E_USER_WARNING, __FILE__, __LINE__);
                    return false;
                }
            }
        }
        // Neither a feed URL nor raw data was supplied: report failure
        // explicitly instead of falling off the end and returning NULL.
        return false;
    }
Example 2
0
    /**
     * Initialise the object: optionally serve the JavaScript embed helpers,
     * then load the feed (from cache, the network or raw data), convert it
     * to UTF-8, parse it and store the result in $this->data. When no single
     * feed is configured but $this->multifeed_url is non-empty, one object
     * per URL is initialised instead.
     *
     * Side effects visible in this method:
     *  - may emit HTTP headers and output, then exit (the JavaScript helper
     *    and the XML dump mode);
     *  - may read, write, touch or unlink cache entries;
     *  - sets $this->error on failure.
     *
     * @return bool true when feed data is available (freshly parsed, taken
     *              from a valid cache, or stale cache data kept after a
     *              failed fetch); for multifeeds, true when at least one
     *              feed initialised; false otherwise.
     */
    function init()
    {
        // Environment check: requires the xml and pcre extensions; the PHP
        // version is only tested when version_compare() exists.
        if (function_exists('version_compare') && version_compare(PHP_VERSION, '4.1.0', '<') || !extension_loaded('xml') || !extension_loaded('pcre')) {
            return false;
        }
        // Serve the JavaScript embed helpers when the configured query key is present
        if (isset($_GET[$this->javascript])) {
            if (function_exists('ob_gzhandler')) {
                ob_start('ob_gzhandler');
            }
            // Media-type parameters use "name=value"; the previous
            // "charset: UTF-8" was not a valid charset parameter.
            header('Content-type: text/javascript; charset=UTF-8');
            header('Cache-Control: must-revalidate');
            header('Expires: ' . gmdate('D, d M Y H:i:s', time() + 604800) . ' GMT');
            // 7 days
            ?>
function embed_odeo(link) {
	document.writeln('<embed src="http://odeo.com/flash/audio_player_fullsize.swf" pluginspage="http://www.macromedia.com/go/getflashplayer" type="application/x-shockwave-flash" quality="high" width="440" height="80" wmode="transparent" allowScriptAccess="any" flashvars="valid_sample_rate=true&external_url='+link+'"></embed>');
}

function embed_quicktime(type, bgcolor, width, height, link, placeholder, loop) {
	if (placeholder != '') {
		document.writeln('<embed type="'+type+'" style="cursor:hand; cursor:pointer;" href="'+link+'" src="'+placeholder+'" width="'+width+'" height="'+height+'" autoplay="false" target="myself" controller="false" loop="'+loop+'" scale="aspect" bgcolor="'+bgcolor+'" pluginspage="http://www.apple.com/quicktime/download/"></embed>');
	}
	else {
		document.writeln('<embed type="'+type+'" style="cursor:hand; cursor:pointer;" src="'+link+'" width="'+width+'" height="'+height+'" autoplay="false" target="myself" controller="true" loop="'+loop+'" scale="aspect" bgcolor="'+bgcolor+'" pluginspage="http://www.apple.com/quicktime/download/"></embed>');
	}
}

function embed_flash(bgcolor, width, height, link, loop, type) {
	document.writeln('<embed src="'+link+'" pluginspage="http://www.macromedia.com/go/getflashplayer" type="'+type+'" quality="high" width="'+width+'" height="'+height+'" bgcolor="'+bgcolor+'" loop="'+loop+'"></embed>');
}

function embed_flv(width, height, link, placeholder, loop, player) {
	document.writeln('<embed src="'+player+'" pluginspage="http://www.macromedia.com/go/getflashplayer" type="application/x-shockwave-flash" quality="high" width="'+width+'" height="'+height+'" wmode="transparent" flashvars="file='+link+'&autostart=false&repeat='+loop+'&showdigits=true&showfsbutton=false"></embed>');
}

function embed_wmedia(width, height, link) {
	document.writeln('<embed type="application/x-mplayer2" src="'+link+'" autosize="1" width="'+width+'" height="'+height+'" showcontrols="1" showstatusbar="0" showdisplay="0" autostart="0"></embed>');
}
			<?php 
            exit;
        }
        // Pass whatever was set with config options over to the sanitizer.
        $this->sanitize->pass_cache_data($this->cache, $this->cache_location, $this->cache_name_function, $this->cache_class);
        $this->sanitize->pass_file_data($this->file_class, $this->timeout, $this->useragent, $this->force_fsockopen);
        if ($this->feed_url !== null || $this->raw_data !== null) {
            $this->data = array();
            $this->multifeed_objects = array();
            $cache = false;
            if ($this->feed_url !== null) {
                $parsed_feed_url = SimplePie_Misc::parse_url($this->feed_url);
                // Decide whether to enable caching
                if ($this->cache && $parsed_feed_url['scheme'] !== '') {
                    $cache = new $this->cache_class($this->cache_location, call_user_func($this->cache_name_function, $this->feed_url), 'spc');
                }
                // If it's enabled and we don't want an XML dump, use the cache
                if ($cache && !$this->xml_dump) {
                    // Load the Cache
                    $this->data = $cache->load();
                    if (!empty($this->data)) {
                        // If the cache is for an outdated build of SimplePie
                        if (!isset($this->data['build']) || $this->data['build'] != SIMPLEPIE_BUILD) {
                            $cache->unlink();
                            $this->data = array();
                        } elseif (isset($this->data['url']) && $this->data['url'] != $this->feed_url) {
                            // Cache-name collision: the entry belongs to another URL, so run uncached
                            $cache = false;
                            $this->data = array();
                        } elseif (isset($this->data['feed_url'])) {
                            // The cache entry is an autodiscovery pointer to the real feed URL.
                            // If the autodiscovery cache is still valid use it.
                            if ($cache->mtime() + $this->autodiscovery_cache_duration > time()) {
                                // Do not need to do feed autodiscovery yet.
                                if ($this->data['feed_url'] == $this->data['url']) {
                                    // Pointer refers to itself: drop it and fetch normally
                                    $cache->unlink();
                                    $this->data = array();
                                } else {
                                    // Follow the pointer and start over with the real feed URL
                                    $this->set_feed_url($this->data['feed_url']);
                                    return $this->init();
                                }
                            }
                        } elseif ($cache->mtime() + $this->cache_duration < time()) {
                            // Cache entry has expired: try to revalidate it.
                            // If we have last-modified and/or etag set
                            if (isset($this->data['headers']['last-modified']) || isset($this->data['headers']['etag'])) {
                                $headers = array();
                                if (isset($this->data['headers']['last-modified'])) {
                                    $headers['if-modified-since'] = $this->data['headers']['last-modified'];
                                }
                                if (isset($this->data['headers']['etag'])) {
                                    $headers['if-none-match'] = $this->data['headers']['etag'];
                                }
                                // Conditional request, made with a tenth of the normal timeout
                                $file = new $this->file_class($this->feed_url, $this->timeout / 10, 5, $headers, $this->useragent, $this->force_fsockopen);
                                if ($file->success) {
                                    if ($file->status_code == 304) {
                                        // Not Modified: the cached copy is still good
                                        $cache->touch();
                                        return true;
                                    } else {
                                        $headers = $file->headers;
                                    }
                                } else {
                                    // Forget the failed request so the file is re-opened below
                                    unset($file);
                                }
                            }
                        } else {
                            // Cache is still fresh: $this->data is ready to use
                            return true;
                        }
                    } else {
                        // Empty or unreadable cache entry
                        $cache->unlink();
                        $this->data = array();
                    }
                }
                // If we don't already have the file (it'll only exist if we've opened it to check if the cache has been modified), open it.
                if (!isset($file)) {
                    if (SimplePie_Misc::is_a($this->file, 'SimplePie_File') && $this->file->url == $this->feed_url) {
                        $file =& $this->file;
                    } else {
                        $file = new $this->file_class($this->feed_url, $this->timeout, 5, null, $this->useragent, $this->force_fsockopen);
                    }
                }
                // If the file connection has an error, set SimplePie::error to that and quit
                if (!$file->success) {
                    $this->error = $file->error;
                    // Stale cache data that was kept above still counts as success
                    if (!empty($this->data)) {
                        return true;
                    } else {
                        return false;
                    }
                }
                // Check if the supplied URL is a feed, if it isn't, look for it.
                $locate = new $this->locator_class($file, $this->timeout, $this->useragent, $this->file_class, $this->max_checked_feeds);
                if (!$locate->is_feed($file)) {
                    // We need to unset this so that if SimplePie::set_file() has been called that object is untouched
                    unset($file);
                    if ($file = $locate->find($this->autodiscovery)) {
                        if ($cache) {
                            // Store an autodiscovery pointer under the original URL...
                            if (!$cache->save(array('url' => $this->feed_url, 'feed_url' => $file->url, 'build' => SIMPLEPIE_BUILD))) {
                                trigger_error("{$cache->name} is not writeable", E_USER_WARNING);
                            }
                            // ...and switch to the cache entry of the discovered feed
                            $cache = new $this->cache_class($this->cache_location, call_user_func($this->cache_name_function, $file->url), 'spc');
                        }
                        $this->feed_url = $file->url;
                    } else {
                        $this->error = "A feed could not be found at {$this->feed_url}";
                        SimplePie_Misc::error($this->error, E_USER_NOTICE, __FILE__, __LINE__);
                        return false;
                    }
                }
                $locate = null;
                $headers = $file->headers;
                $data = trim($file->body);
                unset($file);
            } else {
                // No URL: work on the raw data supplied by the caller
                // ($headers stays undefined on this path; the isset() checks below tolerate that)
                $data = $this->raw_data;
            }
            // Work out the input encoding, in priority order: explicit
            // override, HTTP charset, XML prolog, byte-order mark, then a
            // default derived from the MIME type.
            // First check to see if input has been overridden.
            if ($this->input_encoding !== false) {
                $encoding = $this->input_encoding;
            } elseif (isset($headers['content-type']) && preg_match('/;[\\x09\\x20]*charset=([^;]*)/i', $headers['content-type'], $charset)) {
                $encoding = $charset[1];
            } elseif (preg_match("/^<\\?xml[ \t\r\n]+version([ \t\r\n]+)?=([ \t\r\n]+)?(\"1.0\"|'1.0'|\"1.1\"|'1.1')[ \t\r\n]+encoding([ \t\r\n]+)?=([ \t\r\n]+)?(\"[A-Za-z][A-Za-z0-9._\\-]*\"|'[A-Za-z][A-Za-z0-9._\\-]*')([ \t\r\n]+standalone([ \t\r\n]+)?=([ \t\r\n]+)?(\"(yes|no)\"|'(yes|no)'))?([ \t\r\n]+)?\\?>/", $data, $prolog)) {
                // Group 6 is the quoted encoding name; strip the quote characters
                $encoding = substr($prolog[6], 1, -1);
            // BOM sniffing. The needles are written as hex escapes because
            // raw BOM bytes do not survive re-encoding of this source file
            // (they had decayed into identical replacement characters and
            // an empty string). The 4-byte marks are tested before the
            // 2-byte ones because FF FE is a prefix of FF FE 00 00.
            } elseif (strpos($data, "\x00\x00\xFE\xFF") === 0) {
                $encoding = 'UTF-32be';
            } elseif (strpos($data, "\xFF\xFE\x00\x00") === 0) {
                $encoding = 'UTF-32';
            } elseif (strpos($data, "\xFE\xFF") === 0) {
                $encoding = 'UTF-16be';
            } elseif (strpos($data, "\xFF\xFE") === 0) {
                $encoding = 'UTF-16le';
            } elseif (strpos($data, "\xEF\xBB\xBF") === 0) {
                $encoding = 'UTF-8';
            } elseif (isset($headers['content-type']) && strtolower(SimplePie_Misc::parse_mime($headers['content-type'])) == 'text/xml') {
                $encoding = 'US-ASCII';
            } elseif (isset($headers['content-type']) && SimplePie_Misc::stripos(SimplePie_Misc::parse_mime($headers['content-type']), 'text/') === 0) {
                $encoding = 'ISO-8859-1';
            } else {
                $encoding = 'UTF-8';
            }
            // Change the encoding to UTF-8 (as we always use UTF-8 internally)
            if ($encoding != 'UTF-8') {
                $data = SimplePie_Misc::change_encoding($data, $encoding, 'UTF-8');
            }
            // Strip illegal characters
            $data = SimplePie_Misc::utf8_bad_replace($data);
            $parser = new $this->parser_class();
            $parser->pre_process($data, 'UTF-8');
            // If we want the XML, just output that and quit
            if ($this->xml_dump) {
                header('Content-type: text/xml; charset=UTF-8');
                echo $data;
                exit;
            } elseif ($parser->parse($data)) {
                unset($data);
                $this->data = $parser->get_data();
                if (isset($this->data['child'])) {
                    // Keep the response headers with the data for later cache revalidation
                    if (isset($headers)) {
                        $this->data['headers'] = $headers;
                    }
                    $this->data['build'] = SIMPLEPIE_BUILD;
                    // Cache the file if caching is enabled
                    if ($cache && !$cache->save($this->data)) {
                        trigger_error("{$cache->name} is not writeable", E_USER_WARNING);
                    }
                    return true;
                } else {
                    $this->error = "A feed could not be found at {$this->feed_url}";
                    SimplePie_Misc::error($this->error, E_USER_NOTICE, __FILE__, __LINE__);
                    return false;
                }
            } else {
                $this->error = sprintf('XML error: %s at line %d, column %d', $parser->get_error_string(), $parser->get_current_line(), $parser->get_current_column());
                SimplePie_Misc::error($this->error, E_USER_NOTICE, __FILE__, __LINE__);
                return false;
            }
        } elseif (!empty($this->multifeed_url)) {
            // Multifeed mode: initialise one copy of this object per URL
            $i = 0;
            $success = 0;
            $this->multifeed_objects = array();
            foreach ($this->multifeed_url as $url) {
                if (SIMPLEPIE_PHP5) {
                    // This keyword needs to defy coding standards for PHP4 compatibility
                    // (written with parentheses so that PHP 4 can still parse
                    // the file; the bare "clone $this" form contradicted this note)
                    $this->multifeed_objects[$i] = clone($this);
                } else {
                    $this->multifeed_objects[$i] = $this;
                }
                $this->multifeed_objects[$i]->set_feed_url($url);
                // Bitwise OR: success ends up truthy if any feed initialised
                $success |= $this->multifeed_objects[$i]->init();
                $i++;
            }
            return (bool) $success;
        } else {
            return false;
        }
    }