/**
 * Decode a string and then resolve any named HTML entities that are still
 * present, using a lazily built and statically cached lookup table.
 *
 * First pass: when the string contains the value returned by xss_hash(),
 * it is passed through UTF8::html_entity_decode(); otherwise it is passed
 * through UTF8::urldecode().
 *
 * Second pass: every "&name" occurrence (letters only, at least two) is
 * lower-cased, suffixed with ";" and looked up in the table. Matching
 * entities are replaced case-insensitively.
 *
 * @param string $str <p>The input string.</p>
 *
 * @return string <p>The decoded string.</p>
 */
private function _entity_decode($str)
{
    // Built once per process on first use; see the null check below.
    static $entities;

    $flags = Bootup::is_php('5.4') ? ENT_QUOTES | ENT_HTML5 : ENT_QUOTES;

    // decode
    if (strpos($str, $this->xss_hash()) !== false) {
        $str = UTF8::html_entity_decode($str, $flags);
    } else {
        $str = UTF8::urldecode($str);
    }

    // decode-again, for e.g. HHVM, PHP 5.3, miss configured applications ...
    //
    // "[;]{0}" matches the empty string, so only the "&name" part is captured;
    // the terminating ";" is appended below before the table lookup.
    if (preg_match_all('/&[a-z]{2,}[;]{0}/i', $str, $matches)) {

        if (null === $entities) {

            // links:
            // - http://dev.w3.org/html5/html-author/charref
            // - http://www.w3schools.com/charsets/ref_html_entities_n.asp
            //
            // Keys must be lower-case named references ("&name;"), because
            // that is the only shape the lookup below can produce. Numeric
            // references are therefore not listed here: the regex above
            // never captures them, so such keys could never be hit.
            $entitiesSecurity = array(
                '&nvgt;'    => '',
                '&shy;'     => '',
                '&colon;'   => ':',
                '&lpar;'    => '(',
                '&rpar;'    => ')',
                '&quest;'   => '?',
                '&sol;'     => '/',
                '&apos;'    => '\'',
                // "&bsol;" is the backslash (U+005C), not an apostrophe.
                '&bsol;'    => '\\',
                '&comma;'   => ',',
                '&period;'  => '.',
                '&quot;'    => '"',
                '&grave;'   => '`',
                '&equals;'  => '=',
                '&newline;' => "\n",
                '&tab;'     => "\t",
            );

            // get_html_translation_table() maps character => entity, so the
            // merged table is flipped to obtain entity => character.
            $entitiesTmp = get_html_translation_table(HTML_ENTITIES, $flags);
            $entitiesTmp = array_merge(self::$entitiesFallback, $entitiesTmp);

            // NOTE(review): array_map() lower-cases the *values* (decoded
            // characters) of the flipped table, not its entity-name keys, and
            // array_merge() lets the flipped table override same-named entries
            // of $entitiesSecurity - confirm both are intended.
            $entities = array_merge(
                $entitiesSecurity,
                array_map('strtolower', array_flip($entitiesTmp))
            );
        }

        $replace = array();
        $matches = array_unique(array_map('strtolower', $matches[0]));
        foreach ($matches as $match) {
            $match .= ';';

            // array_key_exists() rather than isset(): defensive against
            // null values in the merged table.
            if (array_key_exists($match, $entities) === true) {
                $replace[$match] = $entities[$match];
            }
        }

        if (count($replace) > 0) {
            // Case-insensitive, because the keys were lower-cased above while
            // the entities inside $str keep their original case.
            $str = str_ireplace(array_keys($replace), array_values($replace), $str);
        }
    }

    return $str;
}
/**
 * Transliterates characters to their ASCII equivalents.
 * $language specifies a priority for a specific language.
 * The latter is useful if languages have different rules for the same character.
 *
 * Steps: initialise the maps for $language, optionally run the input through
 * UTF8::urldecode(), replace every regex match that has an entry in the
 * language map, and finally (unless disabled) hand the result to
 * UTF8::to_ascii().
 *
 * @param string  $string                            <p>The input string.</p>
 * @param string  $language                          <p>Your primary language.</p>
 * @param boolean $convertToAsciiOnlyViaLanguageMaps <p>
 *                                                   Set to <strong>true</strong> if you only want to convert the
 *                                                   language-maps.
 *                                                   (better performance, but less complete ASCII converting)
 *                                                   </p>
 * @param string  $unknown                           <p>
 *                                                   Character passed to UTF8::to_ascii() for unknown characters.
 *                                                   (default is an empty string)
 *                                                   </p>
 * @param boolean $convertUtf8Specials               <p>
 *                                                   Convert (html) special chars with portable-utf8 (e.g. \0,
 *                                                   \xE9, %F6, ...) by calling UTF8::urldecode() first.
 *                                                   </p>
 *
 * @return string
 */
public static function downcode($string, $language = 'de', $convertToAsciiOnlyViaLanguageMaps = false, $unknown = '', $convertUtf8Specials = true)
{
    self::init_downcode($language);

    if ($convertUtf8Specials === true) {
        $string = UTF8::urldecode($string);
    }

    // Parallel lists: $needles[$i] is replaced by $substitutes[$i].
    $needles = array();
    $substitutes = array();

    if (preg_match_all(self::$regex, $string, $found)) {
        foreach ($found[0] as $character) {
            // Matches without a map entry are left untouched at this stage.
            if (!isset(self::$map[$character])) {
                continue;
            }

            $needles[] = $character;
            $substitutes[] = self::$map[$character];
        }
    }

    $string = str_replace($needles, $substitutes, $string);

    if ($convertToAsciiOnlyViaLanguageMaps !== true) {
        return UTF8::to_ascii($string, $unknown);
    }

    // Language maps only: skip the generic ASCII conversion.
    return (string) $string;
}