/**
 * Recursively cleans arrays, objects and strings: strips ASCII control
 * characters and re-encodes strings in the target character set, dropping
 * byte sequences that are not valid in it.
 *
 * Arrays are rebuilt, so a key that changes while being cleaned does not
 * leave its uncleaned twin behind. Objects implementing ArrayAccess are
 * written through their offsets; for other non-Traversable objects the
 * values of the accessible properties are cleaned in place (property names
 * are left as they are). Other Traversable objects are returned untouched
 * because there is no generic way to write back into them.
 *
 * @param   mixed   $var      variable to clean
 * @param   string  $charset  character set; the application charset is used when empty
 * @return  mixed   the cleaned value; values that are neither array, object nor string are returned unchanged
 * @uses    UTF8::clean
 * @uses    UTF8::strip_ascii_ctrl
 * @uses    UTF8::is_ascii
 */
public static function clean($var, $charset = NULL)
{
    if (!$charset) {
        // Use the application character set
        $charset = JsonApiApplication::$charset;
    }

    if (is_array($var)) {
        $cleaned = array();

        foreach ($var as $key => $val) {
            // Recursion! The charset is passed down so nested values use the
            // same target encoding as the top-level call.
            $cleaned[UTF8::clean($key, $charset)] = UTF8::clean($val, $charset);
        }

        return $cleaned;
    }

    if (is_object($var)) {
        if ($var instanceof \ArrayAccess) {
            foreach ($var as $key => $val) {
                $var[UTF8::clean($key, $charset)] = UTF8::clean($val, $charset);
            }
        } elseif (!$var instanceof \Traversable) {
            // Array syntax on a plain object is a fatal error, so write the
            // cleaned values back as properties instead.
            foreach (get_object_vars($var) as $name => $val) {
                $var->{$name} = UTF8::clean($val, $charset);
            }
        }

        return $var;
    }

    if (is_string($var) and $var !== "") {
        // Remove control characters
        $var = UTF8::strip_ascii_ctrl($var);

        if (!UTF8::is_ascii($var)) {
            // Temporarily save the mb_substitute_character() value into a variable
            $mb_substitute_character = mb_substitute_character();

            // Disable substituting illegal characters with the default '?' character
            mb_substitute_character("none");

            // Convert encoding; this is expensive, so it only runs when $var is not ASCII
            $var = mb_convert_encoding($var, $charset, $charset);

            // Reset mb_substitute_character() value back to the original setting
            mb_substitute_character($mb_substitute_character);
        }
    }

    return $var;
}
/**
 * Saves the comment, filling in defaults first.
 *
 * Always refreshes the update time and falls back to the configured input
 * format and the active user when format/author are empty. For a record
 * that is not loaded yet it also stamps the creation time, the client IP
 * (first 32 characters) and, when no status is set, an initial status
 * based on the 'skip comment approval' permission. When the title is blank
 * and a body exists, a title is derived from the body.
 *
 * @param   Validation  $validation  Validation object
 * @return  ORM
 *
 * @uses    User::active_user
 * @uses    ACL::check
 * @uses    Text::limit_words
 * @uses    Text::markup
 * @uses    Request::$client_ip
 */
public function save(Validation $validation = NULL)
{
    // Defaults applied on every save
    $this->updated = time();

    if (empty($this->format)) {
        $format = Kohana::$config->load('inputfilter.default_format', 1);
    } else {
        $format = $this->format;
    }
    $this->format = $format;

    if (empty($this->author)) {
        $author = User::active_user()->id;
    } else {
        $author = $this->author;
    }
    $this->author = $author;

    $is_new = !$this->loaded();

    if ($is_new) {
        $this->created = $this->updated;

        // The hostname is only recorded when the comment is first created
        $this->hostname = substr(Request::$client_ip, 0, 32);

        if (empty($this->status)) {
            if (ACL::check('skip comment approval')) {
                $this->status = 'publish';
            } else {
                $this->status = 'draft';
            }
        }
    }

    // No usable title: build one from the body
    if (trim($this->title) == '') {
        if (!empty($this->body)) {
            // The body may be in any format, so render it to HTML with the
            // comment's format and then strip every tag.
            // NOTE(review): HTML entities are not decoded back to plain text
            // here - confirm whether that is intended.
            $rendered = Text::markup($this->body, $this->format);
            $text = trim(UTF8::clean(strip_tags($rendered)));

            $this->title = Text::limit_words($text, 10, '');

            // A body made up of nothing but HTML tags leaves an empty title,
            // so fall back to a default subject.
            if ($this->title == '') {
                $this->title = __('(No subject)');
            }
        }
    }

    parent::save($validation);

    return $this;
}
/**
 * Recursively cleans arrays, objects, and strings. Removes ASCII control
 * codes and converts to the requested charset while silently discarding
 * incompatible characters.
 *
 *     UTF8::clean($_GET); // Clean GET data
 *
 * Arrays are rebuilt, so a key that changes while being cleaned does not
 * leave its uncleaned twin behind. Objects implementing ArrayAccess are
 * written through their offsets; for other non-Traversable objects the
 * values of the accessible properties are cleaned in place (property names
 * are left as they are). Other Traversable objects are returned untouched
 * because there is no generic way to write back into them.
 *
 * @param   mixed   $var      variable to clean
 * @param   string  $charset  character set, defaults to 'utf-8'
 * @return  mixed
 * @uses    UTF8::clean
 * @uses    UTF8::strip_ascii_ctrl
 * @uses    UTF8::is_ascii
 */
public static function clean($var, $charset = 'utf-8')
{
    if (is_array($var)) {
        $cleaned = array();

        foreach ($var as $key => $val) {
            // Recursion! The charset is passed down so nested values use the
            // same target encoding as the top-level call.
            $cleaned[UTF8::clean($key, $charset)] = UTF8::clean($val, $charset);
        }

        return $cleaned;
    }

    if (is_object($var)) {
        if ($var instanceof \ArrayAccess) {
            foreach ($var as $key => $val) {
                $var[UTF8::clean($key, $charset)] = UTF8::clean($val, $charset);
            }
        } elseif (!$var instanceof \Traversable) {
            // Array syntax on a plain object is a fatal error, so write the
            // cleaned values back as properties instead.
            foreach (get_object_vars($var) as $name => $val) {
                $var->{$name} = UTF8::clean($val, $charset);
            }
        }

        return $var;
    }

    if (is_string($var) and $var !== '') {
        // Remove control characters
        $var = UTF8::strip_ascii_ctrl($var);

        if (!UTF8::is_ascii($var)) {
            // mb_convert_encoding() replaces invalid sequences with the
            // current substitute character ('?' by default). Switch it to
            // "none" for the conversion so invalid bytes are dropped, as
            // documented, then restore the previous setting.
            $substitute = mb_substitute_character();
            mb_substitute_character('none');

            $var = mb_convert_encoding($var, $charset, $charset);

            mb_substitute_character($substitute);
        }
    }

    return $var;
}
/**
 * Checks that UTF8::clean() turns each provided input into exactly the
 * expected value (strict comparison).
 *
 * @test
 * @dataProvider provider_clean
 */
public function test_clean($input, $expected)
{
    $actual = UTF8::clean($input);

    $this->assertSame($expected, $actual);
}
/**
 * Helper for Kohana::dump(), handles recursion in arrays and objects.
 *
 * Returns an HTML fragment describing $var. Arrays are temporarily tagged
 * with a marker key and objects are tracked by spl_object_hash(), so a
 * structure that contains itself prints *RECURSION* at that point. Arrays
 * are expanded up to 5 levels deep and objects up to 10.
 *
 * @param   mixed    $var     variable to dump; taken by reference so the recursion marker can be planted in arrays
 * @param   integer  $length  maximum length of strings
 * @param   integer  $level   recursion level (internal)
 * @return  string
 */
protected static function _dump(&$var, $length = 128, $level = 0)
{
    if ($var === NULL) {
        return '<small>NULL</small>';
    } elseif (is_bool($var)) {
        return '<small>bool</small> ' . ($var ? 'TRUE' : 'FALSE');
    } elseif (is_float($var)) {
        return '<small>float</small> ' . $var;
    } elseif (is_resource($var)) {
        $type = get_resource_type($var);

        if ($type === 'stream') {
            $meta = stream_get_meta_data($var);

            if (isset($meta['uri'])) {
                $file = $meta['uri'];

                if (function_exists('stream_is_local')) {
                    // Only exists on PHP >= 5.2.4
                    if (stream_is_local($file)) {
                        $file = Core::debug_path($file);
                    }
                }

                return '<small>resource</small><span>(' . $type . ')</span> ' . htmlspecialchars($file, ENT_NOQUOTES, Core::$charset);
            }
        }

        // Non-stream resources, and streams whose metadata carries no URI,
        // are described by their type only (always return a string).
        return '<small>resource</small><span>(' . $type . ')</span>';
    } elseif (is_string($var)) {
        // Clean invalid multibyte characters before measuring and escaping.
        $var = UTF8::clean($var);

        if (UTF8::strlen($var) > $length) {
            // Encode the truncated string
            $str = htmlspecialchars(UTF8::substr($var, 0, $length), ENT_NOQUOTES, Core::$charset) . ' …';
        } else {
            // Encode the string
            $str = htmlspecialchars($var, ENT_NOQUOTES, Core::$charset);
        }

        return '<small>string</small><span>(' . strlen($var) . ')</span> "' . $str . '"';
    } elseif (is_array($var)) {
        $output = array();

        // Indentation for this variable
        $space = str_repeat($s = ' ', $level);

        static $marker = null;

        if ($marker === null) {
            // Make a unique marker. The NUL prefix keeps it from ever being
            // cast to an integer key, which would defeat the strict
            // comparison against $key below.
            $marker = uniqid("\x00");
        }

        if (empty($var)) {
            // Do nothing
        } elseif (isset($var[$marker])) {
            $output[] = "(\n{$space}{$s}*RECURSION*\n{$space})";
        } elseif ($level < 5) {
            $output[] = "<span>(";

            $var[$marker] = TRUE;

            foreach ($var as $key => &$val) {
                if ($key === $marker) {
                    continue;
                }

                if (!is_int($key)) {
                    $key = '"' . htmlspecialchars($key, ENT_NOQUOTES, Core::$charset) . '"';
                }

                $output[] = "{$space}{$s}{$key} => " . Docs::_dump($val, $length, $level + 1);
            }

            // Break the reference left behind by the by-reference foreach
            unset($val);

            unset($var[$marker]);

            $output[] = "{$space})</span>";
        } else {
            // Depth too great
            $output[] = "(\n{$space}{$s}...\n{$space})";
        }

        return '<small>array</small><span>(' . count($var) . ')</span> ' . implode("\n", $output);
    } elseif (is_object($var)) {
        // Copy the object as an array
        $array = (array) $var;

        $output = array();

        // Indentation for this variable
        $space = str_repeat($s = ' ', $level);

        $hash = spl_object_hash($var);

        // Objects that are being dumped
        static $objects = array();

        if (empty($var)) {
            // Do nothing
        } elseif (isset($objects[$hash])) {
            $output[] = "{\n{$space}{$s}*RECURSION*\n{$space}}";
        } elseif ($level < 10) {
            $output[] = "<code>{";

            $objects[$hash] = TRUE;

            foreach ($array as $key => &$val) {
                // Casting an object to an array prefixes non-public property
                // names with NUL bytes: "\0*\0name" for protected and
                // "\0Class\0name" for private.
                if (is_string($key) and $key !== '' and $key[0] === "\x00") {
                    // Determine if the access is protected or private
                    $access = '<small>' . ($key[1] === '*' ? 'protected' : 'private') . '</small>';

                    // Remove the access level from the variable name
                    $key = substr($key, strrpos($key, "\x00") + 1);
                } else {
                    $access = '<small>public</small>';
                }

                $output[] = "{$space}{$s}{$access} {$key} => " . Docs::_dump($val, $length, $level + 1);
            }

            // Break the reference left behind by the by-reference foreach
            unset($val);

            unset($objects[$hash]);

            $output[] = "{$space}}</code>";
        } else {
            // Depth too great
            $output[] = "{\n{$space}{$s}...\n{$space}}";
        }

        return '<small>object</small> <span>' . get_class($var) . '(' . count($array) . ')</span> ' . implode("\n", $output);
    } else {
        return '<small>' . gettype($var) . '</small> ' . htmlspecialchars(print_r($var, TRUE), ENT_NOQUOTES, Core::$charset);
    }
}
/**
 * Helper for Debug::dump(), handles recursion in arrays and objects.
 *
 * Returns an HTML fragment (with inline styles) describing $var. Arrays are
 * temporarily tagged with a marker key and objects are tracked by
 * spl_object_hash(), so a structure that contains itself prints
 * *RECURSION* at that point.
 *
 * @param   mixed    $var     Variable to dump; taken by reference so the recursion marker can be planted in arrays
 * @param   integer  $length  Maximum length of strings [Optional]
 * @param   integer  $limit   Recursion limit [Optional]
 * @param   integer  $level   Current recursion level (internal usage only!) [Optional]
 *
 * @return  string
 */
protected static function _dump(&$var, $length = 128, $limit = 10, $level = 0)
{
    if ($var === NULL) {
        return '<small style="color: #3465a4">NULL</small>';
    } elseif (is_bool($var)) {
        return '<small>bool</small> <span style="color:#4e9a06">' . ($var ? 'TRUE' : 'FALSE') . '</span>';
    } elseif (is_float($var)) {
        return '<small>float</small> <span style="color:#4e9a06">' . $var . '</span>';
    } elseif (is_integer($var)) {
        return '<small>int</small> <span style="color:#4e9a06">' . $var . '</span>';
    } elseif (is_resource($var)) {
        $type = get_resource_type($var);

        if ($type === 'stream') {
            $meta = stream_get_meta_data($var);

            if (isset($meta['uri'])) {
                $file = $meta['uri'];

                if (stream_is_local($file)) {
                    $file = Debug::path($file);
                }

                return '<small>resource</small><span>(' . $type . ')</span> ' . htmlspecialchars($file, ENT_NOQUOTES, Kohana::$charset);
            }
        }

        // Non-stream resources, and streams whose metadata carries no URI,
        // are described by their type only (always return a string).
        return '<small>resource</small><span>(' . $type . ')</span>';
    } elseif (is_string($var)) {
        // Clean invalid multibyte characters before measuring and escaping.
        $var = UTF8::clean($var, Kohana::$charset);

        if (UTF8::strlen($var) > $length) {
            // Encode the truncated string
            $str = htmlspecialchars(UTF8::substr($var, 0, $length), ENT_NOQUOTES, Kohana::$charset) . ' …';
        } else {
            // Encode the string
            $str = htmlspecialchars($var, ENT_NOQUOTES, Kohana::$charset);
        }

        return '<small>string</small> <span style="color:#cc0000">\'' . $str . '\'</span>(<span style="font-style:italic">length=' . strlen($var) . '</span>)';
    } elseif (is_array($var)) {
        $output = array();

        // Indentation for this variable
        $space = str_repeat($s = ' ', $level);

        static $marker;

        if ($marker === NULL) {
            // Make a unique marker. The NUL prefix keeps it from ever being
            // cast to an integer key, which would defeat the strict
            // comparison against $key below.
            $marker = uniqid("\x00");
        }

        if (empty($var)) {
            // Do nothing
        } elseif (isset($var[$marker])) {
            $output[] = "\n{$space}{$s}*RECURSION*\n{$space}";
        } elseif ($level < $limit) {
            $output[] = "<span>";

            $var[$marker] = TRUE;

            foreach ($var as $key => &$val) {
                if ($key === $marker) {
                    continue;
                }

                if (!is_int($key)) {
                    $key = '"' . htmlspecialchars($key, ENT_NOQUOTES, Kohana::$charset) . '"';
                }

                $output[] = "{$space}{$s}{$key} => " . Debug::_dump($val, $length, $limit, $level + 1);
            }

            // Break the reference left behind by the by-reference foreach
            unset($val);

            unset($var[$marker]);

            $output[] = "{$space}</span>";
        } else {
            // Depth too great
            $output[] = "\n{$space}{$s}...\n{$space}";
        }

        return '<strong>array</strong> <span style="font-style:italic">(size=' . count($var) . ')</span> ' . implode(PHP_EOL, $output);
    } elseif (is_object($var)) {
        // Copy the object as an array
        $array = (array) $var;

        $output = array();

        // Indentation for this variable
        $space = str_repeat($s = ' ', $level);

        $hash = spl_object_hash($var);

        // Objects that are being dumped
        static $objects = array();

        if (empty($var)) {
            // Do nothing
        } elseif (isset($objects[$hash])) {
            $output[] = "{\n{$space}{$s}*RECURSION*\n{$space}}";
        } elseif ($level < $limit) {
            $output[] = "<code>";

            $objects[$hash] = TRUE;

            foreach ($array as $key => &$val) {
                // Casting an object to an array prefixes non-public property
                // names with NUL bytes: "\0*\0name" for protected and
                // "\0Class\0name" for private.
                if (is_string($key) and $key !== '' and $key[0] === "\x00") {
                    // Determine if the access is protected or private
                    $access = '<span style="font-style:italic">' . ($key[1] === '*' ? 'protected' : 'private') . '</span>';

                    // Remove the access level from the variable name
                    $key = substr($key, strrpos($key, "\x00") + 1);
                } else {
                    $access = '<span style="font-style:italic">public</span>';
                }

                $output[] = "{$space}{$s}{$access} '{$key}' <span style='color:#888a85'>=></span> " . Debug::_dump($val, $length, $limit, $level + 1);
            }

            // Break the reference left behind by the by-reference foreach
            unset($val);

            unset($objects[$hash]);

            $output[] = "{$space}</code>";
        } else {
            // Depth too great
            $output[] = "{\n{$space}{$s}...\n{$space}}";
        }

        return '<strong>object</strong>(<span style="font-style:italic">' . get_class($var) . '</span>)' . '[<span style="font-style:italic">' . count($array) . '</span>]' . implode(PHP_EOL, $output);
    } else {
        return '<small>' . gettype($var) . '</small> ' . htmlspecialchars(print_r($var, TRUE), ENT_NOQUOTES, Kohana::$charset);
    }
}
/**
 * Helper for Debug::dump(), handles recursion in arrays and objects.
 *
 * Returns an HTML fragment describing $var. Arrays are temporarily tagged
 * with a marker key and objects are tracked by spl_object_hash(), so a
 * structure that contains itself prints *RECURSION* at that point.
 * Instances of \Phalcon\DI are never expanded.
 *
 * @param   mixed    $var     variable to dump; taken by reference so the recursion marker can be planted in arrays
 * @param   integer  $length  maximum length of strings
 * @param   integer  $limit   recursion limit
 * @param   integer  $level   current recursion level (internal usage only!)
 * @return  string
 */
protected static function _dump(&$var, $length = 128, $limit = 10, $level = 0)
{
    if ($var === null) {
        return '<small>null</small>';
    } elseif (is_int($var)) {
        return '<small>integer</small> <span class="int">' . $var . '</span>';
    } elseif (is_bool($var)) {
        return '<small>bool</small> <span class="bool">' . ($var ? 'true' : 'false') . '</span>';
    } elseif (is_float($var)) {
        return '<small>float</small> <span class="float">' . $var . '</span>';
    } elseif (is_resource($var)) {
        $type = get_resource_type($var);

        if ($type === 'stream') {
            $meta = stream_get_meta_data($var);

            if (isset($meta['uri'])) {
                $file = $meta['uri'];

                if (stream_is_local($file)) {
                    $file = Debug::path($file);
                }

                return '<small>resource</small><span>(' . $type . ')</span> ' . htmlspecialchars($file, ENT_NOQUOTES, \Phalcana\Phalcana::$charset);
            }
        }

        // Non-stream resources, and streams whose metadata carries no URI,
        // are described by their type only (always return a string).
        return '<small>resource</small><span>(' . $type . ')</span>';
    } elseif (is_string($var)) {
        // Clean invalid multibyte characters before measuring and escaping.
        $var = UTF8::clean($var, \Phalcana\Phalcana::$charset);

        if (UTF8::strlen($var) > $length) {
            // Encode the truncated string
            $str = htmlspecialchars(UTF8::substr($var, 0, $length), ENT_NOQUOTES, \Phalcana\Phalcana::$charset) . ' …';
        } else {
            // Encode the string
            $str = htmlspecialchars($var, ENT_NOQUOTES, \Phalcana\Phalcana::$charset);
        }

        return '<small>string</small><span class="len">(' . strlen($var) . ')</span> <span class="string">"' . $str . '"</span>';
    } elseif (is_array($var)) {
        $output = array();

        // Indentation for this variable
        $space = str_repeat($s = ' ', $level);

        static $marker;

        if ($marker === null) {
            // Make a unique marker - force it to be alphanumeric so that it is always treated as a string array key
            $marker = uniqid("") . "x";
        }

        if (empty($var)) {
            // Do nothing
        } elseif (isset($var[$marker])) {
            $output[] = "(\n{$space}{$s}*RECURSION*\n{$space})";
        } elseif ($level < $limit) {
            $output[] = "<span>(";

            $var[$marker] = true;

            foreach ($var as $key => &$val) {
                if ($key === $marker) {
                    continue;
                }

                if (!is_int($key)) {
                    $key = '<span class="string">"' . htmlspecialchars($key, ENT_NOQUOTES, \Phalcana\Phalcana::$charset) . '"</span>';
                } else {
                    $key = '<span class="int">' . $key . '</span>';
                }

                $output[] = "{$space}{$s}{$key} <span class=\"pointer\">=></span> " . Debug::_dump($val, $length, $limit, $level + 1);
            }

            // Break the reference left behind by the by-reference foreach
            unset($val);

            unset($var[$marker]);

            $output[] = "{$space})</span>";
        } else {
            // Depth too great
            $output[] = "(\n{$space}{$s}...\n{$space})";
        }

        return '<small>array</small><span class="len">(' . count($var) . ')</span> ' . implode("\n", $output);
    } elseif (is_object($var)) {
        // Copy the object as an array
        $array = (array) $var;

        $output = array();

        // Indentation for this variable
        $space = str_repeat($s = ' ', $level);

        $hash = spl_object_hash($var);

        // Objects that are being dumped
        static $objects = array();

        if (empty($var)) {
            // Do nothing
        } elseif ($var instanceof \Phalcon\DI) {
            $output[] = "{\n{$space}{$s}*DEPENDENCY INJECTOR IGNORED*\n{$space}}";
        } elseif (isset($objects[$hash])) {
            $output[] = "{\n{$space}{$s}*RECURSION*\n{$space}}";
        } elseif ($level < $limit) {
            $output[] = "<code>{";

            $objects[$hash] = true;

            foreach ($array as $key => &$val) {
                // Casting an object to an array prefixes non-public property
                // names with NUL bytes: "\0*\0name" for protected and
                // "\0Class\0name" for private.
                if (is_string($key) && $key !== '' && $key[0] === "\x00") {
                    // Determine if the access is protected or private
                    $access = '<small>' . ($key[1] === '*' ? 'protected' : 'private') . '</small>';

                    // Remove the access level from the variable name
                    $key = substr($key, strrpos($key, "\x00") + 1);
                } else {
                    $access = '<small>public</small>';
                }

                $output[] = "{$space}{$s}{$access} {$key} <span class=\"pointer\">=></span> " . Debug::_dump($val, $length, $limit, $level + 1);
            }

            // Break the reference left behind by the by-reference foreach
            unset($val);

            unset($objects[$hash]);

            $output[] = "{$space}}</code>";
        } else {
            // Depth too great
            $output[] = "{\n{$space}{$s}...\n{$space}}";
        }

        return '<small>object</small> <span>' . get_class($var) . '(' . count($array) . ')</span> ' . implode("\n", $output);
    } else {
        return '<small>' . gettype($var) . '</small> ' . htmlspecialchars(print_r($var, true), ENT_NOQUOTES, \Phalcana\Phalcana::$charset);
    }
}
/**
 * Runs one sanitising pass over a string.
 *
 * Empty and purely numeric input is returned as-is (cast to string).
 * Everything else is cleaned, decoded, cleaned again, optionally stripped
 * of 4-byte characters and then passed through the filter methods in a
 * fixed order.
 *
 * @param string $str
 *
 * @return mixed
 */
private function _do($str)
{
    $str = (string) $str;

    // Nothing to sanitise in an empty value or a plain number
    $asInt = (string) (int) $str;
    $asFloat = (string) (float) $str;

    /** @noinspection TypeUnsafeComparisonInspection */
    if (!$str || $asInt == $str || $asFloat == $str) {
        return $str;
    }

    // Drop non-UTF-8 and NULL characters (ignored by some browsers), decode
    // the string, then drop non-UTF-8 characters once more.
    $str = UTF8::clean(
        $this->decode_string(UTF8::clean($str, true, true, false)),
        true,
        true,
        false
    );

    // Remove all >= 4-byte characters if requested
    if ($this->_stripe_4byte_chars === true) {
        $str = preg_replace('/[\\x{10000}-\\x{10FFFF}]/u', '', $str);
    }

    // The order of these steps matters; each one receives the previous
    // step's output.
    $steps = array(
        '_do_never_allowed',            // strings that are never allowed
        'make_php_tags_safe',           // make php tags safe for displaying
        'compact_exploded_javascript',  // correct words before the browser does
        'remove_disallowed_javascript', // javascript calls in links, images etc.
        'remove_evil_attributes',       // attributes such as style, onclick and xmlns
        'sanitize_naughty_html',        // naughty HTML elements
        'sanitize_naughty_javascript',  // naughty JavaScript elements
        '_do_never_allowed',            // extra precaution in case something got through
        '_do_never_allowed_afterwards',
    );

    foreach ($steps as $step) {
        $str = $this->{$step}($str);
    }

    return $str;
}
/**
 * Builds an HTML fragment describing a variable, handling recursion in
 * arrays and objects.
 *
 * Arrays are temporarily tagged with a marker key and objects are tracked
 * by spl_object_hash(), so a structure that contains itself prints
 * *RECURSION* at that point.
 *
 * @param   mixed    $var     variable to dump; taken by reference so the recursion marker can be planted in arrays
 * @param   integer  $length  maximum length of strings
 * @param   integer  $limit   recursion limit
 * @param   integer  $level   current recursion level (internal usage only!)
 * @return  string
 */
protected static function _dump(&$var, $length = 128, $limit = 10, $level = 0)
{
    if ($var === NULL) {
        return "<small>NULL</small>";
    } elseif (is_bool($var)) {
        return "<small>bool</small> " . ($var ? "TRUE" : "FALSE");
    } elseif (is_float($var)) {
        return "<small>float</small> " . $var;
    } elseif (is_resource($var)) {
        $type = get_resource_type($var);

        if ($type === "stream") {
            $meta = stream_get_meta_data($var);

            if (isset($meta["uri"])) {
                $file = $meta["uri"];

                if (function_exists("stream_is_local")) {
                    // Only exists on PHP >= 5.2.4
                    if (stream_is_local($file)) {
                        $file = Debug::path($file);
                    }
                }

                return "<small>resource</small><span>(" . $type . ")</span> " . htmlspecialchars($file, ENT_NOQUOTES, JsonApiApplication::$charset);
            }
        }

        // Non-stream resources, and streams whose metadata carries no URI,
        // are described by their type only (always return a string).
        return "<small>resource</small><span>(" . $type . ")</span>";
    } elseif (is_string($var)) {
        // Clean invalid multibyte characters before measuring and escaping.
        $var = UTF8::clean($var, JsonApiApplication::$charset);

        if (UTF8::strlen($var) > $length) {
            // Encode the truncated string
            $str = htmlspecialchars(UTF8::substr($var, 0, $length), ENT_NOQUOTES, JsonApiApplication::$charset) . " …";
        } else {
            // Encode the string
            $str = htmlspecialchars($var, ENT_NOQUOTES, JsonApiApplication::$charset);
        }

        return '<small>string</small><span>(' . strlen($var) . ')</span> "' . $str . '"';
    } elseif (is_array($var)) {
        $output = array();

        // Indentation for this variable
        $space = str_repeat($s = " ", $level);

        static $marker;

        if ($marker === NULL) {
            // Unique marker; the "x" suffix guarantees a string array key
            $marker = uniqid("") . "x";
        }

        if (empty($var)) {
            // Do nothing
        } elseif (isset($var[$marker])) {
            $output[] = "(\n{$space}{$s}*RECURSION*\n{$space})";
        } elseif ($level < $limit) {
            $output[] = "<span>(";

            $var[$marker] = TRUE;

            foreach ($var as $key => &$val) {
                if ($key === $marker) {
                    continue;
                }

                if (!is_int($key)) {
                    $key = '"' . htmlspecialchars($key, ENT_NOQUOTES, JsonApiApplication::$charset) . '"';
                }

                $output[] = "{$space}{$s}{$key} => " . Debug::_dump($val, $length, $limit, $level + 1);
            }

            // Break the reference left behind by the by-reference foreach
            unset($val);

            unset($var[$marker]);

            $output[] = "{$space})</span>";
        } else {
            // Depth too great
            $output[] = "(\n{$space}{$s}...\n{$space})";
        }

        return "<small>array</small><span>(" . count($var) . ")</span> " . implode("\n", $output);
    } elseif (is_object($var)) {
        // Copy the object as an array
        $array = (array) $var;

        $output = array();

        // Indentation for this variable
        $space = str_repeat($s = " ", $level);

        $hash = spl_object_hash($var);

        // Objects that are being dumped
        static $objects = array();

        if (empty($var)) {
            // Do nothing
        } elseif (isset($objects[$hash])) {
            $output[] = "{\n{$space}{$s}*RECURSION*\n{$space}}";
        } elseif ($level < $limit) {
            $output[] = "<code>{";

            $objects[$hash] = TRUE;

            foreach ($array as $key => &$val) {
                // Casting an object to an array prefixes non-public property
                // names with NUL bytes: "\0*\0name" for protected and
                // "\0Class\0name" for private.
                if (is_string($key) and $key !== "" and $key[0] === "\x00") {
                    // Determine if the access is protected or private
                    $access = "<small>" . ($key[1] === "*" ? "protected" : "private") . "</small>";

                    // Remove the access level from the variable name
                    $key = substr($key, strrpos($key, "\x00") + 1);
                } else {
                    $access = "<small>public</small>";
                }

                $output[] = "{$space}{$s}{$access} {$key} => " . Debug::_dump($val, $length, $limit, $level + 1);
            }

            // Break the reference left behind by the by-reference foreach
            unset($val);

            unset($objects[$hash]);

            $output[] = "{$space}}</code>";
        } else {
            // Depth too great
            $output[] = "{\n{$space}{$s}...\n{$space}}";
        }

        return "<small>object</small> <span>" . get_class($var) . "(" . count($array) . ")</span> " . implode("\n", $output);
    } else {
        return "<small>" . gettype($var) . "</small> " . htmlspecialchars(print_r($var, TRUE), ENT_NOQUOTES, JsonApiApplication::$charset);
    }
}
/**
 * XSS Clean
 *
 * Sanitizes submitted data so that Cross Site Scripting attacks can be
 * prevented. The method is deliberately thorough and therefore does a
 * fair amount of work; nothing is ever 100% foolproof.
 *
 * Note: Intended for data upon submission, not for general runtime
 * processing.
 *
 * @link http://channel.bitflux.ch/wiki/XSS_Prevention
 *       Based in part on some code and ideas from Bitflux.
 *
 * @link http://ha.ckers.org/xss.html
 *       List of vulnerabilities used while developing this filter.
 *
 * @param string|array $str      input data
 * @param bool         $is_image whether the input is an image
 *
 * @return string|array|boolean array:   each element cleaned, when the input is an array
 *                              boolean: when "is_image" is true - true if sanitising changed nothing
 *                              string:  the cleaned string otherwise
 */
public function xss_clean($str, $is_image = false)
{
    // Arrays are cleaned element by element (the image flag is not passed on)
    if (is_array($str)) {
        foreach ($str as $index => $item) {
            $str[$index] = $this->xss_clean($item);
        }

        return $str;
    }

    // Remove non-UTF-8 and NULL characters (ignored by some browsers),
    // decode the string, then remove non-UTF-8 characters again.
    $str = UTF8::clean(
        $this->decode_string(UTF8::clean($str, true, true, false)),
        true,
        true,
        false
    );

    // For images, remember the converted string for the comparison below
    $converted_string = ($is_image === true) ? $str : null;

    // Repeat the sanitising pass until it no longer changes the string
    for (;;) {
        $before = $str;
        $str = $this->_do($str, $is_image);

        if ($before === $str) {
            break;
        }
    }

    /*
     * Images are handled in a special way: what matters is whether any
     * unwanted, likely XSS, code was found once character conversion is
     * done. If the sanitised string equals the converted string the image
     * is clean (true); any difference means something was removed or
     * changed during processing (false).
     */
    return ($is_image === true) ? ($str === $converted_string) : $str;
}
/**
 * Convert a String to URL.
 *
 * e.g.: "Petty<br>theft" to "Petty-theft"
 *
 * @param string  $string                            <p>The text you want to convert.</p>
 * @param int     $maxLength                         <p>Max. length of the output string, set to -1 to disable it</p>
 * @param string  $language                          <p>The language you want to convert to.</p>
 * @param boolean $fileName                          <p>
 *                                                   Keep the "." from the extension e.g.: "imaäe.jpg" => "image.jpg"
 *                                                   </p>
 * @param boolean $removeWords                       <p>
 *                                                   Remove some "words" from the string.<br />
 *                                                   Info: Set extra words via <strong>remove_words()</strong>.
 *                                                   </p>
 * @param boolean $strToLower                        <p>Use <strong>strtolower()</strong> at the end.</p>
 * @param string  $separator                         <p>
 *                                                   Define a new separator for the words. <strong>null</strong>
 *                                                   means no separator; any other empty value falls back to "-".
 *                                                   </p>
 * @param boolean $convertToAsciiOnlyViaLanguageMaps <p>
 *                                                   Set to <strong>true</strong> if you only want to convert the
 *                                                   language-maps.
 *                                                   (better performance, but less complete ASCII converting)
 *                                                   </p>
 * @param boolean $convertUtf8Specials               <p>
 *                                                   Convert (html) special chars with portable-utf8 (e.g. \0,
 *                                                   \xE9, %F6, ...).
 *                                                   </p>
 *
 * @return string <p>The converted string; an empty string when no language is given.</p>
 */
public static function filter($string, $maxLength = 200, $language = 'de', $fileName = false, $removeWords = false, $strToLower = false, $separator = '-', $convertToAsciiOnlyViaLanguageMaps = false, $convertUtf8Specials = false)
{
    if (!$language) {
        return '';
    }

    // separator-fallback
    if (null === $separator) {
        $separator = '';
    } elseif (!$separator) {
        $separator = '-';
    }

    // escaped separator
    $separatorEscaped = preg_quote($separator, '/');

    // set remove-array
    if (!isset(self::$remove_list[$language])) {
        self::reset_remove_list();
    }

    if (0 === count(self::$arrayToSeparator)) {
        self::reset_array_to_separator();
    }

    // get the remove-array
    $removeArray = self::get_remove_list($language);

    // 1) clean invalid chars
    if ($convertUtf8Specials) {
        $string = UTF8::clean($string);
    }

    // 2) replace with $separator
    $string = preg_replace(self::$arrayToSeparator, $separator, $string);

    // 3) remove all other html-tags
    $string = strip_tags($string);

    // 4) use special language replacer
    $string = self::downcode($string, $language, $convertToAsciiOnlyViaLanguageMaps, '', $convertUtf8Specials);

    // 5) replace with $separator, again
    $string = preg_replace(self::$arrayToSeparator, $separator, $string);

    // remove all these words from the string before urlifying
    if ($removeWords === true) {
        $removeWordsSearch = '/\\b(' . implode('|', $removeArray) . ')\\b/i';
    } else {
        $removeWordsSearch = '//';
    }

    // keep the "." from e.g.: a file-extension?
    if ($fileName) {
        $removePattern = '/[^' . $separatorEscaped . '.\\-a-zA-Z0-9\\s]/u';
    } else {
        $removePattern = '/[^' . $separatorEscaped . '\\-a-zA-Z0-9\\s]/u';
    }

    // A space stands in for the separator when the separator is empty
    $separatorClass = $separatorEscaped ?: ' ';

    // The former pattern '[^A-Za-z0-9]' is intentionally gone: it had no
    // "/" delimiters, so PCRE read "[" and "]" as the delimiters and the
    // effective regex "^A-Za-z0-9" only stripped that literal text from the
    // start of the string.
    $string = preg_replace(
        array(
            '/[' . $separatorClass . ']+/',     // collapse runs of the separator
            $removePattern,                     // drop every character that is not allowed
            '/[' . $separatorClass . '\\s]+/',  // separator/whitespace runs become one separator
            '/^\\s+|\\s+$/',                    // trim leading and trailing whitespace
            $removeWordsSearch,                 // remove the configured words (no-op pattern otherwise)
        ),
        array(
            $separator,
            '',
            $separator,
            '',
            '',
        ),
        $string
    );

    // convert to lowercase
    if ($strToLower === true) {
        $string = strtolower($string);
    }

    // "substr" only if "$length" is set
    if ($maxLength && $maxLength > 0) {
        $string = (string) substr($string, 0, $maxLength);
    }

    // trim "$separator" from beginning and end of the string
    return trim($string, $separator);
}