/**
 * Escapes a UTF-8 string for inclusion in XML.
 *
 * The markup characters &, ", < and > are replaced by their predefined named
 * entities. Every other character is passed through unless its code point is
 * <= 31, equal to 39 (apostrophe) or 96 (backtick), or >= 127, in which case
 * it is written as a decimal numeric character reference.
 *
 * @param string $string UTF-8 encoded input
 * @return string The escaped string
 */
static function xmlentities($string)
{
    $length = mb_strlen($string, 'UTF-8');
    $result = "";
    for ($k = 0; $k < $length; $k++) {
        $char = mb_substr($string, $k, 1, 'UTF-8');
        switch ($char) {
            case "&":
                $result .= "&amp;";
                break;
            case "\"":
                $result .= "&quot;";
                break;
            case "<":
                $result .= "&lt;";
                break;
            case ">":
                $result .= "&gt;";
                break;
            default:
                $num = uniord($char);
                if ($num <= 31 || $num == 39 || $num == 96 || $num >= 127) {
                    // Control characters, quotes and non-ASCII become numeric references.
                    $result .= "&#" . $num . ";";
                } else {
                    $result .= $char;
                }
                break;
        }
    }
    return $result;
}
/**
 * Records a character class for each given character in a lookup table keyed
 * by the value uniord() returns for that character.
 *
 * @param array $tbl   Lookup table, modified in place
 * @param array $chars Characters to register
 * @param int   $class Class value to store (combined with bitwise OR when $add is true)
 * @param bool  $add   When true and an entry already exists, OR $class into it;
 *                     otherwise the entry is overwritten with $class
 */
function addChClass(&$tbl, $chars, $class, $add = false)
{
    foreach ($chars as $symbol) {
        $code = uniord($symbol);
        $mergeWithExisting = $add && isset($tbl[$code]);
        $tbl[$code] = $mergeWithExisting ? ($tbl[$code] | $class) : $class;
    }
}
/**
 * Formats a number to a percent string.
 *
 * The number is formatted with this object's DecimalDigits, DecimalSeparator
 * and GroupSeparator, then substituted for "%v" in PositiveFormat or
 * NegativeFormat depending on its sign.
 *
 * @param float $number The value to be formatted
 * @return string The formatted string
 */
function Format($number)
{
    $val = number_format($number, $this->DecimalDigits, $this->DecimalSeparator, $this->GroupSeparator);

    // Only rewrite the group separator as a numeric entity when one is
    // configured; with an empty separator there is no first byte to read and
    // nothing to replace.
    if (strlen($this->GroupSeparator) > 0) {
        $ord = uniord($this->GroupSeparator);
        $val = str_replace($this->GroupSeparator[0], "&#{$ord};", $val);
    }

    if ($number >= 0) {
        return str_replace("%v", $val, $this->PositiveFormat);
    }

    // avoid "--" in value (negative number and - in format)
    return str_replace('--', '-', str_replace("%v", $val, $this->NegativeFormat));
}
/**
 * Hashes a UTF-8 word into one of 20 buckets.
 *
 * The hash is the running sum of the uniord() value of every character,
 * reduced modulo 20 after each character.
 *
 * @param string $word UTF-8 encoded word
 * @return int Bucket index in the range 0..19
 */
function hash_word($word)
{
    $bucketCount = 20;
    $bucket = 0;
    $remaining = mb_strlen($word, 'UTF-8');
    $position = 0;
    while ($position < $remaining) {
        $letter = mb_substr($word, $position, 1, 'UTF-8');
        $bucket = ($bucket + uniord($letter)) % $bucketCount;
        $position++;
    }
    return $bucket;
}
/**
 * Formats a number to string.
 *
 * @param float    $number    The value
 * @param int|bool $decimals  Number of decimals; false (the default) uses this
 *                            object's DecimalDigits property
 * @param bool     $use_plain If true, the group separator is left as-is instead
 *                            of being rewritten as a numeric HTML entity
 * @return string The formatted string
 */
function Format($number, $decimals = false, $use_plain = false)
{
    $number = floatval($number);
    $digits = ($decimals === false) ? $this->DecimalDigits : $decimals;
    $formatted = number_format($number, $digits, $this->DecimalSeparator, $this->GroupSeparator);

    if (strlen($this->GroupSeparator) > 0 && !$use_plain) {
        $separatorCode = uniord($this->GroupSeparator);
        $formatted = str_replace($this->GroupSeparator[0], "&#" . $separatorCode . ";", $formatted);
    }

    if ($number >= 0) {
        return $formatted;
    }

    // Negative values go through NegativeFormat; collapse the "--" that appears
    // when both the formatted value and the format contribute a minus sign.
    $withFormat = str_replace("%v", $formatted, $this->NegativeFormat);
    return str_replace('--', '-', $withFormat);
}
/**
 * Formats a currency value to string.
 *
 * @param float $amount     Value to format (its absolute value is formatted;
 *                          the sign only selects the format template)
 * @param bool  $use_plain  If true, uses the currency code instead of the symbol
 *                          and leaves the group separator unencoded
 * @param bool  $only_value If true, returns just the formatted number without
 *                          any currency code or symbol
 * @return string Formatted currency string
 */
function Format($amount, $use_plain = false, $only_value = false)
{
    $formatted = number_format(abs($amount), $this->DecimalDigits, $this->DecimalSeparator, $this->GroupSeparator);

    if (strlen($this->GroupSeparator) > 0 && !$use_plain) {
        $separatorCode = uniord($this->GroupSeparator);
        $formatted = str_replace($this->GroupSeparator[0], "&#" . $separatorCode . ";", $formatted);
    }

    if ($only_value) {
        return $formatted;
    }

    if ($amount >= 0) {
        $template = $this->PositiveFormat;
    } else {
        $template = $this->NegativeFormat;
    }

    if ($use_plain) {
        $template = str_replace($this->Symbol, $this->Code, $template);
    }

    return unicode_cleanup_rtl(str_replace("%v", $formatted, $template));
}
/**
 * Computes an upper-case hex digest from $uin and $ptwebqq.
 *
 * The key is $ptwebqq followed by the literal "password error". $uin is
 * repeated and cut to exactly the key's byte length; the two strings are then
 * XOR-ed position by position (using uniord() on the substring starting at
 * each byte offset) and the low byte of each result is written as two hex
 * digits.
 *
 * @param string|int $uin     Value repeated to form the XOR pad; must not be empty
 * @param string     $ptwebqq Value the key is derived from
 * @return string Upper-case hexadecimal string, two digits per key byte
 * @throws \InvalidArgumentException If $uin is empty (the original padding
 *                                   loop never terminated in that case)
 */
function qqhash($uin, $ptwebqq)
{
    $key = $ptwebqq . "password error";
    $keyLength = strlen($key);

    $uin = (string) $uin;
    $uinLength = strlen($uin);
    if ($uinLength === 0) {
        throw new \InvalidArgumentException('qqhash(): $uin must not be empty');
    }

    // Repeat $uin until it covers the key, then cut it to the key's length.
    $repeats = (int) ceil($keyLength / $uinLength);
    $pad = substr(str_repeat($uin, $repeats), 0, $keyLength);

    $digest = "";
    for ($i = 0; $i < $keyLength; $i++) {
        $mixed = uniord(substr($pad, $i)) ^ uniord(substr($key, $i));
        // Only the low 8 bits are emitted, high nibble first.
        $digest .= sprintf('%02X', $mixed & 0xFF);
    }

    return $digest;
}
.red { color: #ff0000; } .blue { color: #0000ff; } </style> </head> <body> <form method="post"> <textarea name="text"></textarea><br/> <input type="submit" value="Color text"/> </form> <br/> <?php
// Colour every character of the submitted text: characters whose uniord()
// value is even get the "red" class, the others "blue".
if (isset($_POST["text"])) {
    $text = $_POST["text"];
    // Require a non-empty string: an array-valued "text" field must not reach
    // mb_strlen(), and the text "0" is valid input even though empty() rejects it.
    if (is_string($text) && $text !== "") {
        $length = mb_strlen($text);
        for ($i = 0; $i < $length; $i++) {
            $character = mb_substr($text, $i, 1);
            $order = uniord($character);
            $class = ($order % 2 == 0) ? "red" : "blue";
            // The character comes straight from the request, so escape it
            // before writing it into the page.
            $safeCharacter = htmlspecialchars($character, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            echo "<span class=\"" . $class . "\">{$safeCharacter}</span> ";
        }
    } else {
        echo "<div class=\"error\">No text provided.</div>";
    }
}
?> </body> </html>
/**
 * Removes trailing padding from a string.
 *
 * Walks backwards from the end of $b and drops every trailing byte for which
 * uniord() returns a falsy value (e.g. a NUL byte).
 *
 * @param string $b Padded string
 * @return string $b without its trailing padding
 */
public static function str_depad($b)
{
    $keep = strlen($b);
    while ($keep > 0 && !uniord($b[$keep - 1])) {
        $keep--;
    }
    return substr($b, 0, $keep);
}
/**
 * Entitise any characters that are above the 127 ASCII value, plus the XML
 * markup characters
 *
 * Method will encode (as numeric HTML entities) any characters that are above the 127 ASCII value, as QBWC cannot
 * interpret them properly, even when using CDATA. We cannot use isc_convert_charset("UTF-8", "HTML-ENTITIES", "") or
 * anything simple like that because QBWC needs the numeric form (presumably it understands e.g. &#8482; but NOT
 * &trade; - the original example was lost), so do it all manually.
 *
 * The &, <, >, ' and " characters are also written as numeric entities. htmlspecialchars() is not used because it
 * does not produce the numeric form.
 *
 * @access private
 * @param string $str The string to search in
 * @param bool &$wasConverted The referenced variable will be set to TRUE if $str was changed, FALSE if not
 * @return string The filtered string on success, FALSE on error
 */
private function filterInvalidQBXMLChars($str, &$wasConverted)
{
    $wasConverted = false;

    if (!is_string($str)) {
        return false;
    }

    if (trim($str) == "") {
        return $str;
    }

    /**
     * Decode everything first so existing entities are not encoded twice
     */
    $str = html_entity_decode($str, ENT_QUOTES, "UTF-8");

    /**
     * Markup characters and the code point each one is written as
     */
    $ordMap = array(
        "<" => 60,
        ">" => 62,
        "'" => 39,
        '"' => 34,
        "&" => 38
    );

    $strlen = isc_strlen($str);
    $newStr = "";

    /**
     * Escape in a single pass over the decoded text. Doing the ampersand here (instead of with a regex over the
     * already-escaped output) means a trailing "&", consecutive "&&" and a literal "&#" are all encoded, and the
     * entities written by this loop can never be encoded a second time.
     */
    for ($i = 0; $i < $strlen; $i++) {
        $char = isc_substr($str, $i, 1);
        $unicode = uniord($char);

        if ($unicode > 127) {
            $newStr .= "&#" . $unicode . ";";
            $wasConverted = true;
        } else if (isset($ordMap[$char])) {
            $newStr .= "&#" . $ordMap[$char] . ";";
            $wasConverted = true;
        } else {
            $newStr .= $char;
        }
    }

    return $newStr;
}
/**
 * Inserts spaces into a UTF-8 query wherever the script type of consecutive
 * characters changes.
 *
 * Each character is classified by code point as:
 *   1 - 12352..12447 (U+3040..U+309F, hiragana)
 *   2 - 12448..12543 (U+30A0..U+30FF, katakana)
 *   3 - 13312..44031 (U+3400..U+ABFF, treated as "han")
 *   0 - anything else
 * A space is emitted before a character whose type differs from the previous
 * character's type, unless that character is itself a space. The previous type
 * starts out as 0.
 *
 * @param string $query UTF-8 encoded search query
 * @return string The query with separators inserted, or the query unchanged
 *                when the required mbstring functions are unavailable
 */
function FixQueryForAsianWords($query)
{
    // the multibyte functions are required; without them leave the query alone
    foreach (array('mb_convert_encoding', 'mb_strlen', 'mb_substr') as $required) {
        if (!function_exists($required)) {
            return $query;
        }
    }

    $previousType = 0;
    $result = "";
    $length = mb_strlen($query, "UTF-8");

    for ($pos = 0; $pos < $length; $pos++) {
        $ch = mb_substr($query, $pos, 1, "UTF-8");
        $code = uniord($ch);

        if ($code >= 12352 && $code <= 12447) {
            $type = 1;
        } elseif ($code >= 12448 && $code <= 12543) {
            $type = 2;
        } elseif ($code >= 13312 && $code <= 44031) {
            $type = 3;
        } else {
            $type = 0;
        }

        if ($previousType != $type && $ch != " ") {
            $result .= " ";
        }

        $result .= $ch;
        $previousType = $type;
    }

    return $result;
}
// handle :some_emoji: $content = preg_replace_callback('/:([a-zA-Z0-9\\+\\-_&.ô’Åéãíç]+):/', function ($match) { global $g_emoji_unicode; $str_code = $match[1]; if (!isset($g_emoji_unicode[$str_code])) { return ':' . $str_code . ':'; } $unicode_code = $g_emoji_unicode[$str_code]; $unicode_image_name = ltrim(strtolower($unicode_code), "\\Uu0") . '.png'; return '<img class="emoji" title=":' . $match[1] . ':" alt=":' . $match[1] . ':" src="' . EMOJI_PNG_ROOT . $unicode_image_name . '" height="20" width="20" align="absmiddle" />'; }, $content); // handle <unicode character> // see http://stackoverflow.com/a/10584493/488666 // see https://en.wikipedia.org/wiki/Emoji#Unicode_Blocks $content = preg_replace_callback('/[' . unichr(0x1f300) . '-' . unichr(0x1f5ff) . unichr(0x1f600) . '-' . unichr(0x1f64f) . unichr(0x1f680) . '-' . unichr(0x1f6f3) . unichr(0x1f910) . '-' . unichr(0x1f918) . unichr(0x1f980) . '-' . unichr(0x1f984) . unichr(0x1f9c0) . unichr(0x2600) . '-' . unichr(0x27bf) . ']/u', function ($match) { $unicode_image_name = strtolower(dechex(intval(uniord($match[0])))) . '.png'; return '<img class="emoji" src="' . EMOJI_PNG_ROOT . $unicode_image_name . '" height="20" width="20" align="absmiddle" />'; }, $content); file_put_contents($file_to_convert, $content); // ----------------------------------------- function unichr($i) { return iconv('UCS-4LE', 'UTF-8', pack('V', $i)); } // found at http://www.php.net/manual/en/function.ord.php function uniord($string, &$offset = 0) { $code = ord(substr($string, $offset, 1)); if ($code >= 128) { //otherwise 0xxxxxxx if ($code < 224) {
/**
 * Builds the escape sequence for a single character.
 *
 * The result always starts with self::JS_ESCAPE_CHAR, followed by:
 *  - the entry from self::$JS_CHARS_ESCAPED when the code point is below
 *    self::NUMBER_OF_JS_ESCAPED_CHARS and has a non-empty entry there;
 *  - "x" plus two hex digits for other code points below 256;
 *  - "u" plus four hex digits for code points up to U+FFFF;
 *  - a UTF-16 surrogate pair (two "u" + four-hex-digit escapes) for code
 *    points above U+FFFF, because a "u" escape takes exactly four hex digits.
 *
 * NOTE(review): the surrogate-pair branch assumes self::JS_ESCAPE_CHAR is the
 * JavaScript backslash - confirm against the class constants.
 *
 * @param string $ch The character to escape
 * @return string The escape sequence
 */
private static function unicodeEscape($ch)
{
    $out = self::JS_ESCAPE_CHAR;
    $chord = uniord($ch);

    if ($chord < self::NUMBER_OF_JS_ESCAPED_CHARS && !empty(self::$JS_CHARS_ESCAPED[$chord])) {
        $out .= self::$JS_CHARS_ESCAPED[$chord];
    } elseif ($chord < 256) {
        $out .= 'x' . str_pad(dechex($chord), 2, '0', STR_PAD_LEFT);
    } elseif ($chord <= 0xFFFF) {
        $out .= 'u' . str_pad(dechex($chord), 4, '0', STR_PAD_LEFT);
    } else {
        // Split the supplementary-plane code point into its UTF-16 surrogates.
        $offset = $chord - 0x10000;
        $highSurrogate = 0xD800 | ($offset >> 10);
        $lowSurrogate = 0xDC00 | ($offset & 0x3FF);
        $out .= 'u' . dechex($highSurrogate) . self::JS_ESCAPE_CHAR . 'u' . dechex($lowSurrogate);
    }

    return $out;
}
/**
 * Strips out invalid unicode characters from a string to be used in XML
 *
 * First tries a single regex pass that replaces every character in the Unicode
 * "Other" category (\p{C}) with a space. Only when that pass returns NULL does
 * it fall back to checking each character's code point and dropping those
 * outside the accepted ranges.
 *
 * @param string The string to be cleaned
 * @return string The cleaned string
 */
function StripInvalidXMLChars($input)
{
    // attempt to strip using replace first
    $viaRegex = @preg_replace("/\p{C}/u", " ", $input);
    if ($viaRegex !== null) {
        return $viaRegex;
    }

    // the regex failed: manually check each character
    $kept = "";
    $pos = 0;
    while ($pos < isc_strlen($input)) {
        $ch = isc_substr($input, $pos, 1);
        $pos++;

        $cp = uniord($ch);
        if ($cp === false) {
            continue;
        }

        $isAllowedControl = in_array($cp, array(0x9, 0xA, 0xD));
        $isInAllowedRange = ($cp >= 0x20 && $cp <= 0xD7FF)
            || ($cp >= 0xE000 && $cp <= 0xFFFD)
            || ($cp >= 0x10000 && $cp <= 0x10FFFF);

        if ($isAllowedControl || $isInAllowedRange) {
            $kept .= $ch;
        }
    }

    return $kept;
}
/**
 * Cleans a submitted text and builds a preview of how it splits into
 * paragraphs, sentences and tokens.
 *
 * Requires the PERM_ADDER permission. Characters with the following code
 * points are removed from $array['txt'] before splitting: 769 (U+0301),
 * 173 (U+00AD), 8192..8203 (U+2000..U+200B), 8206, 8207, 160, 8237, 8239,
 * 8288 and 12288.
 *
 * @param array $array Request data; reads 'txt' and, when present, 'book_id'
 * @return array Keys: 'full' (cleaned text), 'select0', 'paragraphs' (when the
 *               text yields any), and 'selected0' / 'select1' / 'selected1'
 *               when a book id was supplied
 */
function addtext_check($array)
{
    global $config;
    check_permission(PERM_ADDER);

    // tokenizer data files
    $exceptions_lines = file($config['project']['root'] . '/scripts/tokenizer/tokenizer_exceptions.txt', FILE_IGNORE_NEW_LINES);
    $tok_exc = array_map('mb_strtolower', $exceptions_lines);
    $tok_prefixes = file($config['project']['root'] . '/scripts/tokenizer/tokenizer_prefixes.txt', FILE_IGNORE_NEW_LINES);

    // drop unwanted symbols
    $banned_codes = array(769, 173, 8206, 8207, 160, 8237, 8239, 8288, 12288);
    $clear_text = '';
    $txt_length = mb_strlen($array['txt'], 'UTF-8');
    for ($i = 0; $i < $txt_length; ++$i) {
        $char = mb_substr($array['txt'], $i, 1, 'UTF-8');
        $code = uniord($char);
        $in_banned_range = $code >= 8192 && $code <= 8203;
        if ($in_banned_range || in_array($code, $banned_codes)) {
            continue;
        }
        $clear_text .= $char;
    }

    $out = array(
        'full' => $clear_text,
        'select0' => get_books_for_select(0)
    );

    foreach (split2paragraphs($clear_text) as $par) {
        $par_array = array();
        foreach (split2sentences($par) as $sent) {
            // skip sentences that contain only whitespace
            if (!preg_match('/\\S/', $sent)) {
                continue;
            }
            $sent_array = array('src' => $sent);
            foreach (tokenize_ml($sent, $tok_exc, $tok_prefixes) as $token) {
                $sent_array['tokens'][] = array(
                    'text' => $token[0],
                    'class' => form_exists($token[0]),
                    'border' => $token[1],
                    'vector' => $token[2]
                );
            }
            $par_array['sentences'][] = $sent_array;
        }
        $out['paragraphs'][] = $par_array;
    }

    // book selection
    if (!isset($array['book_id'])) {
        return $out;
    }

    $book_id = (int) $array['book_id'];
    $r = sql_fetch_array(sql_query("SELECT parent_id FROM books WHERE book_id={$book_id} LIMIT 1"));
    if ($r['parent_id'] > 0) {
        $out['selected0'] = $r['parent_id'];
        $out['select1'] = get_books_for_select($r['parent_id']);
        $out['selected1'] = $book_id;
    } else {
        $out['selected0'] = $book_id;
    }

    return $out;
}
/**
 * Looks up a character in a format 12 cmap subtable read from $fh.
 *
 * The subtable's groups are scanned in order. When a group's
 * [startCharCode, endCharCode] range contains the character's code point, the
 * result is that group's startGlyphID plus the offset of the code point within
 * the range. The scan stops as soon as a group starts beyond the code point.
 *
 * @param string   $character The character to look up
 * @param resource $fh        File handle passed to CMAPFormat12::createCMAPFormat12()
 * @return int|false The computed glyph id, or false when no group contains the character
 */
private function contains_format12($character, &$fh)
{
    $subtable = CMAPFormat12::createCMAPFormat12($fh);
    $mark = ftell($fh); // position after reading the subtable; not used below

    // lookup is based on the unicode number
    $codepoint = uniord($character);

    for ($g = 0; $g < $subtable->nGroups; $g++) {
        $group = $subtable->groups[$g];
        $first = $group["startCharCode"];
        $last = $group["endCharCode"];

        if ($first <= $codepoint && $codepoint <= $last) {
            // found the group containing the character
            return $group["startGlyphID"] + ($codepoint - $first);
        }

        if ($first > $codepoint) {
            // this group already starts past the character: stop scanning
            break;
        }
    }

    return false;
}
/**
 * Tries to read a URL starting at the current position.
 *
 * A URL is recognised only when it starts with "http://", "https://" or
 * "www." and the preceding character (if any) is not of class PRINTABLE.
 * Characters are consumed while their class matches URL|ALPHA or they are one
 * of : , ! ' ~ . ; and any of those punctuation characters left at the end
 * are then given back to the input.
 *
 * @param string $url  Receives the URL as it appears in the text ("www." URLs
 *                     keep the "www." prefix, others keep their scheme)
 * @param string $href Receives the link target; "www." URLs get "http://" prepended
 * @return bool True when a URL was read, false otherwise
 */
protected function url(&$url, &$href)
{
    $hrefPrefix = null;
    $urlPrefix = null;

    if (!empty($this->curPos) && $this->getCharClass(uniord($this->textBuf[$this->curPos - 1])) === self::PRINTABLE) {
        return false;
    }

    if ($this->curCh === 'h') {
        if ($this->simpleMatchString('http://')) {
            $hrefPrefix = 'http://';
            $this->goToPosition($this->curPos + 7);
        } elseif ($this->simpleMatchString('https://')) {
            $hrefPrefix = 'https://';
            $this->goToPosition($this->curPos + 8);
        }
    } elseif ($this->curCh === 'w') {
        if ($this->simpleMatchString('www.')) {
            $hrefPrefix = 'http://www.';
            $urlPrefix = 'www.';
            $this->goToPosition($this->curPos + 4);
        }
    }

    if ($hrefPrefix === null) {
        return false;
    }

    $this->saveState();
    $url = '';
    $urlChMask = self::URL | self::ALPHA;
    $urlPunctValidChars = array(":", ",", "!", "'", "~", ".", ";");

    while ($this->curChClass & $urlChMask || in_array($this->curCh, $urlPunctValidChars, true)) {
        $url .= $this->curCh;
        $this->getCh();
    }

    // Count the punctuation characters at the end of the URL.
    $chCount = 0;
    for ($i = mb_strlen($url) - 1; $i >= 0; $i--) {
        if (!in_array(mb_substr($url, $i, 1), $urlPunctValidChars, true)) {
            break;
        }
        $chCount++;
    }

    // Give the trailing punctuation back to the input.
    if ($chCount > 0) {
        $url = mb_substr($url, 0, mb_strlen($url) - $chCount);
        $this->goToPosition($this->curPos - $chCount);
    }

    // Nothing usable after the prefix: restore the saved state before failing.
    // (An earlier bare "return false" here skipped the restore.)
    if (!mb_strlen($url)) {
        $this->restoreState();
        return false;
    }

    $href = $hrefPrefix . $url;
    $url = !empty($urlPrefix) ? $urlPrefix . $url : $href;
    return true;
}
█~≈~≈≈≈~~,~....≈~~~≈,~.~~O═══════════════════╗.'`,~~≈≈,≈'~~`~`≈~≈≈≈≈..~~'`≈≈≈~~█ █~≈,~≈≈~≈≈╞≈~"~..≈~≈≈~≈.≈≈≈∞≈~≈~≈≈~.~,≈≈~~τ,≈║`,'≈≈≈~~~≈"~≈≈~≈~≈~~≈¶≈~~,~,"`~.,█ █~,~,≈~τ≈``~≈,'≈≈≈≈~.~"`'`≈.~.≈≈~,.≈~~~"≈≈~~~║≈≈~~.."≈'≈~~'τ~.'."'~≈~≈╞≈≈.~,≈≈.█ █,τ.≈~≈~.~~,~.∞~..~"≈~~',~"~`~≈≈,≈.~•••••••≈~║.≈'~~'≈~≈.≈≈≈~`≈≈≈,`≈~`.`'≈~,`'≈'█ █,~~`τ≈~≈≈,~≈∞≈≈,,~≈≈~≈.≈≈~≈.,≈~≈≈~.•••••••`~║≈≈~~'~~~~`~τ≈≈~,'≈~~,τ≈~'~≈`.≈~≈~█ █`~~≈≈.≈≈~.~~~'O███O~~≈τ~'≈'≈.~~τ~~~•••••••~≈║',~≈"~∞≈≈≈..≈~≈≈"≈~≈≈~≈~`≈~"`,~"≈█ █"'≈~`~..~~"'≈~█████≈▓▓▓≈≈≈.≈,≈`¢.≈~•••••••≈~║≈"~~.`≈≈"~~~≈≈~~~≈..~≈≈.≈∞~~~`~≈~█ █≈≈≈≈≈≈~∞≈≈''~.██O██≈▓σ▓≈~.~¶`≈~≈≈~~',~≈,~~~≈║~~~≈,~≈≈~~≈≈≈≈`≈~≈τ≈≈~≈,~'≈~≈≈~~~█ █'∞≈`~',~".≈`≈`█████`▓/▓~`~≈,~≈~≈"≈≈=====~.'"║~≈.~~"`~≈≈≈"`~τ''`',~≈~'~τ,~'"``~█ █`≈~,,''≈≈≈~≈"~O███O~'≈~~≈~≈~≈≈≈`~~.====="~~"║~≈~~≈`∞`~≈τ≈,~≈~~≈≈,~≈"~'`~~≈'≈~~█ █≈~≈≈,≈~≈≈~~≈,~~..~'"~~,≈~~'~τ~≈,≈~,≈.≈,~`≈'.║≈≈`~≈~.≈,'≈≈~.≈.~≈'≈`≈≈,≈'~~~~∞~≈█ █≈~'~`'~≈≈≈~.~≈`≈,`≈≈O═══════════════════════╝"≈`~~~≈.≈`≈~`~"≈,≈τ≈~≈≈~'~'≈≈"~~≈█ █~≈~.,¶,≈',~`≈.~≈~~~≈~','~≈≈'≈≈≈~.≈'≈`≈~~~≈~~≈`'.~~~≈≈≈~,',≈`≈'~,≈,~≈"~~τ~'≈≈~.█ █~~≈~≈≈~≈~.~~τ`+≈~`¶'≈`~~,"~~≈≈.~~~~≈~~~,.~~`'~~`~"≈~~'~~.~≈~`"~~≈,~~~~,~≈≈,~≈~1 █.≈~≈~~~`','~~≈~`≈≈≈≈≈~~≈≈≈~~≈∞≈`.,≈╞~≈'╞≈~'~≈."≈≈~~≈.≈`~τ≈.,'~≈~≈~≈`~~~~≈.~≈"~2 █≈~≈≈▒.,`,`≈~~≈~~≈~~',.,~∞~~~≈≈≈≈~≈`'~∞≈'~``'≈~.,≈≈≈≈~.`≈∞~.~≈~≈".≈"`~≈."≈~≈`≈~8 ████████████████████████████████████████████████████████████████████████████████ DORFFORT; $text_rows = explode("\n", $text); echo 'rows: ' . count($text_rows); if ($rows > 30) { die('Error slicing rows.'); } echo '&#' . uniord("O") . ';'; /*foreach ($text_rows as $row) { // echo "<!-- $row -->"; for ($i = 0; $i < 80; $i++) { echo '<span class="back_BLACK fore_LRED">&#' . uniord(mb_substr($row, $i, 1, 'UTF8')) . ';</span>'; } echo '<br/>' . "\n"; }*/
/**
 * Handler for the {{\#codepointhex }} parser function.
 *
 * @param mixed  $parser    Parser instance (not used by this handler)
 * @param string $character Character whose code point is wanted
 * @return mixed The code point as a lower-case hex string, or the result of
 *               smwfEncodeMessages() when uniord() returns -1
 */
public static function doCodepointHex($parser, $character)
{
    $codepoint = uniord($character);
    if ($codepoint !== -1) {
        return dechex($codepoint);
    }

    return smwfEncodeMessages(array('Invalid character', $character));
}
} else { if ($h <= 0xf4) { return ($h & 0xf) << 18 | (ord($char[1]) & 0x3f) << 12 | (ord($char[2]) & 0x3f) << 6 | ord($char[3]) & 0x3f; } else { return false; } } } } } } if ($_POST && !empty($_POST['text'])) { $text = $_POST['text']; for ($i = 0; $i < mb_strlen($text); $i++) { $char = mb_substr($text, $i, 1); if (uniord($char) % 2 !== 0) { ?> <span style="color:blue"><?php echo $char; ?> </span> <?php } else { ?> <span style="color:red"><?php echo $char; ?> </span> <?php } }
/**
 * Moves to the given position in the input and reads the character there.
 *
 * Updates curPos, curCh, curChOrd and curChClass. When the position is not
 * below textLen, the character is null and its ordinal and class are 0.
 *
 * @param int $position Position to move to
 * @return string|null The character at the given position, or null past the end
 */
protected function goToPosition($position)
{
    $this->curPos = $position;

    // Start from the "past the end" state and overwrite it when in range.
    $this->curCh = null;
    $this->curChOrd = 0;
    $this->curChClass = 0;

    if ($this->curPos < $this->textLen) {
        $this->curCh = $this->textBuf[$this->curPos];
        $this->curChOrd = uniord($this->curCh);
        $this->curChClass = $this->getCharClass($this->curChOrd);
    }

    return $this->curCh;
}
/**
 * Guesses whether a string is written in Arabic.
 *
 * The string is converted to UTF-8 when another encoding is detected, split
 * into characters, and reported as Arabic when more than 60% of the characters
 * have a code point in 1536..1791 (U+0600..U+06FF).
 *
 * @param string $str Text to inspect
 * @return bool True when more than 60% of the characters are in that range;
 *              false otherwise, including for empty or unsplittable input
 */
function is_arabic($str)
{
    // mb_convert_encoding() takes the target encoding first, then the source.
    $encoding = mb_detect_encoding($str);
    if ($encoding !== false && $encoding !== 'UTF-8') {
        $str = mb_convert_encoding($str, 'UTF-8', $encoding);
    }

    // str_split() works on bytes, so split into characters with a /u regex.
    // A result of 0 or false means there is nothing to count; returning here
    // also avoids dividing by zero below.
    if (!preg_match_all('/.|\\n/u', $str, $matches)) {
        return false;
    }

    $chars = $matches[0];
    $arabic_count = 0;
    foreach ($chars as $char) {
        // ord() only looks at one byte, so use uniord() for the code point
        $pos = uniord($char);
        if ($pos >= 1536 && $pos <= 1791) {
            $arabic_count++;
        }
    }

    // more than 60% arabic chars, its probably arabic
    return $arabic_count / count($chars) > 0.6;
}
/**
 * Guesses whether a string should be rendered right-to-left.
 *
 * The string is converted to UTF-8 when another encoding is detected, split
 * with the pattern /.|\n\s/u, and reported as right-to-left when more than 50%
 * of the matches have a code point in 1536..1791 (U+0600..U+06FF).
 *
 * @param string $str Text to inspect
 * @return bool True when more than 50% of the matches are in that range;
 *              false otherwise, including for empty or unsplittable input
 */
function is_rtl($str)
{
    // mb_convert_encoding() takes the target encoding first, then the source.
    $encoding = mb_detect_encoding($str);
    if ($encoding !== false && $encoding !== 'UTF-8') {
        $str = mb_convert_encoding($str, 'UTF-8', $encoding);
    }

    // A result of 0 or false means there is nothing to count; returning here
    // also avoids dividing by zero below.
    if (!preg_match_all('/.|\\n\\s/u', $str, $matches)) {
        return false;
    }

    $chars = $matches[0];
    $arabic_count = 0;
    foreach ($chars as $char) {
        $pos = uniord($char);
        if ($pos >= 1536 && $pos <= 1791) {
            $arabic_count++;
        }
    }

    return $arabic_count / count($chars) > 0.5;
}
/**
 * Tries to read a URL starting at the current position.
 *
 * A URL is recognised only when it starts with "http://", "https://" or
 * "www." and the preceding character (if any) is not of class PRINTABLE.
 * Characters are consumed while their class matches URL|ALPHA or they are one
 * of : , ! '
 *
 * @param string $url  Receives the URL as it appears in the text ("www." URLs
 *                     keep the "www." prefix, others keep their scheme)
 * @param string $href Receives the link target; "www." URLs get "http://" prepended
 * @return bool True when a URL was read, false otherwise
 */
protected function url(&$url, &$href)
{
    $hrefPrefix = null;
    $urlPrefix = null;

    $hasPrevious = !empty($this->curPos);
    if ($hasPrevious && $this->getCharClass(uniord($this->textBuf[$this->curPos - 1])) === self::PRINTABLE) {
        return false;
    }

    if ($this->curCh === 'h') {
        if ($this->simpleMatchString('http://')) {
            $hrefPrefix = 'http://';
            $this->goToPosition($this->curPos + 7);
        } elseif ($this->simpleMatchString('https://')) {
            $hrefPrefix = 'https://';
            $this->goToPosition($this->curPos + 8);
        }
    } elseif ($this->curCh === 'w' && $this->simpleMatchString('www.')) {
        $hrefPrefix = 'http://www.';
        $urlPrefix = 'www.';
        $this->goToPosition($this->curPos + 4);
    }

    if ($hrefPrefix === null) {
        return false;
    }

    $this->saveState();
    $url = '';
    $urlChMask = self::URL | self::ALPHA;
    $extraChars = array(":", ",", "!", "'");

    while ($this->curChClass & $urlChMask || in_array($this->curCh, $extraChars)) {
        $url .= $this->curCh;
        $this->getCh();
    }

    // nothing followed the prefix: undo and report failure
    if (!mb_strlen($url)) {
        $this->restoreState();
        return false;
    }

    $href = $hrefPrefix . $url;
    if (!empty($urlPrefix)) {
        $url = $urlPrefix . $url;
    } else {
        $url = $href;
    }

    return true;
}