Esempio n. 1
0
 /**
  * Escapes a UTF-8 string for use in XML.
  *
  * The characters &, ", < and > become their named entities. Every other
  * character whose code (as reported by uniord()) is <= 31, equals 39 or 96,
  * or is >= 127 becomes a decimal numeric entity; the rest pass through.
  *
  * @param string $string UTF-8 text to escape
  * @return string The escaped text
  */
 static function xmlentities($string)
 {
     $named = array(
         "&" => "&amp;",
         "\"" => "&quot;",
         "<" => "&lt;",
         ">" => "&gt;",
     );
     $length = mb_strlen($string, 'UTF-8');
     $escaped = "";
     $pos = 0;
     while ($pos < $length) {
         $symbol = mb_substr($string, $pos, 1, 'UTF-8');
         $pos++;
         if (isset($named[$symbol])) {
             $escaped .= $named[$symbol];
             continue;
         }
         $num = uniord($symbol);
         $needsEntity = $num <= 31 || $num == 39 || $num == 96 || $num >= 127;
         $escaped .= $needsEntity ? "&#" . $num . ";" : $symbol;
     }
     return $escaped;
 }
Esempio n. 2
0
/**
 * Registers a character class for each of the given characters.
 *
 * @param array $tbl   Table keyed by uniord() code, modified in place
 * @param array $chars Characters to register
 * @param int   $class Class bit(s) to store
 * @param bool  $add   When true, OR the class into an existing entry instead of overwriting it
 */
function addChClass(&$tbl, $chars, $class, $add = false)
{
    foreach ($chars as $symbol) {
        $code = uniord($symbol);
        if ($add && isset($tbl[$code])) {
            // Merge with whatever class bits are already stored.
            $tbl[$code] = $tbl[$code] | $class;
            continue;
        }
        $tbl[$code] = $class;
    }
}
 /**
  * Formats a number to a percent string.
  *
  * The number is rendered with number_format() using this object's
  * DecimalDigits, DecimalSeparator and GroupSeparator. When a group
  * separator is configured it is replaced by a numeric HTML entity, and the
  * result is substituted for "%v" in PositiveFormat or NegativeFormat.
  *
  * @param float $number The value to be formatted
  * @return string The formatted string
  */
 function Format($number)
 {
     $val = number_format($number, $this->DecimalDigits, $this->DecimalSeparator, $this->GroupSeparator);
     // Only entity-encode when a separator exists: indexing [0] on an empty
     // string is an uninitialized-offset error and would corrupt the output.
     if (strlen($this->GroupSeparator) > 0) {
         $ord = uniord($this->GroupSeparator);
         $val = str_replace($this->GroupSeparator[0], "&#{$ord};", $val);
     }
     if ($number >= 0) {
         return str_replace("%v", $val, $this->PositiveFormat);
     }
     // avoid "--" in value (negative number and - in format)
     return str_replace('--', '-', str_replace("%v", $val, $this->NegativeFormat));
 }
Esempio n. 4
0
/**
 * Hashes a UTF-8 word into one of 20 buckets.
 *
 * The hash is the running sum of the uniord() codes of the word's
 * characters, reduced modulo 20 after each character.
 *
 * @param string $word UTF-8 word
 * @return int Bucket index in the range 0..19
 */
function hash_word($word)
{
    $bucketCount = 20;
    $bucket = 0;
    $length = mb_strlen($word, 'UTF-8');
    $index = 0;
    while ($index < $length) {
        $letter = mb_substr($word, $index, 1, 'UTF-8');
        $bucket += uniord($letter);
        $bucket %= $bucketCount;
        $index++;
    }
    return $bucket;
}
Esempio n. 5
0
 /**
  * Formats a number to string.
  *
  * Non-negative numbers are returned as the bare formatted value; negative
  * numbers are substituted for "%v" in NegativeFormat.
  *
  * @param float $number The value
  * @param int $decimals Number of decimals, defaults to this object's DecimalDigits property
  * @param bool $use_plain If true, the group separator is not converted to an HTML entity
  * @return string The formatted string
  */
 function Format($number, $decimals = false, $use_plain = false)
 {
     $number = floatval($number);
     if ($decimals !== false) {
         $digits = $decimals;
     } else {
         $digits = $this->DecimalDigits;
     }
     $formatted = number_format($number, $digits, $this->DecimalSeparator, $this->GroupSeparator);
     if (!$use_plain && strlen($this->GroupSeparator) > 0) {
         // Swap the separator's first byte for a numeric entity.
         $separatorCode = uniord($this->GroupSeparator);
         $formatted = str_replace($this->GroupSeparator[0], "&#{$separatorCode};", $formatted);
     }
     if ($number < 0) {
         $negative = str_replace("%v", $formatted, $this->NegativeFormat);
         // avoid "--" in value (negative number and - in format)
         return str_replace('--', '-', $negative);
     }
     return $formatted;
 }
 /**
  * Formats a currency value to string.
  *
  * The absolute amount is formatted first; the sign is expressed only by
  * choosing PositiveFormat or NegativeFormat as the surrounding pattern.
  *
  * @param float $amount Value to format
  * @param bool $use_plain If true, uses the currency code instead of the symbol and keeps the raw group separator
  * @param bool $only_value If true, returns the formatted number without currency code or symbol
  * @return string Formatted currency string
  */
 function Format($amount, $use_plain = false, $only_value = false)
 {
     $formatted = number_format(abs($amount), $this->DecimalDigits, $this->DecimalSeparator, $this->GroupSeparator);
     if (!$use_plain && strlen($this->GroupSeparator) > 0) {
         $separatorCode = uniord($this->GroupSeparator);
         $formatted = str_replace($this->GroupSeparator[0], "&#{$separatorCode};", $formatted);
     }
     if ($only_value) {
         return $formatted;
     }
     if ($amount >= 0) {
         $pattern = $this->PositiveFormat;
     } else {
         $pattern = $this->NegativeFormat;
     }
     if ($use_plain) {
         $pattern = str_replace($this->Symbol, $this->Code, $pattern);
     }
     $withValue = str_replace("%v", $formatted, $pattern);
     return unicode_cleanup_rtl($withValue);
 }
Esempio n. 7
0
/**
 * Builds an uppercase hex digest from a uin and a ptwebqq token.
 *
 * The token is suffixed with "password error"; the uin is repeated and
 * truncated to that same length; both are XOR-ed position by position and
 * the low byte of each result is emitted as two hex digits.
 *
 * @param string $uin     Value repeated to form the key (must be non-empty, otherwise the fill loop never ends)
 * @param string $ptwebqq Token to be salted
 * @return string Uppercase hexadecimal string
 */
function qqhash($uin, $ptwebqq)
{
    $salted = $ptwebqq . "password error";
    $saltedLength = strlen($salted);

    // Repeat $uin until the key is at least as long as the salted token.
    $key = "";
    do {
        $key .= $uin;
    } while (strlen($key) < $saltedLength);
    $key = substr($key, 0, $saltedLength);

    $hexDigits = "0123456789ABCDEF";
    $digest = "";
    $keyLength = strlen($key);
    for ($pos = 0; $pos < $keyLength; $pos++) {
        // uniord() is given the remainder of each string starting at $pos.
        $mixed = uniord(substr($key, $pos)) ^ uniord(substr($salted, $pos));
        $digest .= $hexDigits[$mixed >> 4 & 15];
        $digest .= $hexDigits[$mixed & 15];
    }
    return $digest;
}
        .red {
            color: #ff0000;
        }

        .blue {
            color: #0000ff;
        }
    </style>
</head>
<body>
<form method="post">
    <textarea name="text"></textarea><br/>
    <input type="submit" value="Color text"/>
</form>
<br/>
<?php
// Echo the submitted text one character at a time, colouring each character
// by the parity of its uniord() code: even -> "red", odd -> "blue".
if (isset($_POST["text"])) {
    $text = $_POST["text"];
    // Require a non-empty string: empty() would wrongly reject the text "0",
    // and an array-valued field would break the mb_* calls below.
    if (is_string($text) && $text !== "") {
        $length = mb_strlen($text);
        for ($i = 0; $i < $length; $i++) {
            $character = mb_substr($text, $i, 1);
            $order = uniord($character);
            $class = $order % 2 == 0 ? "red" : "blue";
            // Escape user input so <, >, & and quotes render as text, not markup.
            $safeCharacter = htmlspecialchars($character, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            echo "<span class=\"" . $class . "\">{$safeCharacter}</span> ";
        }
    } else {
        echo "<div class=\"error\">No text provided.</div>";
    }
}
?>
</body>
</html>
Esempio n. 9
0
 /**
  * Removes trailing padding from a byte string.
  *
  * Bytes are dropped from the end for as long as uniord() reports a falsy
  * (zero) code for them.
  *
  * @param string $b Padded string
  * @return string The string without its trailing padding bytes
  */
 public static function str_depad($b)
 {
     $keep = strlen($b);
     while ($keep > 0 && !uniord($b[$keep - 1])) {
         $keep--;
     }
     return substr($b, 0, $keep);
 }
Esempio n. 10
0
	/**
	 * Entitise any characters that are above the 127 ASCII value
	 *
	 * Method will encode (html entitise) any characters that are above the 127 ASCII value as QBWC cannot interpret
	 * them properly, even when using CDATA. We cannot use isc_convert_charset("UTF-8", "HTML-ENTITIES", "") or anything
	 * simple like that because QBWC can understand &#8482; but NOT &trade;, so do it all manually.
	 *
	 * The input is entity-decoded first, then every character above 127 and each of <, >, ', " and & is replaced
	 * by its decimal numeric entity.
	 *
	 * @access private
	 * @param string $str The string to search in
	 * @param bool &$wasConverted The referenced variable will be set to TRUE if $str was changed, FALSE if not
	 * @return string The filtered string on success, FALSE on error
	 */
	private function filterInvalidQBXMLChars($str, &$wasConverted)
	{
		$wasConverted = false;

		if (!is_string($str)) {
			return false;
		} else if (trim($str) == "") {
			return $str;
		}

		/**
		 * Decode everything first so existing entities are not double-encoded
		 */
		$str = html_entity_decode($str, ENT_QUOTES, "UTF-8");

		$strlen = isc_strlen($str);
		$newStr = "";

		for ($i=0; $i<$strlen; $i++) {
			$char = isc_substr($str, $i, 1);
			$unicode = uniord($char);

			if ($unicode <= 127) {
				$newStr .= $char;
				continue;
			}

			$wasConverted = true;
			$newStr .= "&#" . $unicode . ";";
		}


		/**
		 * We also need to entitise the &, <, >, ' and " characters. Don't use htmlspecialchars() as that will not
		 * replace them with the numerical value. The ampersand must stay last so that it sees the entities
		 * produced for the other characters.
		 */
		$ordMap = array(
			"<" => 60,
			">" => 62,
			"'" => 39,
			'"' => 34,
			"&" => 38
		);

		foreach ($ordMap as $chr => $ord) {

			$amount = 0;

			/**
			 * Special case for the ampersand: skip only those that already start a numeric entity (&#123;).
			 * A lookahead is used instead of consuming the next character, so a trailing "&" and every "&"
			 * in a run such as "&&" are converted too.
			 */
			if ($chr == "&") {
				$newStr = preg_replace("/&(?!#\\d+;)/", "&#" . $ord . ";", $newStr, -1, $amount);
			} else {
				$newStr = str_replace($chr, "&#" . $ord . ";", $newStr, $amount);
			}

			if ($amount > 0) {
				$wasConverted = true;
			}
		}


		return $newStr;
	}
Esempio n. 11
0
/**
 * Inserts spaces into a UTF-8 query wherever the script type changes.
 *
 * Each character is classified by its uniord() code as 0 (normal),
 * 1 (hiragana, 12352-12447), 2 (katakana, 12448-12543) or 3 ("han",
 * 13312-44031). A space is inserted before a character whose type differs
 * from the previous one, unless that character is itself a space.
 *
 * @param string $query UTF-8 search query
 * @return string The query with separating spaces added, or the input unchanged when mbstring is unavailable
 */
function FixQueryForAsianWords($query)
{
    // check if the multibyte functions we need to use are available
    foreach (array('mb_convert_encoding', 'mb_strlen', 'mb_substr') as $required) {
        if (!function_exists($required)) {
            return $query;
        }
    }

    $previousType = 0;
    $spaced = "";
    $length = mb_strlen($query, "UTF-8");
    for ($pos = 0; $pos < $length; $pos++) {
        $symbol = mb_substr($query, $pos, 1, "UTF-8");
        $code = uniord($symbol);

        if ($code >= 12352 && $code <= 12447) {
            $type = 1;
        } elseif ($code >= 12448 && $code <= 12543) {
            $type = 2;
        } elseif ($code >= 13312 && $code <= 44031) {
            $type = 3;
        } else {
            $type = 0;
        }

        if ($previousType != $type && $symbol != " ") {
            $spaced .= " ";
        }
        $previousType = $type;
        $spaced .= $symbol;
    }
    return $spaced;
}
Esempio n. 12
0
// handle :some_emoji:
// Each ":name:" token is looked up in the global $g_emoji_unicode table.
// Unknown names are returned unchanged; known ones become an <img> tag whose
// file name is the lowercased table value with leading "\", "U", "u" and "0"
// characters stripped, plus ".png".
$content = preg_replace_callback('/:([a-zA-Z0-9\\+\\-_&.ô’Åéãíç]+):/', function ($match) {
    global $g_emoji_unicode;
    $str_code = $match[1];
    if (!isset($g_emoji_unicode[$str_code])) {
        return ':' . $str_code . ':';
    }
    $unicode_code = $g_emoji_unicode[$str_code];
    $unicode_image_name = ltrim(strtolower($unicode_code), "\\Uu0") . '.png';
    return '<img class="emoji" title=":' . $match[1] . ':" alt=":' . $match[1] . ':" src="' . EMOJI_PNG_ROOT . $unicode_image_name . '" height="20" width="20" align="absmiddle" />';
}, $content);
// handle <unicode character>
// Any single character inside the code point ranges listed in the character
// class below is replaced by an <img> tag named after its lowercase hex code.
// see http://stackoverflow.com/a/10584493/488666
// see https://en.wikipedia.org/wiki/Emoji#Unicode_Blocks
$content = preg_replace_callback('/[' . unichr(0x1f300) . '-' . unichr(0x1f5ff) . unichr(0x1f600) . '-' . unichr(0x1f64f) . unichr(0x1f680) . '-' . unichr(0x1f6f3) . unichr(0x1f910) . '-' . unichr(0x1f918) . unichr(0x1f980) . '-' . unichr(0x1f984) . unichr(0x1f9c0) . unichr(0x2600) . '-' . unichr(0x27bf) . ']/u', function ($match) {
    $unicode_image_name = strtolower(dechex(intval(uniord($match[0])))) . '.png';
    return '<img class="emoji" src="' . EMOJI_PNG_ROOT . $unicode_image_name . '" height="20" width="20" align="absmiddle" />';
}, $content);
// Write the converted content to the path held in $file_to_convert.
file_put_contents($file_to_convert, $content);
// -----------------------------------------
/**
 * Returns the UTF-8 encoding of a Unicode code point.
 *
 * @param int $i Unicode code point
 * @return string UTF-8 encoded character
 */
function unichr($i)
{
    // Pack as a 32-bit little-endian integer, i.e. one UCS-4LE code unit.
    $ucs4 = pack('V', $i);
    return iconv('UCS-4LE', 'UTF-8', $ucs4);
}
// found at http://www.php.net/manual/en/function.ord.php
function uniord($string, &$offset = 0)
{
    $code = ord(substr($string, $offset, 1));
    if ($code >= 128) {
        //otherwise 0xxxxxxx
        if ($code < 224) {
 /**
  * Builds the JavaScript escape sequence for a single character.
  *
  * Uses the entry from self::$JS_CHARS_ESCAPED when one exists for the
  * character's code; otherwise emits "xHH" for codes below 256 and "uHHHH"
  * for everything else, always prefixed with self::JS_ESCAPE_CHAR.
  *
  * @param string $ch Character to escape
  * @return string The escape sequence
  */
 private static function unicodeEscape($ch)
 {
     $code = uniord($ch);

     if ($code < self::NUMBER_OF_JS_ESCAPED_CHARS && !empty(self::$JS_CHARS_ESCAPED[$code])) {
         return self::JS_ESCAPE_CHAR . self::$JS_CHARS_ESCAPED[$code];
     }

     $hex = dechex($code);
     if ($code < 256) {
         return self::JS_ESCAPE_CHAR . 'x' . str_pad($hex, 2, '0', STR_PAD_LEFT);
     }
     return self::JS_ESCAPE_CHAR . 'u' . str_pad($hex, 4, '0', STR_PAD_LEFT);
 }
Esempio n. 14
0
	/**
	* Strips out invalid unicode characters from a string to be used in XML
	*
	* First tries a single regex pass that replaces every \p{C} character with
	* a space. Only if that call yields NULL does it fall back to copying the
	* input character by character, keeping code points in the allowed ranges.
	*
	* @param string The string to be cleaned
	* @return string The input string with invalid characters removed
	*/
	function StripInvalidXMLChars($input)
	{
		// attempt to strip using replace first
		$viaRegex = @preg_replace("/\p{C}/u", " ", $input);
		if ($viaRegex !== null) {
			return $viaRegex;
		}

		// manually check each character
		$clean = "";
		$pos = 0;
		while ($pos < isc_strlen($input)) {
			$symbol = isc_substr($input, $pos, 1);
			$pos++;

			$codepoint = uniord($symbol);
			if ($codepoint === false) {
				continue;
			}

			$isAllowedControl = $codepoint == 0x9 || $codepoint == 0xA || $codepoint == 0xD;
			$isInAllowedRange = ($codepoint >= 0x20 && $codepoint <= 0xD7FF)
				|| ($codepoint >= 0xE000 && $codepoint <= 0xFFFD)
				|| ($codepoint >= 0x10000 && $codepoint <= 0x10FFFF);

			if ($isAllowedControl || $isInAllowedRange) {
				$clean .= $symbol;
			}
		}

		return $clean;
	}
Esempio n. 15
0
/**
 * Cleans a submitted text and splits it into paragraphs, sentences and tokens.
 *
 * Requires the PERM_ADDER permission. Characters whose uniord() code is one
 * of the listed invisible/spacing codes, or lies in 8192-8203, are removed
 * before splitting.
 *
 * @param array $array Input with key 'txt' (UTF-8 text) and optionally 'book_id'
 * @return array Keys 'full', 'select0', 'paragraphs' (when any), and 'selected0' / 'select1' / 'selected1' when a book id was given
 */
function addtext_check($array)
{
    global $config;
    check_permission(PERM_ADDER);

    //read file for tokenizer
    $tok_exc = array_map('mb_strtolower', file($config['project']['root'] . '/scripts/tokenizer/tokenizer_exceptions.txt', FILE_IGNORE_NEW_LINES));
    $tok_prefixes = file($config['project']['root'] . '/scripts/tokenizer/tokenizer_prefixes.txt', FILE_IGNORE_NEW_LINES);

    //removing bad symbols
    $droppedCodes = array(769, 173, 8206, 8207, 160, 8237, 8239, 8288, 12288);
    $clear_text = '';
    $length = mb_strlen($array['txt'], 'UTF-8');
    for ($pos = 0; $pos < $length; ++$pos) {
        $symbol = mb_substr($array['txt'], $pos, 1, 'UTF-8');
        $code = uniord($symbol);
        $inDroppedRange = $code >= 8192 && $code <= 8203;
        if ($inDroppedRange || in_array($code, $droppedCodes)) {
            continue;
        }
        $clear_text .= $symbol;
    }

    $out = array('full' => $clear_text, 'select0' => get_books_for_select(0));

    foreach (split2paragraphs($clear_text) as $paragraph) {
        $par_array = array();
        foreach (split2sentences($paragraph) as $sentence) {
            // skip sentences that contain only whitespace
            if (!preg_match('/\\S/', $sentence)) {
                continue;
            }
            $sent_array = array('src' => $sentence);
            foreach (tokenize_ml($sentence, $tok_exc, $tok_prefixes) as $token) {
                $sent_array['tokens'][] = array(
                    'text' => $token[0],
                    'class' => form_exists($token[0]),
                    'border' => $token[1],
                    'vector' => $token[2],
                );
            }
            $par_array['sentences'][] = $sent_array;
        }
        $out['paragraphs'][] = $par_array;
    }

    //book
    if (!isset($array['book_id'])) {
        return $out;
    }
    $book_id = (int) $array['book_id'];
    $r = sql_fetch_array(sql_query("SELECT parent_id FROM books WHERE book_id={$book_id} LIMIT 1"));
    if ($r['parent_id'] > 0) {
        $out['selected0'] = $r['parent_id'];
        $out['select1'] = get_books_for_select($r['parent_id']);
        $out['selected1'] = $book_id;
    } else {
        $out['selected0'] = $book_id;
    }
    return $out;
}
Esempio n. 16
0
 /**
  * Looks up a character in a format 12 cmap subtable.
  *
  * Groups are scanned in order; the scan stops at the first group whose
  * start code is greater than the character's code.
  *
  * @param string   $character Character to look up
  * @param resource $fh        File handle passed to CMAPFormat12::createCMAPFormat12()
  * @return int|false Glyph id for the character, or false when no group contains it
  */
 private function contains_format12($character, &$fh)
 {
     // also note that in a format 12 subtable, every USHORT is an entry in the GlyphIdArray
     $table = CMAPFormat12::createCMAPFormat12($fh);
     $mark = ftell($fh);
     // lookup is based on the unicode number again.
     $codepoint = uniord($character);

     $index = 0;
     while ($index < $table->nGroups) {
         $firstCode =& $table->groups[$index]["startCharCode"];
         $lastCode =& $table->groups[$index]["endCharCode"];
         if ($firstCode > $codepoint) {
             // this group already starts past our character, so stop scanning
             break;
         }
         if ($codepoint <= $lastCode) {
             // found the group containing our character
             $firstGlyph = $table->groups[$index]["startGlyphID"];
             return $firstGlyph + ($codepoint - $firstCode);
         }
         $index++;
     }
     return false;
 }
Esempio n. 17
0
 /**
  * Tries to read a URL starting at the current position.
  *
  * Recognises the prefixes "http://", "https://" and "www.". After the
  * prefix it consumes URL/alpha class characters plus a small set of
  * punctuation, then gives back any punctuation left at the end of the match.
  *
  * @param string &$url  Receives the URL text to display
  * @param string &$href Receives the link target ("www." links get "http://" prepended)
  * @return bool True when a URL was read, false otherwise
  */
 protected function url(&$url, &$href)
 {
     $hrefPrefix = null;
     // A URL may not start directly after a printable character.
     if (!empty($this->curPos) && $this->getCharClass(uniord($this->textBuf[$this->curPos - 1])) === self::PRINTABLE) {
         return false;
     }
     if ($this->curCh === 'h') {
         if ($this->simpleMatchString('http://')) {
             $hrefPrefix = 'http://';
             $this->goToPosition($this->curPos + 7);
         } else {
             if ($this->simpleMatchString('https://')) {
                 $hrefPrefix = 'https://';
                 $this->goToPosition($this->curPos + 8);
             }
         }
     } else {
         if ($this->curCh === 'w') {
             if ($this->simpleMatchString('www.')) {
                 $hrefPrefix = 'http://www.';
                 $urlPrefix = 'www.';
                 $this->goToPosition($this->curPos + 4);
             }
         }
     }
     if ($hrefPrefix === null) {
         return false;
     }
     $this->saveState();
     $url = '';
     $urlChMask = self::URL | self::ALPHA;
     $urlPunctValidChars = array(":", ",", "!", "'", "~", ".", ";");
     while ($this->curChClass & $urlChMask || in_array($this->curCh, $urlPunctValidChars)) {
         $url .= $this->curCh;
         $this->getCh();
     }
     // Count punctuation at the end of the match; it belongs to the
     // surrounding text rather than to the URL.
     $chCount = 0;
     for ($i = mb_strlen($url) - 1; $i >= 0; $i--) {
         if (in_array(mb_substr($url, $i, 1), $urlPunctValidChars)) {
             $chCount++;
         } else {
             break;
         }
     }
     if ($chCount > 0) {
         $url = mb_substr($url, 0, mb_strlen($url) - $chCount);
         $this->goToPosition($this->curPos - $chCount);
     }
     // Nothing usable after the prefix: roll back to the saved state before
     // reporting failure (a bare early return would leave it unrestored).
     if (!mb_strlen($url)) {
         $this->restoreState();
         return false;
     }
     $href = $hrefPrefix . $url;
     $url = !empty($urlPrefix) ? $urlPrefix . $url : $href;
     return true;
 }
Esempio n. 18
0
█~≈~≈≈≈~~,~....≈~~~≈,~.~~O═══════════════════╗.'`,~~≈≈,≈'~~`~`≈~≈≈≈≈..~~'`≈≈≈~~█
█~≈,~≈≈~≈≈╞≈~"~..≈~≈≈~≈.≈≈≈∞≈~≈~≈≈~.~,≈≈~~τ,≈║`,'≈≈≈~~~≈"~≈≈~≈~≈~~≈¶≈~~,~,"`~.,█
█~,~,≈~τ≈``~≈,'≈≈≈≈~.~"`'`≈.~.≈≈~,.≈~~~"≈≈~~~║≈≈~~.."≈'≈~~'τ~.'."'~≈~≈╞≈≈.~,≈≈.█
█,τ.≈~≈~.~~,~.∞~..~"≈~~',~"~`~≈≈,≈.~•••••••≈~║.≈'~~'≈~≈.≈≈≈~`≈≈≈,`≈~`.`'≈~,`'≈'█
█,~~`τ≈~≈≈,~≈∞≈≈,,~≈≈~≈.≈≈~≈.,≈~≈≈~.•••••••`~║≈≈~~'~~~~`~τ≈≈~,'≈~~,τ≈~'~≈`.≈~≈~█
█`~~≈≈.≈≈~.~~~'O███O~~≈τ~'≈'≈.~~τ~~~•••••••~≈║',~≈"~∞≈≈≈..≈~≈≈"≈~≈≈~≈~`≈~"`,~"≈█
█"'≈~`~..~~"'≈~█████≈▓▓▓≈≈≈.≈,≈`¢.≈~•••••••≈~║≈"~~.`≈≈"~~~≈≈~~~≈..~≈≈.≈∞~~~`~≈~█
█≈≈≈≈≈≈~∞≈≈''~.██O██≈▓σ▓≈~.~¶`≈~≈≈~~',~≈,~~~≈║~~~≈,~≈≈~~≈≈≈≈`≈~≈τ≈≈~≈,~'≈~≈≈~~~█
█'∞≈`~',~".≈`≈`█████`▓/▓~`~≈,~≈~≈"≈≈=====~.'"║~≈.~~"`~≈≈≈"`~τ''`',~≈~'~τ,~'"``~█
█`≈~,,''≈≈≈~≈"~O███O~'≈~~≈~≈~≈≈≈`~~.====="~~"║~≈~~≈`∞`~≈τ≈,~≈~~≈≈,~≈"~'`~~≈'≈~~█
█≈~≈≈,≈~≈≈~~≈,~~..~'"~~,≈~~'~τ~≈,≈~,≈.≈,~`≈'.║≈≈`~≈~.≈,'≈≈~.≈.~≈'≈`≈≈,≈'~~~~∞~≈█
█≈~'~`'~≈≈≈~.~≈`≈,`≈≈O═══════════════════════╝"≈`~~~≈.≈`≈~`~"≈,≈τ≈~≈≈~'~'≈≈"~~≈█
█~≈~.,¶,≈',~`≈.~≈~~~≈~','~≈≈'≈≈≈~.≈'≈`≈~~~≈~~≈`'.~~~≈≈≈~,',≈`≈'~,≈,~≈"~~τ~'≈≈~.█
█~~≈~≈≈~≈~.~~τ`+≈~`¶'≈`~~,"~~≈≈.~~~~≈~~~,.~~`'~~`~"≈~~'~~.~≈~`"~~≈,~~~~,~≈≈,~≈~1
█.≈~≈~~~`','~~≈~`≈≈≈≈≈~~≈≈≈~~≈∞≈`.,≈╞~≈'╞≈~'~≈."≈≈~~≈.≈`~τ≈.,'~≈~≈~≈`~~~~≈.~≈"~2
█≈~≈≈▒.,`,`≈~~≈~~≈~~',.,~∞~~~≈≈≈≈~≈`'~∞≈'~``'≈~.,≈≈≈≈~.`≈∞~.~≈~≈".≈"`~≈."≈~≈`≈~8
████████████████████████████████████████████████████████████████████████████████
DORFFORT;
// Split the heredoc text into rows and report how many there are.
$text_rows = explode("\n", $text);
echo 'rows: ', count($text_rows);
// NOTE(review): $rows is not assigned anywhere in the visible code; this
// check presumably was meant to use count($text_rows) - confirm.
if ($rows > 30) {
    exit('Error slicing rows.');
}
// Print the numeric entity for the letter "O".
$entityCode = uniord("O");
echo '&#' . $entityCode . ';';
/*foreach ($text_rows as $row) {
	// echo "<!-- $row -->";
	for ($i = 0; $i < 80; $i++) {
		echo '<span class="back_BLACK fore_LRED">&#' . uniord(mb_substr($row, $i, 1, 'UTF8')) . ';</span>';
	}
	echo '<br/>' . "\n";
}*/
Esempio n. 19
0
 /**
  * Function for handling the {{\#codepointhex }} parser function.
  *
  * @param mixed  $parser    Parser instance (not used here)
  * @param string $character Character to convert
  * @return string Hexadecimal code of the character, or an encoded error message when uniord() returns -1
  */
 public static function doCodepointHex($parser, $character)
 {
     $codepoint = uniord($character);
     if ($codepoint !== -1) {
         return dechex($codepoint);
     }
     return smwfEncodeMessages(array('Invalid character', $character));
 }
Esempio n. 20
0
                } else {
                    if ($h <= 0xf4) {
                        return ($h & 0xf) << 18 | (ord($char[1]) & 0x3f) << 12 | (ord($char[2]) & 0x3f) << 6 | ord($char[3]) & 0x3f;
                    } else {
                        return false;
                    }
                }
            }
        }
    }
}
if ($_POST && !empty($_POST['text'])) {
    $text = $_POST['text'];
    for ($i = 0; $i < mb_strlen($text); $i++) {
        $char = mb_substr($text, $i, 1);
        if (uniord($char) % 2 !== 0) {
            ?>
            <span style="color:blue"><?php 
            echo $char;
            ?>
</span>
        <?php 
        } else {
            ?>
            <span style="color:red"><?php 
            echo $char;
            ?>
</span>
        <?php 
        }
    }
Esempio n. 21
0
 /**
  * Moves to the given position in the input and reads the character there.
  *
  * Updates curPos, curCh, curChOrd and curChClass. Past the end of the
  * input, curCh becomes null and the other two become 0.
  *
  * @param int $position Position in the input buffer
  * @return string|null The character at the given position, or null past the end
  */
 protected function goToPosition($position)
 {
     $this->curPos = $position;

     if (!($this->curPos < $this->textLen)) {
         // Past the end of the input: reset the current-character state.
         $this->curCh = null;
         $this->curChOrd = 0;
         $this->curChClass = 0;
         return $this->curCh;
     }

     $this->curCh = $this->textBuf[$this->curPos];
     $this->curChOrd = uniord($this->curCh);
     $this->curChClass = $this->getCharClass($this->curChOrd);
     return $this->curCh;
 }
/**
 * Guesses whether a string is predominantly Arabic.
 *
 * A string counts as Arabic when more than 60% of its characters have a
 * code point in the range 1536-1791 (U+0600-U+06FF).
 *
 * @param string $str Text to inspect; converted to UTF-8 first when another encoding is detected
 * @return bool True when the text is probably Arabic; false otherwise, including for empty or unparseable input
 */
function is_arabic($str)
{
    // mb_convert_encoding() takes (string, to, from): convert *to* UTF-8
    // *from* the detected encoding, and skip it when detection fails.
    $encoding = mb_detect_encoding($str);
    if ($encoding !== false && $encoding !== 'UTF-8') {
        $str = mb_convert_encoding($str, 'UTF-8', $encoding);
    }

    // str_split() and preg_split('//u') are avoided because they may split
    // by bytes; matching single characters with /u is multibyte safe.
    $matched = preg_match_all('/.|\\n/u', $str, $matches);
    if (!$matched) {
        // No characters (or invalid UTF-8): avoid dividing by zero below.
        return false;
    }

    $chars = $matches[0];
    $arabic_count = 0;
    foreach ($chars as $char) {
        // ord() only looks at the first byte, so use the UTF-8 aware uniord().
        $pos = uniord($char);
        if ($pos >= 1536 && $pos <= 1791) {
            $arabic_count++;
        }
    }

    // more than 60% Arabic characters: it is probably Arabic
    return $arabic_count / count($chars) > 0.6;
}
Esempio n. 23
0
/**
 * Guesses whether a string is predominantly right-to-left (Arabic script).
 *
 * The text is split with the pattern /.|\n\s/u and counts as RTL when more
 * than 50% of the resulting pieces have a code point in the range
 * 1536-1791 (U+0600-U+06FF).
 *
 * @param string $str Text to inspect; converted to UTF-8 first when another encoding is detected
 * @return bool True when the text is probably RTL; false otherwise, including for empty or unparseable input
 */
function is_rtl($str)
{
    // mb_convert_encoding() takes (string, to, from): convert *to* UTF-8
    // *from* the detected encoding, and skip it when detection fails.
    $encoding = mb_detect_encoding($str);
    if ($encoding !== false && $encoding !== 'UTF-8') {
        $str = mb_convert_encoding($str, 'UTF-8', $encoding);
    }

    $matched = preg_match_all('/.|\\n\\s/u', $str, $matches);
    if (!$matched) {
        // No characters (or invalid UTF-8): avoid dividing by zero below.
        return false;
    }

    $chars = $matches[0];
    $arabic_count = 0;
    foreach ($chars as $char) {
        $pos = uniord($char);
        if ($pos >= 1536 && $pos <= 1791) {
            $arabic_count++;
        }
    }

    return $arabic_count / count($chars) > 0.5;
}
Esempio n. 24
0
 /**
  * Tries to read a URL starting at the current position.
  *
  * Recognises the prefixes "http://", "https://" and "www.", then consumes
  * URL/alpha class characters plus ":", ",", "!" and "'". When nothing
  * follows the prefix, the saved parser state is restored.
  *
  * @param string &$url  Receives the URL text to display
  * @param string &$href Receives the link target ("www." links get "http://" prepended)
  * @return bool True when a URL was read, false otherwise
  */
 protected function url(&$url, &$href)
 {
     // A URL may not start directly after a printable character.
     if (!empty($this->curPos) && $this->getCharClass(uniord($this->textBuf[$this->curPos - 1])) === self::PRINTABLE) {
         return false;
     }

     $hrefPrefix = null;
     $urlPrefix = null;
     if ($this->curCh === 'h') {
         if ($this->simpleMatchString('http://')) {
             $hrefPrefix = 'http://';
             $this->goToPosition($this->curPos + 7);
         } elseif ($this->simpleMatchString('https://')) {
             $hrefPrefix = 'https://';
             $this->goToPosition($this->curPos + 8);
         }
     } elseif ($this->curCh === 'w' && $this->simpleMatchString('www.')) {
         $hrefPrefix = 'http://www.';
         $urlPrefix = 'www.';
         $this->goToPosition($this->curPos + 4);
     }
     if ($hrefPrefix === null) {
         return false;
     }

     $this->saveState();
     $url = '';
     $allowedClasses = self::URL | self::ALPHA;
     $allowedPunctuation = array(":", ",", "!", "'");
     while ($this->curChClass & $allowedClasses || in_array($this->curCh, $allowedPunctuation)) {
         $url .= $this->curCh;
         $this->getCh();
     }

     if (!mb_strlen($url)) {
         $this->restoreState();
         return false;
     }

     $href = $hrefPrefix . $url;
     if (!empty($urlPrefix)) {
         $url = $urlPrefix . $url;
     } else {
         $url = $href;
     }
     return true;
 }