/**
 * Compute the edit (Levenshtein) distance between two strings, comparing
 * them character by character rather than byte by byte.
 *
 * Insertions and deletions cost 1; a substitution costs $costReplace.
 *
 * @param string $str1        First string
 * @param string $str2        Second string
 * @param int    $costReplace Cost charged for substituting one character
 * @param string $encoding    Encoding passed to mb_strlen() and mbStringToArray()
 *
 * @return array 'distance' => the edit distance,
 *               'count_same_letter' => how many (position in $str1, position
 *               in $str2) pairs held identical characters while the table
 *               was being filled
 */
function LevenshteinDistance($str1, $str2, $costReplace = 2, $encoding = 'UTF-8')
{
    $rows = mb_strlen($str1, $encoding);
    $cols = mb_strlen($str2, $encoding);
    $source = mbStringToArray($str1, $encoding);
    $target = mbStringToArray($str2, $encoding);

    $matches = 0;
    $table = array();

    // First column: turning a prefix of $str1 into the empty string.
    for ($r = 0; $r <= $rows; $r++) {
        $table[$r] = array(0 => $r);
    }
    // First row: building a prefix of $str2 from the empty string.
    for ($c = 0; $c <= $cols; $c++) {
        $table[0][$c] = $c;
    }

    for ($r = 1; $r <= $rows; $r++) {
        $sourceChar = $source[$r - 1];
        for ($c = 1; $c <= $cols; $c++) {
            if ($sourceChar === $target[$c - 1]) {
                $substitution = 0;
                $matches++;
            } else {
                $substitution = $costReplace;
            }

            $viaDeletion = $table[$r - 1][$c] + 1;
            $viaInsertion = $table[$r][$c - 1] + 1;
            $viaSubstitution = $table[$r - 1][$c - 1] + $substitution;

            $table[$r][$c] = min($viaDeletion, $viaInsertion, $viaSubstitution);
        }
    }

    return array(
        'distance' => $table[$rows][$cols],
        'count_same_letter' => $matches,
    );
}
/**
 * Build the character string associated with $prev.
 *
 * When $prev is one of the characters of the global $wovels string, the
 * result is the global $consonants string followed by $position[$prev];
 * otherwise it is checkharmony($prev) followed by $position[$prev].
 *
 * @param mixed $prev     Character looked up in $wovels and used as key into $position
 * @param array $position Map from a character to the string appended to the result
 *
 * @return string
 */
function wcdispatch($prev, $position)
{
    // $h_wovels is bound here but not read by this function.
    global $wovels, $consonants, $h_wovels;

    $vowelChars = mbStringToArray($wovels);
    $prefix = in_array($prev, $vowelChars) ? $consonants : checkharmony($prev);

    return $prefix . $position[$prev];
}
/**
 * Copy $string to the output character by character, delegating to
 * processOpenBrace() whenever a character equal to OPEN_BRACE is met.
 *
 * processOpenBrace() receives the index following the brace, the character
 * array and a third variable; after the call, scanning resumes at the
 * character after the index held in that variable (presumably written by
 * reference inside processOpenBrace() -- confirm against its definition).
 *
 * @param string $string Input text
 *
 * @return string Processed text
 */
function processString($string)
{
    $chars = mbStringToArray($string);
    $resumeAt = 0;
    $output = '';
    $total = count($chars);

    $pos = 0;
    while ($pos < $total) {
        $current = $chars[$pos];

        // Loose comparison on purpose: it mirrors switch/case semantics.
        if ($current == OPEN_BRACE) {
            $output .= processOpenBrace($pos + 1, $chars, $resumeAt);
            $pos = $resumeAt;
        } else {
            $output .= $current;
        }

        ++$pos;
    }

    return $output;
}
/**
 * Levenshtein distance between two multi-byte strings.
 *
 * Characters are compared as whole characters (not bytes). Inserting or
 * deleting a character costs 1; replacing one costs $costReplace.
 *
 * @param string  $str1        First string
 * @param string  $str2        Second string
 * @param integer $costReplace Cost of replacing one character by another
 * @param string  $encoding    Encoding of both strings
 *
 * @return number Distance between $str1 and $str2
 *
 * @since 0.0.1
 */
function levenshteinDistance($str1, $str2, $costReplace = 2, $encoding = 'UTF-8')
{
    $lengthA = mb_strlen($str1, $encoding);
    $lengthB = mb_strlen($str2, $encoding);
    $charsA = mbStringToArray($str1, $encoding);
    $charsB = mbStringToArray($str2, $encoding);

    $matrix = array();

    // Border cells: distance between a prefix and the empty string.
    for ($a = 0; $a <= $lengthA; $a++) {
        $matrix[$a] = array(0 => $a);
    }
    for ($b = 0; $b <= $lengthB; $b++) {
        $matrix[0][$b] = $b;
    }

    for ($a = 1; $a <= $lengthA; $a++) {
        for ($b = 1; $b <= $lengthB; $b++) {
            if ($charsA[$a - 1] === $charsB[$b - 1]) {
                $replaceCost = 0;
            } else {
                $replaceCost = $costReplace;
            }

            $matrix[$a][$b] = min(
                $matrix[$a - 1][$b] + 1,
                $matrix[$a][$b - 1] + 1,
                $matrix[$a - 1][$b - 1] + $replaceCost
            );
        }
    }

    return $matrix[$lengthA][$lengthB];
}
?> <?php $row = $query->row_array(); ?> <?php $mIndex = 0; ?> <?php echo $this->uri->segment(4); ?> : <br/> <?php /*每跑一個字就搜尋是否存在於候選詞裡的起始位置*/ $i = 0; foreach (mbStringToArray($row['Label'] . " ") as $char) { if ($i == sizeof($LabelIndex)) { break; } $flag = 0; foreach ($dStart->result() as $dStart_row) { if ($dStart_row->Start == $LabelIndex[$i]) { echo '<div class="divbox" id="divbox_' . $LabelIndex[$i] . '"><div id="num_' . $LabelIndex[$i] . '">' . " " . $char . "</div></div>"; $flag = 1; break; } } if ($flag == 0) { echo '<div class="divbox2"><div id="num_' . $LabelIndex[$i] . '">' . " " . $char . "</div></div>"; } $i++;
$key = getCharKey($i, $i <= 7 ? 2 : 3); $substr[$key] = $i; } /** * START ENCODING! */ $scriptText = file_get_contents($argv[1]); if (!$scriptText) { die("File not found\n"); } // support variable for ignored charaters $prevIgnored = false; // for encoded text (result) $encodedScript = ""; // split text $chars = mbStringToArray($scriptText, 0, 1, "UTF-8"); // var_dump($chars); // encoded each character for ($i = 0; $i < count($chars); $i++) { // get character $ch = $chars[$i]; // if character is ignored if (strpos($ignoreChars, $ch) != false) { // if not first character // and prev element not ignored if ($i && !$prevIgnored) { // open quotes $encodedScript .= '+"'; } // if character is quotes if ($ch == '"') {
/**
 * Measure a string in width units: half-width characters count as 1,
 * every other character counts as 2.
 *
 * Half-width means printable ASCII (0x20-0x7E) or the half-width katakana
 * block U+FF61-U+FF9F, matched on its UTF-8 byte sequences
 * (EF BD A1..BF and EF BE 80..9F).
 *
 * @param string $str Input text (the byte patterns below assume UTF-8)
 *
 * @return int Total width
 */
function ex_strlen($str)
{
    // The escapes must be real backslashes. Written with a yen sign (U+00A5)
    // the class degenerates into an unrelated byte range, so space and
    // punctuation were counted as 2 and half-width kana never matched.
    // Single quotes hand the \xHH escapes to PCRE untouched; the pattern is
    // deliberately byte-oriented (no /u modifier).
    $halfWidth = '/^(?:[\x20-\x7E]|\xEF\xBD[\xA1-\xBF]|\xEF\xBE[\x80-\x9F])$/';

    $chars = mbStringToArray($str);
    if (!is_array($chars)) {
        return 0;
    }

    $byte = 0;
    foreach ($chars as $char) {
        $byte += preg_match($halfWidth, $char) ? 1 : 2;
    }

    return $byte;
}
/**
 * Insert a separator into a multi-byte (e.g. Chinese) string after every
 * $len characters.
 *
 * Unlike the native chunk_split(), no separator is appended after the last
 * chunk.
 *
 * @param string $str  Input string
 * @param int    $len  Number of characters per chunk (must be >= 1)
 * @param string $glue Separator inserted between chunks
 *
 * @return string|false The chunked string, or false when $str is empty or
 *                      $len is smaller than 1
 */
function mb_chunk_split($str, $len, $glue)
{
    // empty() alone would also reject the perfectly valid string "0".
    if (empty($str) && !is_numeric($str)) {
        return false;
    }

    // A chunk size below 1 cannot be honoured; the previous loop silently
    // dropped characters in that case.
    $size = (int) $len;
    if ($size < 1) {
        return false;
    }

    $pieces = array();
    foreach (array_chunk(mbStringToArray($str), $size) as $chunk) {
        $pieces[] = implode('', $chunk);
    }

    return implode($glue, $pieces);
}