/**
 * Computes the edit (Levenshtein) distance between two multibyte strings.
 *
 * An insertion or a deletion costs 1; a replacement costs $costReplace.
 * Both strings are split into characters with mbStringToArray() before
 * they are compared.
 *
 * @param string    $str1        First string
 * @param string    $str2        Second string
 * @param int|float $costReplace Cost of replacing one character (default 2)
 * @param string    $encoding    Character encoding of both strings (default 'UTF-8')
 * @return array 'distance' => the edit distance,
 *               'count_same_letter' => how many (character of $str1, character of $str2)
 *               pairs compared identical while filling the table
 */
function LevenshteinDistance($str1, $str2, $costReplace = 2, $encoding = 'UTF-8')
{
    $lenA = mb_strlen($str1, $encoding);
    $lenB = mb_strlen($str2, $encoding);
    $charsA = mbStringToArray($str1, $encoding);
    $charsB = mbStringToArray($str2, $encoding);

    // First column: cost of reducing a prefix of $str1 to the empty string.
    $table = array();
    for ($row = 0; $row <= $lenA; $row++) {
        $table[$row] = array(0 => $row);
    }
    // First row: cost of building a prefix of $str2 from the empty string.
    for ($col = 0; $col <= $lenB; $col++) {
        $table[0][$col] = $col;
    }

    $sameLetters = 0;
    for ($row = 1; $row <= $lenA; $row++) {
        for ($col = 1; $col <= $lenB; $col++) {
            $isSame = $charsA[$row - 1] === $charsB[$col - 1];
            if ($isSame) {
                // Counted for every matching pair of positions, not once per letter.
                $sameLetters++;
            }

            $deletion = $table[$row - 1][$col] + 1;
            $insertion = $table[$row][$col - 1] + 1;
            $substitution = $table[$row - 1][$col - 1] + ($isSame ? 0 : $costReplace);

            $table[$row][$col] = min($deletion, $insertion, $substitution);
        }
    }

    return array(
        'distance' => $table[$lenA][$lenB],
        'count_same_letter' => $sameLetters,
    );
}
Exemple #2
0
/**
 * Builds a character string for $prev: a prefix followed by $position[$prev].
 *
 * The prefix is the global $consonants when $prev is one of the characters of
 * the global $wovels string; otherwise it is whatever checkharmony($prev) returns.
 *
 * @param mixed $prev     Character to dispatch on; also used as a key into $position
 * @param array $position Lookup table indexed by character
 * @return string Prefix concatenated with $position[$prev]
 */
function wcdispatch($prev, $position)
{
    // $h_wovels is not read here; the declaration is kept because `global`
    // itself creates the variable in the global scope when it is missing.
    global $wovels, $consonants, $h_wovels;

    $prefix = in_array($prev, mbStringToArray($wovels))
        ? $consonants
        : checkharmony($prev);

    return $prefix . $position[$prev];
}
Exemple #3
0
/**
 * Rebuilds $string character by character, handing control to
 * processOpenBrace() each time an OPEN_BRACE character is met.
 *
 * Ordinary characters are copied to the result unchanged. For an OPEN_BRACE,
 * the return value of processOpenBrace() is appended and scanning resumes
 * after the index left in the third argument.
 * NOTE(review): this presumes processOpenBrace() receives that third argument
 * by reference and sets it to the last index it consumed — confirm against
 * its definition.
 *
 * @param string $string Input text
 * @return string Processed text
 */
function processString($string)
{
    $chars = mbStringToArray($string);
    $total = count($chars);
    $resumeAt = 0;
    $output = '';

    $pos = 0;
    while ($pos < $total) {
        $current = $chars[$pos];
        // Loose comparison on purpose: it mirrors the semantics of switch/case.
        if ($current == OPEN_BRACE) {
            $output .= processOpenBrace($pos + 1, $chars, $resumeAt);
            $pos = $resumeAt;
        } else {
            $output .= $current;
        }
        ++$pos;
    }

    return $output;
}
Exemple #4
0
/**
 * Compute the Levenshtein distance between two multi-byte strings.
 *
 * Insertions and deletions cost 1, replacements cost $costReplace. Only two
 * rows of the dynamic-programming table are kept at any time, because each
 * cell depends solely on the current and the previous row; memory use is
 * therefore proportional to the length of $str2 instead of the product of
 * both lengths.
 *
 * @param   string     $str1         First string
 * @param   string     $str2         Second string
 * @param   int|float  $costReplace  Replacement cost
 * @param   string     $encoding     Strings encoding
 *
 * @return  int|float  Levenshtein distance between $str1 and $str2
 *
 * @since  0.0.1
 */
function levenshteinDistance($str1, $str2, $costReplace = 2, $encoding = 'UTF-8')
{
    $mb_len1 = mb_strlen($str1, $encoding);
    $mb_len2 = mb_strlen($str2, $encoding);
    $mb_str1 = mbStringToArray($str1, $encoding);
    $mb_str2 = mbStringToArray($str2, $encoding);

    // Row 0: building the first $i2 characters of $str2 from nothing takes $i2 insertions.
    $previous = array();
    for ($i2 = 0; $i2 <= $mb_len2; $i2++) {
        $previous[$i2] = $i2;
    }

    for ($i1 = 1; $i1 <= $mb_len1; $i1++) {
        // Column 0: reducing the first $i1 characters of $str1 to nothing takes $i1 deletions.
        $current = array(0 => $i1);
        for ($i2 = 1; $i2 <= $mb_len2; $i2++) {
            $replace = $previous[$i2 - 1] + ($mb_str1[$i1 - 1] === $mb_str2[$i2 - 1] ? 0 : $costReplace);
            $current[$i2] = min($previous[$i2] + 1, $current[$i2 - 1] + 1, $replace);
        }
        $previous = $current;
    }

    // After the last iteration $previous holds the final row of the table.
    return $previous[$mb_len2];
}
Exemple #5
0
            ?>
						<?php 
            $row = $query->row_array();
            ?>
						<?php 
            $mIndex = 0;
            ?>
						<?php 
            echo $this->uri->segment(4);
            ?>
:
						<br/>
						<?php 
            /* For every character processed, check whether its index is one of the start positions of the candidate words */
            $i = 0;
            foreach (mbStringToArray($row['Label'] . " ") as $char) {
                if ($i == sizeof($LabelIndex)) {
                    break;
                }
                $flag = 0;
                foreach ($dStart->result() as $dStart_row) {
                    if ($dStart_row->Start == $LabelIndex[$i]) {
                        echo '<div class="divbox" id="divbox_' . $LabelIndex[$i] . '"><div id="num_' . $LabelIndex[$i] . '">' . " " . $char . "</div></div>";
                        $flag = 1;
                        break;
                    }
                }
                if ($flag == 0) {
                    echo '<div class="divbox2"><div id="num_' . $LabelIndex[$i] . '">' . " " . $char . "</div></div>";
                }
                $i++;
Exemple #6
0
    $key = getCharKey($i, $i <= 7 ? 2 : 3);
    $substr[$key] = $i;
}
/**
 * START ENCODING!
 */
$scriptText = file_get_contents($argv[1]);
if (!$scriptText) {
    die("File not found\n");
}
// helper flag: whether the previous character was an ignored one
$prevIgnored = false;
// for encoded text (result)
$encodedScript = "";
// split text
$chars = mbStringToArray($scriptText, 0, 1, "UTF-8");
// var_dump($chars);
// encode each character
for ($i = 0; $i < count($chars); $i++) {
    // get character
    $ch = $chars[$i];
    // if character is ignored
    if (strpos($ignoreChars, $ch) != false) {
        // if not first character
        // and prev element not ignored
        if ($i && !$prevIgnored) {
            // open quotes
            $encodedScript .= '+"';
        }
        // if character is quotes
        if ($ch == '"') {
 /**
  * Returns a weighted length of $str: 1 per "narrow" character, 2 per any other.
  *
  * A character counts as narrow when its bytes are a single byte in
  * 0x20-0x7E (printable ASCII), or one of the UTF-8 sequences EF BD A1-FF /
  * EF BE 01-9F (these ranges cover the half-width katakana block
  * U+FF61-U+FF9F). The string is split into characters by mbStringToArray().
  *
  * The pattern is single-quoted so that PCRE itself interprets the \xHH
  * escapes; the previous version contained yen signs in place of the
  * backslashes, so the escapes were never recognised.
  *
  * @param string $str Text to measure
  * @return int Sum of the per-character weights (0 for an empty string)
  */
 function ex_strlen($str)
 {
     $byte = 0;
     foreach (mbStringToArray($str) as $st) {
         $isNarrow = preg_match('/^([\x20-\x7E]|\xEF\xBD[\xA1-\xFF]|\xEF\xBE[\x01-\x9F])$/', $st);
         $byte += $isNarrow ? 1 : 2;
     }
     return $byte;
 }
Exemple #8
0
/**
 * Splits a multibyte string into chunks of $len characters and joins the
 * chunks with $glue (no glue is added after the last chunk).
 *
 * The string is split into characters by mbStringToArray().
 *
 * @param string $str  String to split
 * @param int    $len  Number of characters per chunk; must be at least 1
 * @param string $glue Text inserted between consecutive chunks
 * @return string|false The glued string, or false when $str is empty or $len is below 1
 */
function mb_chunk_split($str, $len, $glue)
{
    // empty() also treats the string "0" as empty, but "0" is valid text to split.
    if (empty($str) && $str !== '0') {
        return false;
    }

    // A chunk size below 1 cannot produce chunks; report it as invalid input
    // instead of silently dropping characters.
    $len = (int) $len;
    if ($len < 1) {
        return false;
    }

    $pieces = array();
    foreach (array_chunk(mbStringToArray($str), $len) as $chunk) {
        $pieces[] = implode('', $chunk);
    }

    return implode($glue, $pieces);
}