/**
 * Replace text inside runs of consecutive Japanese/Kanji characters.
 *
 * The text is scanned from left to right. Every run reported by
 * searchConsecutiveJpnChars() is cut out, each entry of $srRepArray is
 * applied to it with preg_replace(), and the result is written back into
 * $mbText in place. Scanning then resumes right after the rewritten run.
 *
 * @param MbString $mbText           the text to process; modified in place
 * @param array    $srRepArray       search => replace pairs; each search is embedded
 *                                   as a regex body with the flags "uimS"
 * @param bool     $ignoreLengthDiff true  = every search has the same length as its
 *                                           replacement, so no length bookkeeping is done
 *                                   false = lengths may differ; searches must then be
 *                                           literal strings (no regex), because the length
 *                                           of the search string itself is used to compute
 *                                           how far the text has shifted
 *
 * @return void
 */
protected function replaceConsecutiveJpnChars(MbString &$mbText, array &$srRepArray, $ignoreLengthDiff = false)
{
    $mbText_len = $mbText->strlen();

    // search consecutive Japanese chars in $mbText
    for ($idx = 0; $idx < $mbText_len; ++$idx) {
        $jpnFound = $this->searchConsecutiveJpnChars($mbText, $idx);
        if ($jpnFound === false) {
            break;
        }
        list($jpnStart, $jpnLength) = $jpnFound;

        // do partial replacements on the run only
        $jpnString = $mbText->substr($jpnStart, $jpnLength);
        $textLengthDiff = 0; // how much the run grew (>0) or shrank (<0)
        foreach ($srRepArray as $sr => $rep) {
            $count = 0;
            $replaced = preg_replace("/{$sr}/uimS", $rep, $jpnString, -1, $count);
            if ($replaced === null) {
                // PCRE error: keep the run as it is instead of wiping it out
                continue;
            }
            $jpnString = $replaced;

            if (!$ignoreLengthDiff) {
                $textLengthDiff += $count * (
                    MbString::static_strlen($rep, $this->encoding)
                    - MbString::static_strlen($sr, $this->encoding)
                );
            }
        }

        // update $mbText with $jpnString internally
        $mbText->substr_replace_i($jpnString, $jpnStart, $jpnLength);

        // the text may have changed length: keep the loop bound in sync so
        // that the tail of a grown text is still scanned
        $mbText_len += $textLengthDiff;

        // continue right after the rewritten run (the loop adds the final +1)
        $idx = $jpnStart + $jpnLength - 1 + $textLengthDiff;
    }
}
/**
 * Apply same-length replacements and spread them over neighbouring characters,
 * looking through punctuation.
 *
 * The text is split by $this->punctuationRegex; the non-punctuation pieces are
 * glued together and the conversion runs on that punctuation-free string. For
 * every occurrence of a search string, the characters directly before and
 * after it are also converted for as long as they are characters that occur in
 * the search string. Finally the converted string is cut back into the
 * original pieces, so punctuation stays where it was.
 *
 * Entries whose search and replacement differ in length are skipped, because
 * the pieces are restored by length.
 *
 * @param string $text      the text to process; modified in place
 * @param array  $convTable search => [replacement, conditionRegex]; an empty
 *                          conditionRegex means "always replace"
 * @param int    $context   number of characters taken on each side of a match
 *                          when building the slice tested against conditionRegex
 *
 * @return void
 */
protected function replaceRepeatPattern(&$text, array $convTable, $context)
{
    // split text into text parts (even key) and symbol parts (odd key)
    // NOTE(review): the even/odd layout assumes punctuationRegex has exactly
    // one capturing group -- confirm against its definition
    $textSplit = preg_split($this->punctuationRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($textSplit === false) {
        // PCRE failure (e.g. malformed input): leave $text untouched
        return;
    }
    $textSplitCnt = count($textSplit);

    // merge the text parts into one punctuation-free string
    $textNoSymbol = '';
    for ($i = 0; $i < $textSplitCnt; $i += 2) {
        $textNoSymbol .= $textSplit[$i];
    }

    $mbTextNoSymbol = new MbString($textNoSymbol, $this->encoding);
    $mbTextNoSymbol_len = $mbTextNoSymbol->strlen();

    // do conversion on the de-symboled text, i.e., $mbTextNoSymbol
    foreach ($convTable as $sr => $repArr) {
        list($rep, $conditionRegex) = $repArr;

        $mbSr = new MbString($sr, $this->encoding);
        $mbRep = new MbString($rep, $this->encoding);
        $mbSr_len = $mbSr->strlen();
        $mbRep_len = $mbRep->strlen();

        // skip empty searches (nothing to find) and replacements which
        // would change the text length
        if ($mbSr_len < 1 || $mbSr_len != $mbRep_len) {
            continue;
        }

        // walk over every occurrence of the searched string
        $seek = -1;
        while (($seek = $mbTextNoSymbol->strpos($sr, $seek + 1)) !== false) {
            // check the $conditionRegex against the match plus its context
            if (!empty($conditionRegex)) {
                $sliceStart = $seek > $context ? $seek - $context : 0;
                // context + match + context; the parentheses matter because
                // "+" binds tighter than "<<"
                $sliceLength = $mbSr_len + ($context << 1);
                $textSlice = $mbTextNoSymbol->substr($sliceStart, $sliceLength);

                if (!preg_match("/{$conditionRegex}/u", $textSlice)) {
                    continue;
                }
            }

            // spread towards the start of the text
            $seekFront = $seek;
            while ($seekFront > 0) {
                --$seekFront;

                // stop at the first char which does not occur in $sr
                $charToCheck = $mbTextNoSymbol[$seekFront];
                $charToCheckPosInSr = $mbSr->strpos($charToCheck);
                if ($charToCheckPosInSr === false) {
                    break;
                }

                // replace the char with the one at the same position in $rep
                $mbTextNoSymbol->substr_replace_i($mbRep[$charToCheckPosInSr], $seekFront, 1);
            }

            // spread towards the end of the text
            $seekBack = $seek + $mbSr_len;
            while ($seekBack < $mbTextNoSymbol_len) {
                // stop at the first char which does not occur in $sr
                $charToCheck = $mbTextNoSymbol[$seekBack];
                $charToCheckPosInSr = $mbSr->strpos($charToCheck);
                if ($charToCheckPosInSr === false) {
                    break;
                }

                // replace the char with the one at the same position in $rep
                $mbTextNoSymbol->substr_replace_i($mbRep[$charToCheckPosInSr], $seekBack, 1);
                ++$seekBack;
            }

            // replace the match itself
            $mbTextNoSymbol->substr_replace_i($rep, $seek, $mbSr_len);
        }
    }

    // patch text parts in $textSplit by using $mbTextNoSymbol; every piece
    // keeps its original length, so consecutive slices line up exactly
    $offset = 0;
    for ($i = 0; $i < $textSplitCnt; $i += 2) {
        $pieceLength = MbString::static_strlen($textSplit[$i], $this->encoding);
        $textSplit[$i] = $mbTextNoSymbol->substr($offset, $pieceLength);
        $offset += $pieceLength;
    }

    // re-construct $text by concatenating $textSplit
    $text = implode('', $textSplit);
}