/**
 * Locate the first run of consecutive Japanese/Kanji characters.
 *
 * Scanning starts at $offset and stops at the first character that is
 * contained in $this->mbMustBeJpn. The run is then widened in both
 * directions over every adjacent character that is contained in
 * $this->mbMayBeJpnChars.
 *
 * @param MbString &$mbText [the haystack]
 * @param integer  $offset  [index at which the scan starts]
 *
 * @return array|false [[position, length] of the run, false if there is none]
 */
protected function searchConsecutiveJpnChars(MbString &$mbText, $offset = 0)
{
    $total = $mbText->strlen();

    for ($anchor = $offset; $anchor < $total; ++$anchor) {
        // keep scanning until a character listed in mbMustBeJpn shows up
        if (!$this->mbMustBeJpn->has($mbText[$anchor])) {
            continue;
        }

        // widen the region towards the beginning of the text
        $first = $anchor;
        while ($first > 0 && $this->mbMayBeJpnChars->has($mbText[$first - 1])) {
            --$first;
        }

        // widen the region towards the end of the text
        $last = $anchor;
        while ($last + 1 < $total && $this->mbMayBeJpnChars->has($mbText[$last + 1])) {
            ++$last;
        }

        // only the first region is reported
        return [$first, $last - $first + 1];
    }

    // no character from mbMustBeJpn at or after $offset
    return false;
}
/**
 * Apply same-length replacements to $text while ignoring punctuation.
 *
 * The text is split by $this->punctuationRegex, the non-punctuation pieces
 * are glued together and converted as one string, and the converted
 * characters are then written back into their original pieces. Around each
 * match, neighbouring characters that also occur in the search string are
 * replaced by the character at the same index of the replacement.
 *
 * @param string  &$text     [the text to convert, modified in place]
 * @param array   $convTable [search string => [replacement, condition regex]]
 * @param integer $context   [characters on each side of a match that are
 *                            handed to the condition regex]
 *
 * @return void
 */
protected function replaceRepeatPattern(&$text, array $convTable, $context)
{
    // split text into text parts (even key) and symbol parts (odd key);
    // this layout assumes the punctuation regex has exactly one capture group
    $textSplit = preg_split($this->punctuationRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($textSplit === false) {
        // the split failed, there is nothing sensible to convert
        return;
    }
    $textSplitCnt = count($textSplit);

    // merge the text parts into one punctuation-free string
    $textNoSymbol = '';
    for ($i = 0; $i < $textSplitCnt; $i += 2) {
        $textNoSymbol .= $textSplit[$i];
    }

    $mbTextNoSymbol = new MbString($textNoSymbol, $this->encoding);
    // every replacement below keeps the length, so this stays valid
    $mbTextNoSymbol_len = $mbTextNoSymbol->strlen();

    // do conversion on the de-symboled text, i.e., $mbTextNoSymbol
    foreach ($convTable as $sr => $repArr) {
        // PHP turns numeric-looking array keys into integers
        $sr = (string) $sr;
        $rep = $repArr[0];
        $conditionRegex = isset($repArr[1]) ? $repArr[1] : '';

        $mbSr = new MbString($sr, $this->encoding);
        $mbRep = new MbString($rep, $this->encoding);
        $mbSr_len = $mbSr->strlen();

        // skip empty search strings (nothing to look for) and
        // replacements which will cause different lengths
        if ($mbSr_len < 1 || $mbSr_len != $mbRep->strlen()) {
            continue;
        }

        $hasCondition = !empty($conditionRegex);

        // start the replacement
        $seek = -1;
        while (true) {
            // find the position of the searched string
            $seek = $mbTextNoSymbol->strpos($sr, $seek + 1);
            if ($seek === false) {
                break;
            }

            // check the $conditionRegex against the match and its context
            if ($hasCondition) {
                $sliceStart = $seek > $context ? $seek - $context : 0;
                // "+" binds tighter than "<<", so the shift must be
                // parenthesized to get "match length + 2 * context"
                $sliceLength = $mbSr_len + ($context << 1);
                $textSlice = $mbTextNoSymbol->substr($sliceStart, $sliceLength);

                if (!preg_match("/{$conditionRegex}/u", $textSlice)) {
                    continue;
                }
            }

            // replace frontward: walk left from the match while the
            // characters still belong to the search string
            for ($pos = $seek - 1; $pos >= 0; --$pos) {
                $posInSr = $mbSr->strpos($mbTextNoSymbol[$pos]);
                if ($posInSr === false) {
                    break;
                }

                // replace the character with the corresponding one
                $mbTextNoSymbol->substr_replace_i($mbRep[$posInSr], $pos, 1);
            }

            // replace backward: walk right from the end of the match
            for ($pos = $seek + $mbSr_len; $pos < $mbTextNoSymbol_len; ++$pos) {
                $posInSr = $mbSr->strpos($mbTextNoSymbol[$pos]);
                if ($posInSr === false) {
                    break;
                }

                // replace the character with the corresponding one
                $mbTextNoSymbol->substr_replace_i($mbRep[$posInSr], $pos, 1);
            }

            // replace the center
            $mbTextNoSymbol->substr_replace_i($rep, $seek, $mbSr_len);
        }
    }

    // patch text parts in $textSplit by using $mbTextNoSymbol
    $offset = 0;
    for ($i = 0; $i < $textSplitCnt; $i += 2) {
        $pieceLength = MbString::static_strlen($textSplit[$i], $this->encoding);
        $textSplit[$i] = $mbTextNoSymbol->substr($offset, $pieceLength);
        $offset += $pieceLength;
    }

    // re-construct $text by concatenating $textSplit
    $text = implode('', $textSplit);
}