Exemplo n.º 1
0
 /**
  * Replace text inside runs of consecutive Japanese/Kanji chars using a search/replace table.
  *
  * Each run reported by searchConsecutiveJpnChars() is extracted, every pair of
  * $srRepArray is applied to it, and the result is written back into $mbText in place.
  *
  * @param  MbString $mbText           [the text which will be processed on; modified in place]
  * @param  array    &$srRepArray      [key/value = search/replace; each key is interpolated into a regex pattern]
  * @param  boolean  $ignoreLengthDiff [true  = each search's length equals its corresponding replace's length
  *                                     false = otherwise, but regex is not allowed in $srRepArray]
  * @return void
  */
 protected function replaceConsecutiveJpnChars(MbString &$mbText, array &$srRepArray, $ignoreLengthDiff = false)
 {
     $encoding = $this->encoding;
     $mbText_len = $mbText->strlen();
     // search consecutive Japanese chars in $mbText
     for ($idx = 0; $idx < $mbText_len; ++$idx) {
         $jpnFound = $this->searchConsecutiveJpnChars($mbText, $idx);
         if ($jpnFound === false) {
             break;
         }
         list($jpnStart, $jpnLength) = $jpnFound;
         // do partial replacements for $mbText
         $jpnString = $mbText->substr($jpnStart, $jpnLength);
         // text length maybe different after replacing
         $textLengthDiff = 0;
         foreach ($srRepArray as $sr => $rep) {
             $replaced = preg_replace("/{$sr}/uimS", $rep, $jpnString, -1, $count);
             if ($replaced === null) {
                 // preg_replace() failed for this pair; keep the segment as it was
                 // instead of overwriting it with null
                 continue;
             }
             $jpnString = $replaced;
             if (!$ignoreLengthDiff) {
                 // the search key is measured as a literal string here, which is why
                 // regex keys are not supported in this mode
                 $textLengthDiff += $count * (MbString::static_strlen($rep, $encoding) - MbString::static_strlen($sr, $encoding));
             }
         }
         // update $mbText with $jpnString internally
         $mbText->substr_replace_i($jpnString, $jpnStart, $jpnLength);
         // keep the loop bound in sync with the new text length, otherwise a text
         // that grew would stop being scanned before its real end
         $mbText_len += $textLengthDiff;
         // jump to the last char of the replaced run (the loop's ++$idx moves past it)
         $idx = $jpnStart + $jpnLength - 1 + $textLengthDiff;
     }
 }
Exemplo n.º 2
0
 /**
  * Apply a conversion table to $text and spread each replacement over adjacent repeated chars.
  *
  * Punctuation (as matched by $this->punctuationRegex) is taken out first so that it
  * does not interrupt a match; the converted text is put back between the original
  * punctuation pieces afterwards. Only pairs whose search and replace strings have the
  * same length are applied, so every text piece keeps its original length.
  *
  * For each occurrence of a search string, neighbouring chars (before and after the
  * occurrence) that also appear in the search string are replaced by the char at the
  * same position in the replace string, until a char not in the search string is met.
  *
  * @param  string  &$text      [the text which will be processed on; rewritten in place]
  * @param  array   $convTable  [search => [replace, conditionRegex]; an empty conditionRegex means unconditional]
  * @param  integer $context    [number of chars on each side of an occurrence that conditionRegex is tested against]
  * @return void
  */
 protected function replaceRepeatPattern(&$text, array $convTable, $context)
 {
     // split text into text parts (even key) and symbol parts (odd key)
     // NOTE(review): the even/odd layout assumes punctuationRegex has exactly one capture group -- confirm
     $textSplit = preg_split($this->punctuationRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE);
     if ($textSplit === false) {
         // preg_split() failed; leave $text untouched rather than rebuilding it from nothing
         return;
     }
     $textSplitCnt = count($textSplit);
     // get text parts and merge them into a new string to $textNoSymbol
     $textNoSymbol = '';
     for ($i = 0; $i < $textSplitCnt; $i += 2) {
         $textNoSymbol .= $textSplit[$i];
     }
     $mbTextNoSymbol = new MbString($textNoSymbol, $this->encoding);
     // constant for the whole method: only same-length replacements are performed
     $mbTextNoSymbol_len = $mbTextNoSymbol->strlen();
     // do conversion on the de-symboled text, i.e., $mbTextNoSymbol
     foreach ($convTable as $sr => $repArr) {
         list($rep, $conditionRegex) = $repArr;
         $mbSr = new MbString($sr, $this->encoding);
         $mbRep = new MbString($rep, $this->encoding);
         $mbSr_len = $mbSr->strlen();
         $mbRep_len = $mbRep->strlen();
         // skip replacements which will cause different lengths
         if ($mbSr_len != $mbRep_len) {
             unset($mbSr, $mbRep);
             continue;
         }
         // start the replacement
         $seek = -1;
         while (true) {
             // find the position of the searched string
             $seek = $mbTextNoSymbol->strpos($sr, $seek + 1);
             if ($seek === false) {
                 break;
             }
             // check the $conditionRegex against the occurrence plus $context chars on each side
             if (!empty($conditionRegex)) {
                 $sliceStart = $seek > $context ? $seek - $context : 0;
                 // explicit arithmetic: the former "$mbSr_len + $context << 1" was parsed as
                 // "($mbSr_len + $context) << 1" and made the window longer than intended
                 $sliceLength = ($seek - $sliceStart) + $mbSr_len + $context;
                 $textSlice = $mbTextNoSymbol->substr($sliceStart, $sliceLength);
                 if (!preg_match("/{$conditionRegex}/u", $textSlice)) {
                     continue;
                 }
             }
             // replace frontward
             $seekFront = $seek;
             while ($seekFront > 0) {
                 --$seekFront;
                 // check $charToCheck is in $sr or not
                 $charToCheck = $mbTextNoSymbol[$seekFront];
                 $charToCheckPosInSr = $mbSr->strpos($charToCheck);
                 if ($charToCheckPosInSr === false) {
                     break;
                 }
                 // replace $charToCheck with the corresponding one
                 $repChar = $mbRep[$charToCheckPosInSr];
                 $mbTextNoSymbol->substr_replace_i($repChar, $seekFront, 1);
             }
             // replace backward
             $seekBack = $seek + $mbSr_len;
             while ($seekBack < $mbTextNoSymbol_len) {
                 // check $charToCheck is in $sr or not
                 $charToCheck = $mbTextNoSymbol[$seekBack];
                 $charToCheckPosInSr = $mbSr->strpos($charToCheck);
                 if ($charToCheckPosInSr === false) {
                     break;
                 }
                 // replace $charToCheck with the corresponding one
                 $repChar = $mbRep[$charToCheckPosInSr];
                 $mbTextNoSymbol->substr_replace_i($repChar, $seekBack, 1);
                 ++$seekBack;
             }
             // replace the center
             $mbTextNoSymbol->substr_replace_i($rep, $seek, $mbSr_len);
         }
         unset($mbSr, $mbRep);
     }
     // patch text parts in $textSplit by using $mbTextNoSymbol
     // (piece lengths are unchanged, so each piece is read back from the same offset)
     for ($i = $seek = 0; $i < $textSplitCnt; $i += 2) {
         $pieceLength = MbString::static_strlen($textSplit[$i], $this->encoding);
         $textSplit[$i] = $mbTextNoSymbol->substr($seek, $pieceLength);
         $seek += $pieceLength;
     }
     unset($mbTextNoSymbol);
     // re-construct $text by concatenating $textSplit
     $text = implode('', $textSplit);
 }