/**
 * Splits the subject into sentences and appends them to the iterator cache.
 *
 * A sentence element consists of a text part plus the delimiter that ends it,
 * i.e. one punctuation mark belongs to the preceding sentence. Whitespace
 * stripped from the edges of a part is appended as a separate boundary
 * element made of plain space characters, placed before that part's
 * sentence. A delimiter that is not preceded by any text is appended as a
 * boundary element of its own.
 *
 * NOTE: when the subject contains delimiters, text following the last
 * delimiter is not appended, because no delimiter is left to pair it with.
 *
 * @return void
 */
private function parseSubjectBySentence() {
    $delimiterPattern = '/' . self::REGEXP_SENTENCE_DELIMITERS . '/';

    $delimitersMatches = array();
    preg_match_all($delimiterPattern, $this->subject, $delimitersMatches);
    $parts = preg_split($delimiterPattern, $this->subject);

    if (count($parts) == 1) {
        // No delimiter found: the whole subject is one single sentence.
        $onlySentence = $parts[0];
        $this->iteratorCache->append(new \TYPO3\FLOW3\Utility\Unicode\TextIteratorElement($onlySentence, 0, \TYPO3\FLOW3\Utility\Unicode\Functions::strlen($onlySentence), FALSE));
        return;
    }

    $delimiterTotal = count($delimitersMatches[0]);
    $offset = 0;
    $delimiterIndex = 0;
    $strippedCount = 0;

    foreach ($parts as $part) {
        // Strip edge whitespace; $strippedCount receives the number of removed characters.
        $part = preg_replace('/^\\s|\\s$/', '', $part, -1, $strippedCount);

        if ($strippedCount > 0) {
            // The removed whitespace is represented by the same number of spaces.
            $this->iteratorCache->append(new \TYPO3\FLOW3\Utility\Unicode\TextIteratorElement(str_repeat(' ', $strippedCount), $offset, $strippedCount, TRUE));
        }
        $offset += $strippedCount;

        if ($delimiterIndex >= $delimiterTotal) {
            // All delimiters are consumed; nothing more is appended for this part.
            continue;
        }

        $delimiter = $delimitersMatches[0][$delimiterIndex];
        $delimiterIndex++;

        if ($part != '') {
            // Regular sentence: the text together with its closing delimiter.
            $sentence = $part . $delimiter;
            $sentenceLength = \TYPO3\FLOW3\Utility\Unicode\Functions::strlen($sentence);
            $this->iteratorCache->append(new \TYPO3\FLOW3\Utility\Unicode\TextIteratorElement($sentence, $offset, $sentenceLength, FALSE));
            $offset += $sentenceLength;
        } else {
            // Delimiter without preceding text: appended as a boundary of length 1.
            $this->iteratorCache->append(new \TYPO3\FLOW3\Utility\Unicode\TextIteratorElement($delimiter, $offset, 1, TRUE));
            $offset++;
        }
    }
}
/**
 * Verifies that the Unicode-aware strlen() counts characters correctly for a
 * string that mixes ASCII text with accented letters, ligatures and
 * typographic quotation marks.
 *
 * @test
 */
public function strlenWorksWithCertainSpecialChars() {
    $subject = 'here are some characters: äöüäöüßéèêååøøæ朜“” ...';

    // 26 leading ASCII characters + 20 special characters + 4 trailing ASCII characters.
    $expectedLength = 50;
    $actualLength = \TYPO3\FLOW3\Utility\Unicode\Functions::strlen($subject);

    $this->assertEquals($expectedLength, $actualLength, 'strlen() did not return the correct string length for unicode string.');
}