/**
 * Splits the subject into sentences and appends one TextIteratorElement per
 * piece to the iterator cache.
 *
 * The subject is cut at every match of REGEXP_SENTENCE_DELIMITERS. Each
 * non-empty piece is stored together with the delimiter that follows it, so
 * the punctuation mark belongs to the preceding sentence. Whitespace stripped
 * from the edges of a piece, as well as a delimiter that follows an empty
 * piece, is stored as a boundary element (fourth constructor argument TRUE).
 *
 * Notes on the behaviour as implemented:
 *  - At most one leading and one trailing whitespace character are stripped
 *    from each piece. They are emitted as a single boundary element made of
 *    plain spaces, placed in front of the sentence.
 *  - A piece is only appended while an unused delimiter is left, so text that
 *    follows the last delimiter does not produce a sentence element.
 *
 * @return void
 */
private function parseSubjectBySentence() {
    $pattern = '/' . self::REGEXP_SENTENCE_DELIMITERS . '/';

    $matches = array();
    preg_match_all($pattern, $this->subject, $matches);
    $parts = preg_split($pattern, $this->subject);

    // Without any delimiter the whole subject is one single sentence.
    if (count($parts) == 1) {
        $onlySentence = $parts[0];
        $this->iteratorCache->append(new \TYPO3\FLOW3\Utility\Unicode\TextIteratorElement(
            $onlySentence,
            0,
            \TYPO3\FLOW3\Utility\Unicode\Functions::strlen($onlySentence),
            FALSE
        ));
        return;
    }

    $delimiterCount = count($matches[0]);
    $offset = 0;
    $delimiterIndex = 0;

    foreach ($parts as $part) {
        // $strippedCount receives the number of whitespace characters removed (0 to 2).
        $strippedCount = 0;
        $part = preg_replace('/^\\s|\\s$/', '', $part, -1, $strippedCount);

        if ($strippedCount > 0) {
            $this->iteratorCache->append(new \TYPO3\FLOW3\Utility\Unicode\TextIteratorElement(
                str_repeat(' ', $strippedCount),
                $offset,
                $strippedCount,
                TRUE
            ));
        }
        $offset += $strippedCount;

        // Nothing more is appended once every delimiter has been consumed.
        if ($delimiterIndex >= $delimiterCount) {
            continue;
        }

        $delimiter = $matches[0][$delimiterIndex];
        $delimiterIndex++;

        if ($part != '') {
            // Sentence text plus its closing punctuation mark.
            $sentence = $part . $delimiter;
            $sentenceLength = \TYPO3\FLOW3\Utility\Unicode\Functions::strlen($sentence);
            $this->iteratorCache->append(new \TYPO3\FLOW3\Utility\Unicode\TextIteratorElement(
                $sentence,
                $offset,
                $sentenceLength,
                FALSE
            ));
            $offset += $sentenceLength;
        } else {
            // A delimiter without preceding text is a boundary of length 1.
            $this->iteratorCache->append(new \TYPO3\FLOW3\Utility\Unicode\TextIteratorElement(
                $delimiter,
                $offset,
                1,
                TRUE
            ));
            $offset++;
        }
    }
}
/**
 * Verifies that the Unicode-aware strpos() reports a character offset for a
 * haystack that starts with a non-ASCII character.
 *
 * The needle is preceded by the eight characters of "Åeugiat ", so the
 * expected position is 8.
 *
 * @test
 */
public function strposWorksWithCertainSpecialChars() {
    $haystack = 'Åeugiat tincidunt duo id, 23 quam delenit vocibus nam eu';
    $needle = 'tincidunt';
    $expectedPosition = 8;

    $actualPosition = \TYPO3\FLOW3\Utility\Unicode\Functions::strpos($haystack, $needle);

    $this->assertEquals(
        $expectedPosition,
        $actualPosition,
        'strpos() did not return the correct positions for a unicode string.'
    );
}