/**
 * Exercises StringUtil::getPrevWhitespace() both without and with an
 * explicit position argument.
 */
public function testGetPrevWhitespace()
{
    $sentence = 'Saya belajar segmentasi kalimat.';

    // A single word contains no whitespace at all: false is expected.
    $resultForSingleWord = StringUtil::getPrevWhitespace('Saya');
    $this->assertSame(false, $resultForSingleWord);

    // A lone space is found at offset 0 (must be the integer 0, not false).
    $resultForLoneSpace = StringUtil::getPrevWhitespace(' ');
    $this->assertSame(0, $resultForLoneSpace);

    // Without a position argument the space between the two words (offset 10) is expected.
    $resultForTwoWords = StringUtil::getPrevWhitespace('segmentasi kalimat');
    $this->assertEquals(10, $resultForTwoWords);

    // Offset 15 lies inside "segmentasi"; the space before that word is at offset 12.
    $resultInsideWord = StringUtil::getPrevWhitespace($sentence, 15);
    $this->assertEquals(12, $resultInsideWord);

    // The current position is exclusive: offset 12 is itself a space,
    // yet the expected answer is the earlier space at offset 4.
    $resultOnWhitespace = StringUtil::getPrevWhitespace($sentence, 12);
    $this->assertEquals(4, $resultOnWhitespace);
}
/**
 * Finds the whitespace-delimited token surrounding $position and returns
 * its boundaries as a Span.
 *
 * The start offset is one past the whitespace reported by
 * StringUtil::getPrevWhitespace(), or 0 when none is reported. The end
 * offset is the whitespace offset reported by
 * StringUtil::getNextWhitespace(), or the length of $text when none is
 * reported.
 *
 * @param string $text     Text to search in.
 * @param int    $position Offset in $text around which the token is looked up.
 *
 * @return Span|null Span built from the start and end offsets, or null when
 *                   $position is the last offset of $text or lies beyond it.
 */
private function findToken($text, $position)
{
    $textLength = strlen($text);

    // No lookup is performed at or past the final character.
    if ($position >= $textLength - 1) {
        return null;
    }

    $nextWs = StringUtil::getNextWhitespace($text, $position);
    $prevWs = StringUtil::getPrevWhitespace($text, $position);

    // Both helpers signal "no whitespace found" with false; offset 0 is a
    // valid hit, hence the identity comparisons.
    $tokenStart = $prevWs === false ? 0 : $prevWs + 1;
    $tokenEnd = $nextWs === false ? $textLength : $nextWs;

    // Only the boundaries are needed; the token text itself is not extracted.
    return new Span($tokenStart, $tokenEnd);
}
/**
 * Finds the whitespace-delimited token surrounding $position and returns
 * its boundaries as a Span, leaving out one trailing end-of-sentence
 * character if the token ends with one.
 *
 * The start offset is one past the whitespace reported by
 * StringUtil::getPrevWhitespace(), or 0 when none is reported. The end
 * offset is the whitespace offset reported by
 * StringUtil::getNextWhitespace(), or the length of $text when none is
 * reported. If the last character of the token is listed in
 * self::$standardEosChars, the end offset is reduced by one.
 *
 * @param string $text     Text to search in.
 * @param int    $position Offset in $text around which the token is looked up.
 *
 * @return Span|null Span built from the start and end offsets, or null when
 *                   $position is the last offset of $text (or beyond), or
 *                   when the character right after $position is whitespace.
 */
public static function findToken($text, $position)
{
    $textLength = strlen($text);

    // A token is only looked up when a following character exists and
    // that character is not whitespace.
    $hasFollowingChar = $position < $textLength - 1;
    if (!$hasFollowingChar || StringUtil::isWhitespace(substr($text, $position + 1, 1))) {
        return null;
    }

    $nextWs = StringUtil::getNextWhitespace($text, $position);
    $prevWs = StringUtil::getPrevWhitespace($text, $position);

    // Both helpers signal "no whitespace found" with false; offset 0 is a
    // valid hit, hence the identity comparisons.
    $tokenStart = $prevWs === false ? 0 : $prevWs + 1;
    $tokenEnd = $nextWs === false ? $textLength : $nextWs;

    $token = substr($text, $tokenStart, $tokenEnd - $tokenStart);
    $tokenLength = strlen($token);

    // Strip a trailing end-of-sentence character (e.g. ".") from the span.
    // Strict comparison: the candidate is always a one-character string.
    // NOTE(review): assumes self::$standardEosChars holds strings - confirm.
    if ($tokenLength > 0 && in_array($token[$tokenLength - 1], self::$standardEosChars, true)) {
        $tokenEnd--;
    }

    return new Span($tokenStart, $tokenEnd);
}