/**
 * Exercises StringUtil::getPrevNonWhitespace() both without a position
 * argument and with an explicit one.
 */
public function testGetPrevNonWhitespace()
{
    // A string made up of a single space is expected to yield boolean false.
    $this->assertFalse(StringUtil::getPrevNonWhitespace(' '));

    // No position given: the expected value is the offset of the final character.
    $englishPhrase = 'natural language processing';
    $this->assertSame(26, StringUtil::getPrevNonWhitespace($englishPhrase));

    $indonesianPhrase = 'segmentasi kalimat';
    $this->assertEquals(17, StringUtil::getPrevNonWhitespace($indonesianPhrase));

    // Offset 12 is the space after "belajar"; offset 11 is the letter before it.
    $sentence = 'Saya belajar segmentasi kalimat.';
    $this->assertEquals(11, StringUtil::getPrevNonWhitespace($sentence, 12));

    // The given position itself is excluded from the search, so 5 gives 4.
    $word = 'bahasa';
    $this->assertEquals(4, StringUtil::getPrevNonWhitespace($word, 5));
}
/**
 * Extracts the token surrounding $position, without its final character.
 *
 * The token is bounded by the offsets reported by
 * StringUtil::getPrevWhitespace() and StringUtil::getNextWhitespace(); when
 * either lookup returns false, the matching end of $text is used instead.
 *
 * @param string $text     Text the token is read from.
 * @param int    $position Offset inside $text.
 *
 * @return string The token minus its last character, or '' when $position
 *                is at or beyond the last offset of $text.
 */
private function getToken($text, $position)
{
    $length = strlen($text);

    if ($position >= $length - 1) {
        return '';
    }

    $following = StringUtil::getNextWhitespace($text, $position);
    $preceding = StringUtil::getPrevWhitespace($text, $position);

    // The token begins right after the preceding whitespace, or at offset 0.
    $from = 0;
    if ($preceding !== false) {
        $from = $preceding + 1;
    }

    // The end offset sits one short of the boundary, which drops the token's
    // last character from the result.
    $boundary = $following === false ? $length : $following;
    $until = $boundary - 1;

    return substr($text, $from, $until - $from);
}
/**
 * Extracts the token surrounding $position, with one trailing
 * end-of-sentence character removed if present.
 *
 * An empty string is returned when $position is at or beyond the last offset
 * of $text, or when the character right after $position is whitespace
 * according to StringUtil::isWhitespace().
 *
 * @param string $text     Text the token is read from.
 * @param int    $position Offset inside $text.
 *
 * @return string The token, or '' when the conditions above rule one out.
 */
private function getToken($text, $position)
{
    $length = strlen($text);

    if ($position >= $length - 1) {
        return '';
    }

    if (StringUtil::isWhitespace(substr($text, $position + 1, 1))) {
        return '';
    }

    $following = StringUtil::getNextWhitespace($text, $position);
    $preceding = StringUtil::getPrevWhitespace($text, $position);

    // Token boundaries: just after the preceding whitespace (or offset 0) up
    // to the following whitespace (or the end of the text).
    $from = 0;
    if ($preceding !== false) {
        $from = $preceding + 1;
    }
    $until = $length;
    if ($following !== false) {
        $until = $following;
    }

    $token = substr($text, $from, $until - $from);

    // Drop the last character when it is one of the configured eosChars.
    if (!empty($token)) {
        $lastIndex = strlen($token) - 1;
        if (in_array($token[$lastIndex], $this->eosChars)) {
            $token = substr($token, 0, $lastIndex);
        }
    }

    return $token;
}
/**
 * Builds a Span for the text remaining after the last entry of $positions.
 *
 * The span starts at the offset reported by
 * StringUtil::getNextNonWhitespace() for the last position and ends one past
 * the offset reported by StringUtil::getPrevNonWhitespace() for the whole text.
 *
 * @param string $text      Text being examined.
 * @param array  $positions Offsets into $text; only the last entry is read.
 *
 * @return Span|null The leftover span, or null when the last position is the
 *                   final offset of $text, no start offset is found, or the
 *                   end offset is not greater than the start offset.
 */
private function getLeftoverSpan($text, array $positions)
{
    $lastPosition = $positions[count($positions) - 1];

    // Nothing is left over when the last position is the final offset.
    if ($lastPosition == strlen($text) - 1) {
        return null;
    }

    $spanStart = StringUtil::getNextNonWhitespace($text, $lastPosition);
    $spanEnd = StringUtil::getPrevNonWhitespace($text);

    if ($spanStart === false || $spanEnd - $spanStart <= 0) {
        return null;
    }

    // $spanEnd is the offset of a character; the Span receives one past it.
    return new Span($spanStart, $spanEnd + 1);
}