/**
 * Verifies the index reported by StringUtil::getPrevNonWhitespace().
 *
 * Strict assertions are used throughout so that a result of the wrong type
 * (for example "17" or 17.0 instead of 17) is reported as a failure.
 */
public function testGetPrevNonWhitespace()
 {
     // The text holds nothing but whitespace, so false is expected.
     $this->assertFalse(StringUtil::getPrevNonWhitespace(' '));
     // No position given: the expected result is the index of the final character.
     $this->assertSame(26, StringUtil::getPrevNonWhitespace('natural language processing'));
     $this->assertSame(17, StringUtil::getPrevNonWhitespace('segmentasi kalimat'));
     // Position 12 is the space after "belajar"; index 11 is the last letter of that word.
     $this->assertSame(11, StringUtil::getPrevNonWhitespace('Saya belajar segmentasi kalimat.', 12));
     // The character at the given position is itself excluded from the lookup:
     // index 5 holds a letter, yet the expected result is 4.
     $this->assertSame(4, StringUtil::getPrevNonWhitespace('bahasa', 5));
 }
 /**
  * Extracts the token around $position, leaving out its final character.
  *
  * The token begins right after the index reported by
  * StringUtil::getPrevWhitespace() (or at offset 0 when that call returns
  * false). Its boundary is the index reported by
  * StringUtil::getNextWhitespace() (or the length of $text when that call
  * returns false); the character immediately before that boundary is not
  * included in the result.
  *
  * @param string $text     Text to read the token from.
  * @param int    $position Offset inside $text; presumably the offset of a
  *                         sentence-ending character (TODO confirm against callers).
  *
  * @return string The extracted token, or '' when $position is the last
  *                offset of $text or lies beyond it.
  */
 private function getToken($text, $position)
 {
     $textLength = strlen($text);
     if ($position >= $textLength - 1) {
         return '';
     }

     $followingWs = StringUtil::getNextWhitespace($text, $position);
     $precedingWs = StringUtil::getPrevWhitespace($text, $position);

     // Start just past the preceding whitespace, or at the very beginning.
     $from = 0;
     if ($precedingWs !== false) {
         $from = $precedingWs + 1;
     }

     // The boundary is the following whitespace, or the end of the text.
     $boundary = $followingWs;
     if ($followingWs === false) {
         $boundary = $textLength;
     }

     // "- 1" stops one character short of the boundary.
     return substr($text, $from, ($boundary - 1) - $from);
 }
示例#3
0
 /**
  * Extracts the token around $position when the next character is not
  * whitespace.
  *
  * The token spans from just after the index reported by
  * StringUtil::getPrevWhitespace() (or offset 0 when that call returns false)
  * up to the index reported by StringUtil::getNextWhitespace() (or the end of
  * $text when that call returns false). If the last character of the token is
  * listed in $this->eosChars, that single character is removed.
  *
  * @param string $text     Text to read the token from.
  * @param int    $position Offset inside $text.
  *
  * @return string The token, or '' when $position is the last offset of $text
  *                (or beyond it) or when the character right after $position
  *                is whitespace.
  */
 private function getToken($text, $position)
 {
     // Nothing follows the position, so there is no token to report.
     if ($position >= strlen($text) - 1) {
         return '';
     }

     // A whitespace character directly after the position also yields no token.
     $followingChar = substr($text, $position + 1, 1);
     if (StringUtil::isWhitespace($followingChar)) {
         return '';
     }

     $followingWs = StringUtil::getNextWhitespace($text, $position);
     $precedingWs = StringUtil::getPrevWhitespace($text, $position);

     $from = 0;
     if ($precedingWs !== false) {
         $from = $precedingWs + 1;
     }

     $until = $followingWs;
     if ($followingWs === false) {
         $until = strlen($text);
     }

     $word = substr($text, $from, $until - $from);
     if (empty($word)) {
         return $word;
     }

     // Remove one trailing end-of-sentence character, if present.
     $lastIndex = strlen($word) - 1;
     if (in_array($word[$lastIndex], $this->eosChars)) {
         $word = substr($word, 0, $lastIndex);
     }

     return $word;
 }
 /**
  * Builds a Span for the text that remains after the last entry of
  * $positions.
  *
  * The span starts at the index returned by
  * StringUtil::getNextNonWhitespace() for the last position and is created
  * with an end argument one past the index returned by
  * StringUtil::getPrevNonWhitespace() for the whole text.
  *
  * @param string $text      Text being examined.
  * @param array  $positions Offsets into $text; only the entry at index
  *                          count($positions) - 1 is read. NOTE(review): an
  *                          empty array is not guarded against here - confirm
  *                          that callers never pass one.
  *
  * @return Span|null The leftover span, or null when the last position is the
  *                   final offset of $text, when no start index is found, or
  *                   when the end index is not greater than the start index.
  */
 private function getLeftoverSpan($text, array $positions)
 {
     $lastPosition = $positions[count($positions) - 1];

     // The last position already sits on the final character: nothing is left over.
     if ($lastPosition == strlen($text) - 1) {
         return null;
     }

     $spanStart = StringUtil::getNextNonWhitespace($text, $lastPosition);
     $spanEnd = StringUtil::getPrevNonWhitespace($text);

     if ($spanStart === false || $spanEnd - $spanStart <= 0) {
         return null;
     }

     return new Span($spanStart, $spanEnd + 1);
 }