Ejemplo n.º 1
0
 /**
  * {@inheritdoc}
  *
  * Advances the lexer for as long as the upcoming character is one of
  * self::CHARS, then emits a single WhitespaceToken and hands control to a
  * new TextState.
  */
 protected function call(Lexer $lexer)
 {
     for (;;) {
         $upcoming = $lexer->peek();
         if (!in_array($upcoming, self::CHARS, true)) {
             // First character that is not in self::CHARS: stop advancing.
             break;
         }

         $lexer->next();
     }

     $lexer->emit(new WhitespaceToken());

     return new TextState();
 }
Ejemplo n.º 2
0
 /**
  * Runs the lexer on a sentence containing "Mr." and "T.V." and compares the
  * string form of the resulting tokens with the expected list.
  */
 public function testAbbreviations()
 {
     $expectedTokens = [
         'T_CAPITALIZED_WORD<"Hello">',
         'T_CAPITALIZED_WORD<"Mr">',
         'T_PERIOD<".">',
         'T_CAPITALIZED_WORD<"Jones,">',
         'T_WORD<"please">',
         'T_WORD<"turn">',
         'T_WORD<"on">',
         'T_WORD<"the">',
         'T_CAPITALIZED_WORD<"T">',
         'T_PERIOD<".">',
         'T_CAPITALIZED_WORD<"V">',
         'T_PERIOD<".">',
         'T_EOF',
     ];

     $lexer = new Lexer();
     $producedTokens = $this->getTokensString(
         $lexer->run('Hello Mr. Jones, please turn on the T.V.')
     );

     $this->assertEquals($expectedTokens, $producedTokens);
 }
Ejemplo n.º 3
0
 /**
  * {@inheritdoc}
  *
  * For each upcoming '.', '?' or '!' the lexer is advanced and the matching
  * punctuation token is emitted. When peek() yields null an EOFToken is
  * emitted and nothing is returned. Any other character is left where it is
  * and the state responsible for it is returned: QuotedStringState,
  * WhitespaceState or, when neither applies, WordState.
  */
 protected function call(Lexer $lexer)
 {
     // Punctuation character => class of the token emitted for it.
     $punctuationTokens = [
         '.' => PeriodToken::class,
         '?' => QuestionMarkToken::class,
         '!' => ExclamationPointToken::class,
     ];

     do {
         $char = $lexer->peek();

         if (null === $char) {
             $lexer->emit(new EOFToken());

             return;
         }

         $isPunctuation = is_string($char) && isset($punctuationTokens[$char]);
         if ($isPunctuation) {
             $lexer->next();
             $tokenClass = $punctuationTokens[$char];
             $lexer->emit(new $tokenClass());
         }
     } while ($isPunctuation);

     // $char is not punctuation here; choose the state that handles it.
     if (in_array($char, QuotedStringState::CHARS, true)) {
         return new QuotedStringState();
     }

     if (in_array($char, WhitespaceState::CHARS, true)) {
         return new WhitespaceState();
     }

     return new WordState();
 }
 /**
  * {@inheritdoc}
  *
  * Reads one character (the opening quote) and then keeps reading until the
  * very same character is returned again. Afterwards a QuotedStringToken is
  * emitted and a new TextState is returned.
  *
  * @throws StateException When next() returns null before the opening
  *                        character has been seen a second time.
  */
 protected function call(Lexer $lexer)
 {
     $openingQuote = $lexer->next();

     do {
         $current = $lexer->next();

         if (null === $current) {
             throw new StateException('Failed to find end of quote. Reached end of input. Read: ' . $lexer->getTokenValue());
         }
     } while ($current !== $openingQuote);

     $lexer->emit(new QuotedStringToken());

     return new TextState();
 }
Ejemplo n.º 5
0
 /**
  * Splits a text into sentences.
  *
  * The current abbreviations are handed to the probability calculator first.
  * The text is then run through the lexer, the resulting tokens through the
  * probability calculator, and that result through the sentence builder.
  *
  * @param string $text
  *
  * @return string[]
  */
 public function split($text)
 {
     $this->probabilityCalculator->setAbbreviations($this->getAbbreviations());

     return $this->sentenceBuilder->build(
         $this->probabilityCalculator->calculate(
             $this->lexer->run($text)
         )
     );
 }
 /**
  * Runs the lexer and the probability calculator on $input and asserts the
  * resulting list of "<token name> <probability>" strings.
  *
  * Word, capitalized-word, whitespace and EOF tokens are left out of the
  * compared list.
  *
  * @param string $input          Text handed to the lexer.
  * @param array  $expectedResult Expected "<token name> <probability>" strings, in order.
  * @param array  $abbreviations  Values passed to the Abbreviations constructor.
  */
 private function runCalculateTest($input, array $expectedResult, array $abbreviations)
 {
     $tokens = (new Lexer())->run($input);

     $calculator = new ProbabilityCalculator(
         IniConfiguration::loadFile(__DIR__ . '/../../rules/rules.ini')->getRules()
     );
     $calculator->setAbbreviations(new Abbreviations($abbreviations));

     $actual = [];
     foreach ($calculator->calculate($tokens) as $result) {
         $token = $result->getToken();

         $isWord = $token instanceof WordToken || $token instanceof CapitalizedWordToken;
         $isFiller = $token instanceof WhitespaceToken || $token instanceof EOFToken;

         // Only tokens that are neither words nor filler take part in the comparison.
         if (!$isWord && !$isFiller) {
             $actual[] = $token->getName() . ' ' . $result->getProbability();
         }
     }

     $this->assertEquals($expectedResult, $actual);
 }
Ejemplo n.º 7
0
 /**
  * {@inheritdoc}
  *
  * Advances the lexer until the upcoming character is one of the characters
  * returned by getNonWordChars(). The text read so far is then classified:
  * a CapitalizedWordToken is emitted when it starts with an uppercase letter,
  * a WordToken otherwise. Control always returns to a new TextState.
  */
 protected function call(Lexer $lexer)
 {
     $nonWordChars = $this->getNonWordChars();
     while (!in_array($lexer->peek(), $nonWordChars, true)) {
         $lexer->next();
     }

     $value = (string) $lexer->getTokenValue();

     // ctype_upper() on the first *byte* only recognises ASCII capitals, so
     // UTF-8 words such as "Élan" or "Österreich" were classified as plain
     // words. Match a Unicode uppercase letter at the start instead.
     $matched = preg_match('/^\p{Lu}/u', $value);

     // preg_match() returns false when $value is not valid UTF-8; in that
     // case fall back to the previous byte-wise check.
     $isCapitalized = false === $matched
         ? ctype_upper(substr($value, 0, 1))
         : 1 === $matched;

     if ($isCapitalized) {
         $lexer->emit(new CapitalizedWordToken());
     } else {
         $lexer->emit(new WordToken());
     }

     return new TextState();
 }