/**
  * Splits the given text into sentences.
  *
  * The probability calculator is first handed the current abbreviations,
  * then the text is lexed into tokens, the tokens are passed through the
  * calculator, and the sentence builder assembles the final result from
  * the calculator's output.
  *
  * @param string $text Text to split.
  *
  * @return string[] Result produced by the sentence builder.
  */
 public function split($text)
 {
     $calculator = $this->probabilityCalculator;
     $calculator->setAbbreviations($this->getAbbreviations());

     // Lex first, then score: the calculator works on the lexer's tokens.
     $scoredTokens = $calculator->calculate($this->lexer->run($text));

     return $this->sentenceBuilder->build($scoredTokens);
 }
Example #2
0
 /**
  * Verifies the token stream the Lexer emits for text containing
  * abbreviations ("Mr." and "T.V."): each period is expected as its own
  * T_PERIOD token, and the stream is expected to end with T_EOF.
  */
 public function testAbbreviations()
 {
     $expectedTokens = [
         'T_CAPITALIZED_WORD<"Hello">',
         'T_CAPITALIZED_WORD<"Mr">',
         'T_PERIOD<".">',
         'T_CAPITALIZED_WORD<"Jones,">',
         'T_WORD<"please">',
         'T_WORD<"turn">',
         'T_WORD<"on">',
         'T_WORD<"the">',
         'T_CAPITALIZED_WORD<"T">',
         'T_PERIOD<".">',
         'T_CAPITALIZED_WORD<"V">',
         'T_PERIOD<".">',
         'T_EOF',
     ];

     $lexed = (new Lexer())->run('Hello Mr. Jones, please turn on the T.V.');

     $this->assertEquals($expectedTokens, $this->getTokensString($lexed));
 }
 /**
  * Lexes the input, runs the resulting tokens through a ProbabilityCalculator
  * configured from rules.ini and the given abbreviations, and asserts that
  * the reported "<token name> <probability>" strings equal the expectation.
  *
  * Word, capitalized-word, whitespace and EOF tokens are left out of the
  * comparison; every other token type is included.
  *
  * @param string $input          Text to lex and score.
  * @param array  $expectedResult Expected "<token name> <probability>" strings, in order.
  * @param array  $abbreviations  Values passed to the Abbreviations constructor.
  */
 private function runCalculateTest($input, array $expectedResult, array $abbreviations)
 {
     $tokens = (new Lexer())->run($input);

     $calculator = new ProbabilityCalculator(
         IniConfiguration::loadFile(__DIR__ . '/../../rules/rules.ini')->getRules()
     );
     $calculator->setAbbreviations(new Abbreviations($abbreviations));

     $actual = [];
     foreach ($calculator->calculate($tokens) as $scored) {
         $token = $scored->getToken();

         // These token types are excluded from the comparison.
         $isIgnored = $token instanceof WordToken
             || $token instanceof CapitalizedWordToken
             || $token instanceof WhitespaceToken
             || $token instanceof EOFToken;

         if (!$isIgnored) {
             $actual[] = $token->getName() . ' ' . $scored->getProbability();
         }
     }

     $this->assertEquals($expectedResult, $actual);
 }