Example #1
0
 /**
  * Builds a Sanitizer from the given text, lower-cases it, sanitizes it by a
  * StopwordAnalyzer whose list was loaded for the given language code, and
  * asserts that the outcome equals the expected value.
  *
  * @dataProvider textByLanguageProvider
  *
  * @param mixed $languageCode argument handed to loadListByLanguage()
  * @param mixed $text         input handed to the Sanitizer constructor
  * @param mixed $expected     value the sanitized output is compared against
  */
 public function testByLanguage($languageCode, $text, $expected)
 {
     // Arrange: analyzer with the language-specific list loaded.
     $analyzer = new StopwordAnalyzer();
     $analyzer->loadListByLanguage($languageCode);

     // Arrange: sanitizer over the raw text, lower-cased before sanitizing.
     $subject = new Sanitizer($text);
     $subject->toLowercase();

     // Act + assert in one step; the result is not needed afterwards.
     $this->assertEquals($expected, $subject->sanitizeBy($analyzer));
 }
Example #2
0
 /**
  * Asserts that getTokens(), called with the given flag on a Sanitizer built
  * from the given string, returns a value equal to the expected one.
  *
  * @dataProvider stringProvider
  *
  * @param mixed $string   input handed to the Sanitizer constructor
  * @param mixed $flag     argument forwarded to getTokens()
  * @param mixed $expected value the returned tokens are compared against
  */
 public function testGetTokens($string, $flag, $expected)
 {
     $sanitizer = new Sanitizer($string);

     $tokens = $sanitizer->getTokens($flag);

     $this->assertEquals($expected, $tokens);
 }
Example #3
0
 /**
  * Feeds a text with repeated neighbouring words through sanitizeWith() using
  * a mocked tokenizer, a pass-through synonymizer mock and the array stopword
  * analyzer from the sanitizer factory, and asserts that the result is
  * 'foo テスト'.
  */
 public function testTrySanitizeByStopwordsWithProximityCheck()
 {
     $input = 'foo foo テスト テスト';
     $tokens = array('foo', 'foo', 'テスト', 'テスト');

     // Tokenizer double: reports itself as a word tokenizer and returns the
     // fixed token list; each of the two methods must be called exactly once.
     $tokenizerMock = $this->getMockBuilder('\\Onoi\\Tesa\\Tokenizer\\Tokenizer')
         ->disableOriginalConstructor()
         ->getMockForAbstractClass();

     $tokenizerMock->expects($this->once())
         ->method('isWordTokenizer')
         ->will($this->returnValue(true));

     $tokenizerMock->expects($this->once())
         ->method('tokenize')
         ->with($this->equalTo($input))
         ->will($this->returnValue($tokens));

     // Synonymizer double: hands back its first argument unchanged.
     $synonymizerMock = $this->getMockBuilder('\\Onoi\\Tesa\\Synonymizer\\Synonymizer')
         ->disableOriginalConstructor()
         ->getMockForAbstractClass();

     $synonymizerMock->expects($this->any())
         ->method('synonymize')
         ->will($this->returnArgument(0));

     $sanitizer = new Sanitizer($input);
     $arrayStopwordAnalyzer = $this->sanitizerFactory->newArrayStopwordAnalyzer();

     $this->assertEquals(
         'foo テスト',
         $sanitizer->sanitizeWith($tokenizerMock, $arrayStopwordAnalyzer, $synonymizerMock)
     );
 }