/**
 * Lowercases the provided text, sanitizes it against a StopwordAnalyzer whose
 * list was loaded via loadListByLanguage() for the given language code, and
 * compares the outcome with the expected string.
 *
 * @dataProvider textByLanguageProvider
 */
public function testByLanguage($languageCode, $text, $expected)
{
    // The analyzer is prepared first so it is fully loaded before sanitizing.
    $analyzer = new StopwordAnalyzer();
    $analyzer->loadListByLanguage($languageCode);

    $instance = new Sanitizer($text);
    $instance->toLowercase();

    $this->assertEquals(
        $expected,
        $instance->sanitizeBy($analyzer)
    );
}
/**
 * Builds a Sanitizer from the provided string and checks that getTokens(),
 * called with the provided flag, yields the expected value.
 *
 * @dataProvider stringProvider
 */
public function testGetTokens($string, $flag, $expected)
{
    $sanitizer = new Sanitizer($string);
    $tokens = $sanitizer->getTokens($flag);

    $this->assertEquals($expected, $tokens);
}
/**
 * Feeds sanitizeWith() a text in which every token appears twice in a row and
 * expects the result to contain each token only once ('foo テスト').
 *
 * The tokenizer is mocked to report itself as a word tokenizer and to return
 * a fixed token list; the synonymizer mock hands back its first argument
 * unchanged, so neither collaborator alters the tokens on its own.
 * NOTE(review): the method name suggests the de-duplication comes from a
 * proximity check inside sanitizeWith() — confirm against Sanitizer.
 */
public function testTrySanitizeByStopwordsWithProximityCheck()
{
    $text = 'foo foo テスト テスト';
    $tokens = array('foo', 'foo', 'テスト', 'テスト');
    $expected = 'foo テスト';

    $tokenizer = $this->getMockBuilder('\\Onoi\\Tesa\\Tokenizer\\Tokenizer')
        ->disableOriginalConstructor()
        ->getMockForAbstractClass();

    $tokenizer->expects($this->once())
        ->method('isWordTokenizer')
        ->will($this->returnValue(true));

    // tokenize() must be invoked exactly once, with the untouched input text.
    $tokenizer->expects($this->once())
        ->method('tokenize')
        ->with($this->equalTo($text))
        ->will($this->returnValue($tokens));

    $synonymizer = $this->getMockBuilder('\\Onoi\\Tesa\\Synonymizer\\Synonymizer')
        ->disableOriginalConstructor()
        ->getMockForAbstractClass();

    // Identity behaviour: whatever is passed in first comes straight back.
    $synonymizer->expects($this->any())
        ->method('synonymize')
        ->will($this->returnArgument(0));

    $instance = new Sanitizer($text);
    $stopwordAnalyzer = $this->sanitizerFactory->newArrayStopwordAnalyzer();

    $actual = $instance->sanitizeWith($tokenizer, $stopwordAnalyzer, $synonymizer);

    $this->assertEquals($expected, $actual);
}