applyTransformation() 공개 메소드

Applies the given token transformation to the document and returns a TokensDocument, so calls can be chained.
public applyTransformation ( TextAnalysis\Interfaces\ITokenTransformation $transformer, $removeNulls = true ) : TokensDocument
$transformer TextAnalysis\Interfaces\ITokenTransformation
리턴 TokensDocument
 /**
  * Checks that the PMI collocation results contain the key 'outlying cottages'
  * after the shared fixture text has been filtered and tokenized.
  */
 public function testGetCollocationsByPmi()
 {
     // Run the fixture text through SpacePunctuationFilter, then split it
     // on space, newline, tab and carriage return only.
     $spacedText = (new SpacePunctuationFilter())->transform(self::$text);
     $whitespaceTokenizer = new GeneralTokenizer(" \n\t\r");
     $document = new TokensDocument($whitespaceTokenizer->tokenize($spacedText));

     // Only the PunctuationFilter step passes $removeNulls = false; the
     // other steps rely on the parameter's default.
     $document
         ->applyTransformation(new LowerCaseFilter())
         ->applyTransformation(new PunctuationFilter([]), false)
         ->applyTransformation(new StopWordsFilter([]))
         ->applyTransformation(new QuotesFilter())
         ->applyTransformation(new CharFilter());

     $collocationFinder = new CollocationFinder($document->toArray(), 2);
     $collocationsByPmi = $collocationFinder->getCollocationsByPmi();

     $this->assertArrayHasKey('outlying cottages', $collocationsByPmi);
 }
예제 #2
0
 /**
  * Checks that the Rake keyword scores contain the expected phrases for
  * the test data, including one made of a date and a time.
  */
 public function testRake()
 {
     // One stop word per line; trim strips the trailing line endings.
     $stopWordLines = file(VENDOR_DIR . 'yooper/stop-words/data/stop-words_english_1_en.txt');
     $stopWordList = array_map('trim', $stopWordLines);

     // Punctuation has to be shifted one position over, otherwise sentences
     // are handled incorrectly. ':' and '\/' are passed to the filter.
     $punctuationSpacer = new SpacePunctuationFilter([':', '\\/']);
     $spacedText = $punctuationSpacer->transform($this->getTestData());

     // Rake requires splitting on whitespace and new lines only.
     $whitespaceTokenizer = new GeneralTokenizer(" \n\t\r");
     $document = new TokensDocument($whitespaceTokenizer->tokenize($spacedText));

     // Every step after lower-casing passes $removeNulls = false.
     $document
         ->applyTransformation(new LowerCaseFilter())
         ->applyTransformation(new StopWordsFilter($stopWordList), false)
         ->applyTransformation(new PunctuationFilter(['@', ':', '\\/']), false)
         ->applyTransformation(new CharFilter(), false);

     $keywordScores = (new Rake($document, 3))->getKeywordScores();

     $this->assertArrayHasKey('minimal generating sets', $keywordScores);
     $this->assertArrayHasKey('8/8/2016 5:51 pm', $keywordScores);
 }
 /**
  * Returns the stop words and their frequencies.
  *
  * A non-empty $this->stopWords is returned as-is. Otherwise every path
  * from getFilePaths() is read, tokenized and normalized, and, when the
  * mode is MODE_FREQ, handed to computeUsingFreqDist(). The result is then
  * sorted by value in descending order with keys preserved.
  *
  * NOTE(review): computeUsingFreqDist() presumably fills $this->stopWords;
  * confirm against its definition.
  *
  * @return string[]
  */
 public function getStopwords()
 {
     if (empty($this->stopWords)) {
         foreach ($this->getFilePaths() as $path) {
             $rawText = $this->getFileContent($path);
             $tokens = (new GeneralTokenizer())->tokenize($rawText);

             $document = new TokensDocument($tokens);
             $document
                 ->applyTransformation(new LowerCaseFilter())
                 ->applyTransformation(new PossessiveNounFilter())
                 ->applyTransformation(new PunctuationFilter())
                 ->applyTransformation(new CharFilter());

             // Frequency distribution is the only mode handled here.
             if ($this->mode !== self::MODE_FREQ) {
                 continue;
             }

             $this->computeUsingFreqDist($document->getDocumentData());
         }

         // Highest values first; arsort keeps the key association.
         arsort($this->stopWords);
     }

     return $this->stopWords;
 }