PHP TextAnalysis\Documents TokensDocumentの例

プログラミング言語: PHP

名前空間/パッケージ名: TextAnalysis\Documents

クラス/型: TokensDocument

hotexamples.comのコード掲載数: 7

PHP TextAnalysis\Documents TokensDocument - 7件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPHPのTextAnalysis\Documents\TokensDocumentの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

getDocumentData(4)

applyTransformation(3)

getId(2)

toArray(1)

Author: yooper (yooper)

Inheritance: extends TextAnalysis\Documents\DocumentAbstract

TokensDocument Class Documentation

コード例 #1

ファイルを表示

ファイル: CollocationFinderTest.php プロジェクト: yooper/php-text-analysis

 /**
  * Checks that getCollocationsByPmi() returns an array containing the key
  * 'outlying cottages' for the class-level fixture text.
  */
 public function testGetCollocationsByPmi()
 {
     // Pre-process the fixture text, then tokenize it into a document.
     $spacedText = (new SpacePunctuationFilter())->transform(self::$text);
     $tokenizer = new GeneralTokenizer(" \n\t\r");
     $tokenDoc = new TokensDocument($tokenizer->tokenize($spacedText));

     // The filters are applied through one fluent chain, in this exact order.
     $tokenDoc
         ->applyTransformation(new LowerCaseFilter())
         ->applyTransformation(new PunctuationFilter([]), false)
         ->applyTransformation(new StopWordsFilter([]))
         ->applyTransformation(new QuotesFilter())
         ->applyTransformation(new CharFilter());

     $collocations = (new CollocationFinder($tokenDoc->toArray(), 2))->getCollocationsByPmi();
     $this->assertArrayHasKey('outlying cottages', $collocations);
 }

コード例 #2

ファイルを表示

ファイル: InvertedIndex.php プロジェクト: yooper/php-text-analysis

 /**
  * Adds every term of the given document to the index.
  *
  * For each term returned by getDocumentData() the term's FREQ counter is
  * incremented and the document id is appended to its POSTINGS list. The id
  * is appended once per occurrence, so a term that appears several times in
  * the same document contributes the same id several times.
  *
  * @param TokensDocument $document
  * @return void
  */
 public function addDocument(TokensDocument $document)
 {
     foreach ($document->getDocumentData() as $term) {
         if (!isset($this->index[$term])) {
             // First time this term is seen: create its entry.
             $this->index[$term] = [
                 self::FREQ => 1,
                 self::POSTINGS => [$document->getId()],
             ];
             continue;
         }

         // Known term: bump the counter and record the document id again.
         $this->index[$term][self::FREQ]++;
         $this->index[$term][self::POSTINGS][] = $document->getId();
     }
 }

コード例 #3

ファイルを表示

ファイル: RakeTest.php プロジェクト: yooper/php-text-analysis

 /**
  * Runs Rake over the test data and checks that two expected phrases are
  * present as keys in the keyword scores.
  */
 public function testRake()
 {
     $stopWordLines = file(VENDOR_DIR . 'yooper/stop-words/data/stop-words_english_1_en.txt');
     $stopwords = array_map('trim', $stopWordLines);

     // All punctuation must be moved one position over; this fixes issues with sentences.
     $spacingFilter = new SpacePunctuationFilter([':', '\\/']);
     $testData = $spacingFilter->transform($this->getTestData());

     // Rake MUST be split on whitespace and new lines only.
     $tokenDoc = new TokensDocument((new GeneralTokenizer(" \n\t\r"))->tokenize($testData));
     $tokenDoc
         ->applyTransformation(new LowerCaseFilter())
         ->applyTransformation(new StopWordsFilter($stopwords), false)
         ->applyTransformation(new PunctuationFilter(['@', ':', '\\/']), false)
         ->applyTransformation(new CharFilter(), false);

     $results = (new Rake($tokenDoc, 3))->getKeywordScores();
     foreach (['minimal generating sets', '8/8/2016 5:51 pm'] as $expectedPhrase) {
         $this->assertArrayHasKey($expectedPhrase, $results);
     }
 }

コード例 #4

ファイルを表示

ファイル: StanfordPosTaggerTest.php プロジェクト: yooper/php-text-analysis

 /**
  * Tags the fixture text with StanfordPosTagger and checks the result.
  *
  * Asserts that the tagger's temp file exists and is 138 bytes long, and
  * that element 15 of the output is ['Michigan', 'NNP'].
  *
  * The test is reported as skipped when SKIP_TEST is set or JAVA_HOME is
  * not set, instead of returning early and passing without any assertion.
  */
 public function testStanfordPos()
 {
     if (getenv('SKIP_TEST') || !getenv('JAVA_HOME')) {
         // markTestSkipped() aborts the test, so no explicit return is needed.
         $this->markTestSkipped('Stanford POS tagger test skipped: SKIP_TEST is set or JAVA_HOME is not set.');
     }

     $document = new TokensDocument((new WhitespaceTokenizer())->tokenize($this->text));

     $jarPath = get_storage_path('corpora/stanford_pos_tagger') . 'stanford-postagger-3.6.0.jar';
     $modelPath = get_storage_path('corpora/stanford_pos_tagger' . DIRECTORY_SEPARATOR . "models") . "english-left3words-distsim.tagger";

     $tagger = new StanfordPosTagger($jarPath, $modelPath);
     $output = $tagger->tag($document->getDocumentData());

     $this->assertFileExists($tagger->getTmpFilePath());
     $this->assertEquals(138, filesize($tagger->getTmpFilePath()));
     $this->assertEquals(['Michigan', 'NNP'], $output[15], "Did you set JAVA_HOME env variable?");
 }

コード例 #5

ファイルを表示

ファイル: StanfordNerTaggerTest.php プロジェクト: yooper/php-text-analysis

 /**
  * Tags the fixture text with StanfordNerTagger and checks the result.
  *
  * Asserts that the tagger's temp file exists and is 138 bytes long, and
  * that element 15 of the output is ['Michigan', 'LOCATION'].
  *
  * The test is reported as skipped when SKIP_TEST is set or JAVA_HOME is
  * not set, instead of returning early and passing without any assertion.
  */
 public function testStanfordNer()
 {
     if (getenv('SKIP_TEST') || !getenv('JAVA_HOME')) {
         // markTestSkipped() aborts the test, so no explicit return is needed.
         $this->markTestSkipped('Stanford NER tagger test skipped: SKIP_TEST is set or JAVA_HOME is not set.');
     }

     $document = new TokensDocument((new WhitespaceTokenizer())->tokenize($this->text));

     $jarPath = get_storage_path('ner') . 'stanford-ner.jar';
     $classifierPath = get_storage_path('ner' . DIRECTORY_SEPARATOR . "classifiers") . "english.all.3class.distsim.crf.ser.gz";

     $tagger = new StanfordNerTagger($jarPath, $classifierPath);
     $output = $tagger->tag($document->getDocumentData());

     $this->assertFileExists($tagger->getTmpFilePath());
     $this->assertEquals(138, filesize($tagger->getTmpFilePath()));
     $this->assertEquals(['Michigan', 'LOCATION'], $output[15], "Did you set JAVA_HOME env variable?");
 }

コード例 #6

ファイルを表示

ファイル: StopwordGenerator.php プロジェクト: yooper/php-text-analysis

 /**
  * Returns the stop words, computing them on first use.
  *
  * A non-empty $this->stopWords is returned unchanged. Otherwise each path
  * from getFilePaths() is read, tokenized and filtered; in MODE_FREQ the
  * resulting tokens are handed to computeUsingFreqDist(). Finally
  * $this->stopWords is sorted with arsort() and returned.
  *
  * NOTE(review): the arsort() call and the mention of frequencies suggest the
  * array is keyed by word with a numeric value - confirm before tightening
  * the return annotation.
  *
  * @return string[]
  */
 public function getStopwords()
 {
     if (empty($this->stopWords)) {
         foreach ($this->getFilePaths() as $filePath) {
             $rawText = $this->getFileContent($filePath);
             $doc = new TokensDocument((new GeneralTokenizer())->tokenize($rawText));
             $doc
                 ->applyTransformation(new LowerCaseFilter())
                 ->applyTransformation(new PossessiveNounFilter())
                 ->applyTransformation(new PunctuationFilter())
                 ->applyTransformation(new CharFilter());

             // Only the frequency mode feeds tokens into the stop word list.
             if ($this->mode !== self::MODE_FREQ) {
                 continue;
             }
             $this->computeUsingFreqDist($doc->getDocumentData());
         }

         arsort($this->stopWords);
     }

     return $this->stopWords;
 }

コード例 #7

ファイルを表示

ファイル: InvertedIndexEngine.php プロジェクト: yooper/php-text-analysis

 /**
  * Removes the document from the index.
  *
  * Sets the isDirty flag, removes the document's id from the inverted
  * index, and drops the metadata entry stored under that id.
  *
  * @param TokensDocument $document
  */
 public function removeDocument(TokensDocument $document)
 {
     // The flag is set first, before the index itself is touched.
     $this->isDirty = true;

     $invertedIndex = $this->getInvertedIndex();
     $invertedIndex->removeDocument($document->getId());

     unset($this->metadata[$document->getId()]);
 }