コード例 #1
0
 /**
  * Reads text from STDIN, tokenizes and stems it, and echoes every
  * "token,stem" pair whose Levenshtein distance is at least 4.
  *
  * The printed pairs are candidates for manual review: a large distance
  * between a token and its stem suggests the stemmer produced something
  * that will need to be manually mapped.
  *
  * @param InputInterface  $input  Not read by this method.
  * @param OutputInterface $output Not written to; results are echoed directly.
  *
  * @throws \RuntimeException When ftell(STDIN) does not report position 0.
  */
 protected function execute(InputInterface $input, OutputInterface $output)
 {
     if (ftell(STDIN) !== 0) {
         throw new \RuntimeException("Please pipe in STDIN");
     }

     // Slurp the whole stream in 1 KiB chunks.
     $contents = '';
     while (!feof(STDIN)) {
         $contents .= fread(STDIN, 1024);
     }

     // filtered tokens
     $tokens = array_map([$this, 'filter'], (new GeneralTokenizer())->tokenize($contents));
     // array_unique() keeps the original keys, so $tokens may be sparse from here on.
     $tokens = array_unique($tokens);

     // stem the tokens; array_map() over a single array preserves its keys,
     // so $stemmedTokens stays aligned with $tokens key-for-key
     $stemmer = new DictionaryStemmer(new PspellAdapter(), new SnowballStemmer());
     $stemmedTokens = array_map(function ($token) use ($stemmer) {
         return $stemmer->stem($token);
     }, $tokens);

     // use a dictionary to catch all stemmed words that must be fixed or ignored in this data set
     $stemmedTokens = array_map('mb_strtolower', $stemmedTokens);
     $comparison = new LevenshteinComparison();

     // Walk the real keys instead of 0..count-1: with sparse keys an index
     // loop bounded by count() never reaches the tokens stored at high keys.
     foreach ($tokens as $index => $token) {
         if (!isset($token, $stemmedTokens[$index])) {
             continue;
         }

         // the stemmed word differs substantially from the original token,
         // so the original token will need to be manually mapped
         if ($comparison->distance($token, $stemmedTokens[$index]) >= 4) {
             echo "{$token},{$stemmedTokens[$index]}" . PHP_EOL;
         }
     }
 }
コード例 #2
0
 /**
  * Checks that LevenshteinComparison::distance() reports 1 for 'hat' vs 'cat'.
  */
 public function testHatCat()
 {
     $expectedDistance = 1;
     $actualDistance = (new LevenshteinComparison())->distance('hat', 'cat');

     $this->assertEquals($expectedDistance, $actualDistance);
 }