/**
 * Reads text piped in on STDIN and prints one "token,stem" line for every
 * unique filtered token whose stem is a Levenshtein distance of 4 or more
 * away from the token itself.
 *
 * A large distance means the stemmer produced something quite different from
 * the original word, so that token will need to be mapped (or ignored) by hand.
 *
 * @param InputInterface  $input  Console input; not read by this method.
 * @param OutputInterface $output Console output; not used, results are echoed directly.
 *
 * @return int Always 0 when the report was produced.
 *
 * @throws \RuntimeException When ftell(STDIN) does not report position 0,
 *                           which this command treats as "nothing was piped in".
 */
protected function execute(InputInterface $input, OutputInterface $output)
 {
     // Guard clause: refuse to run unless STDIN is at position 0.
     if (ftell(STDIN) !== 0) {
         throw new \RuntimeException("Please pipe in STDIN");
     }

     // Read everything that was piped in. A failed read (false) is treated as
     // empty input, matching the tolerance of the previous fread() loop.
     $contents = (string) stream_get_contents(STDIN);

     // filtered tokens; array_unique() keeps the ORIGINAL keys, so the list is
     // re-indexed here. Without this, a counter running from 0 to count() - 1
     // never reaches the highest keys and trailing tokens are silently dropped.
     $tokens = array_values(array_unique(
         array_map([$this, 'filter'], (new GeneralTokenizer())->tokenize($contents))
     ));

     // stem the tokens and lower-case the result; array_map() over a single
     // array preserves keys, so $stemmedTokens stays aligned with $tokens
     $stemmer = new DictionaryStemmer(new PspellAdapter(), new SnowballStemmer());
     $stemmedTokens = array_map(function ($token) use ($stemmer) {
         return mb_strtolower($stemmer->stem($token));
     }, $tokens);

     // use a dictionary to catch all stemmed words that must be fixed or ignored in this data set
     $comparison = new LevenshteinComparison();
     foreach ($tokens as $index => $token) {
         // skip null tokens / missing stems, as the original isset() guards did
         if (!isset($token, $stemmedTokens[$index])) {
             continue;
         }

         // the stemmed word is not a word in the dictionary. The original token
         // will need to be manually mapped
         if ($comparison->distance($token, $stemmedTokens[$index]) >= 4) {
             echo "{$token},{$stemmedTokens[$index]}" . PHP_EOL;
         }
     }

     // Symfony Console expects an integer exit status from execute(); 0 = success.
     return 0;
 }
 /**
  * Exercises DictionaryStemmer backed by PspellAdapter and SnowballStemmer.
  *
  * The test is skipped when the SKIP_TEST environment variable is set to a
  * truthy value (presumably for environments without pspell; confirm with
  * the build configuration).
  */
 public function testPspell()
 {
     if (getenv('SKIP_TEST')) {
         // Report the test as skipped instead of returning silently, so the
         // run shows it was not executed rather than ending with no assertions.
         $this->markTestSkipped('SKIP_TEST environment variable is set');
     }
     $stemmer = new DictionaryStemmer(new PspellAdapter(), new SnowballStemmer());
     // a plural is reduced to its dictionary singular
     $this->assertEquals("judge", $stemmer->stem("judges"));
     // the approach does not always work: "university" is expected to come
     // back as something other than itself
     $this->assertNotEquals('university', $stemmer->stem("university"));
     // a misspelled word is corrected to the dictionary spelling
     $this->assertEquals('hammock', $stemmer->stem("hammok"));
 }