/**
  * Strip stop words from a piece of text.
  *
  * The text is split on single space characters; every token for which the
  * dictionary reports a match is dropped, and the surviving tokens are joined
  * back together with single spaces in their original order.
  *
  * @param  string $text The text to filter
  * @return string The text without the tokens found in the dictionary
  */
 public function remove($text)
 {
     $kept = array();
     foreach (explode(' ', $text) as $token) {
         // Keep only the tokens the dictionary does not know about.
         if (!$this->dictionary->contains($token)) {
             $kept[] = $token;
         }
     }
     return implode(' ', $kept);
 }
Example #2
0
 /**
  * ECS suffix-restoration loop ("Loop Pengembalian Akhiran").
  *
  * Restores the prefixes, then walks the recorded removals from last to
  * first. For every suffix removal it builds a candidate word from that
  * removal (for a removed "kan" it first tries the removal's result with only
  * "k" appended), strips prefixes again and stops as soon as the dictionary
  * contains the current word. A candidate that fails is rolled back to the
  * removals and current word captured right after the prefix restoration.
  *
  * NOTE(review): "ECS" presumably stands for Enhanced Confix Stripping - confirm.
  *
  * @return void
  */
 public function loopPengembalianAkhiran()
 {
     // restore prefix to form [DP+[DP+[DP]]] + Root word
     $this->restorePrefix();
     // Snapshot the removals and the current word; both are used below to
     // roll back the state after every unsuccessful attempt.
     $removals = $this->removals;
     $reversedRemovals = array_reverse($removals);
     $currentWord = $this->getCurrentWord();
     foreach ($reversedRemovals as $removal) {
         // Only suffix removals are candidates for restoration.
         if (!$this->isSuffixRemoval($removal)) {
             continue;
         }
         if ($removal->getRemovedPart() == 'kan') {
             // First attempt: the removal's result with just "k" appended.
             $this->setCurrentWord($removal->getResult() . 'k');
             // step 4, 5
             $this->removePrefixes();
             if ($this->dictionary->contains($this->getCurrentWord())) {
                 return;
             }
             // Not found: fall back to the result with the full "kan" appended.
             $this->setCurrentWord($removal->getResult() . 'kan');
         } else {
             // Presumably the word as it was before this suffix was stripped -
             // TODO confirm against the removal class.
             $this->setCurrentWord($removal->getSubject());
         }
         // step 4, 5
         $this->removePrefixes();
         if ($this->dictionary->contains($this->getCurrentWord())) {
             return;
         }
         // This candidate failed: reset removals and word to the snapshot
         // before trying the next removal.
         $this->removals = $removals;
         $this->setCurrentWord($currentWord);
     }
 }
Example #3
0
 /**
  * Stem a plural word to its common stem form.
  * Asian J. (2007) “Effective Techniques for Indonesian Text Retrieval” page 76-77.
  *
  * The word is split at its last hyphen and both halves are stemmed
  * individually. When the two stems are identical that stem is returned;
  * otherwise (or when the word contains no hyphen) the input is returned
  * unchanged.
  *
  * If the part after the last hyphen is one of the particles
  * ku/mu/nya/lah/kah/tah/pun and the remainder still contains a hyphen, the
  * split is moved to that earlier hyphen and the particle stays attached to
  * the second half. If the remainder has no further hyphen, the original
  * split is kept.
  *
  * @param  string $plural the word to stem, e.g : bersama-sama
  * @return string common stem form, e.g : sama
  * @link   http://researchbank.rmit.edu.au/eserv/rmit:6312/Asian.pdf
  */
 protected function stemPluralWord($plural)
 {
     if (!preg_match('/^(.*)-(.*)$/', $plural, $words)) {
         return $plural;
     }
     $first = $words[1];
     $second = $words[2];
     // malaikat-malaikat-nya -> malaikat malaikat-nya
     // A separate match array is used on purpose: preg_match() empties its
     // output array when nothing matches, which must not wipe the first split.
     $particles = array('ku', 'mu', 'nya', 'lah', 'kah', 'tah', 'pun');
     if (in_array($second, $particles, true) && preg_match('/^(.*)-(.*)$/', $first, $inner)) {
         $first = $inner[1];
         $second = $inner[2] . '-' . $second;
     }
     // berbalas-balasan -> balas
     $rootWord1 = $this->stemSingularWord($first);
     $rootWord2 = $this->stemSingularWord($second);
     // meniru-nirukan -> tiru
     // The second half is unknown and stemming left it untouched: retry with
     // the "me" prefix put in front of it.
     if (!$this->dictionary->contains($second) && $rootWord2 === $second) {
         $rootWord2 = $this->stemSingularWord('me' . $second);
     }
     // Strict comparison so that numeric-looking strings such as "1" and "01"
     // are not treated as the same stem.
     if ($rootWord1 === $rootWord2) {
         return $rootWord1;
     }
     return $plural;
 }