/**
 * Strip stop words out of a piece of text.
 *
 * The text is split on single space characters; every token that the
 * dictionary reports as contained is dropped, and the surviving tokens are
 * joined back together with single spaces in their original order.
 *
 * @param string $text The text to filter
 * @return string The text with the stop words removed
 */
public function remove($text)
{
    $kept = array();

    foreach (explode(' ', $text) as $token) {
        // Keep only the tokens the dictionary does not know as stop words.
        if (!$this->dictionary->contains($token)) {
            $kept[] = $token;
        }
    }

    return implode(' ', $kept);
}
/**
 * ECS suffix-restoration loop ("Loop Pengembalian Akhiran").
 *
 * Restores the prefixes, then walks the recorded removals from the most
 * recent to the oldest. For every suffix removal it puts the suffix back on
 * the word, strips the prefixes again and stops as soon as the resulting
 * current word is found in the dictionary. When an attempt fails, the
 * removal list and the current word are reset to the values they had right
 * after the prefixes were restored, and the next removal is tried.
 *
 * The method returns nothing; its outcome is the state left in the current
 * word and in $this->removals.
 */
public function loopPengembalianAkhiran()
{
    // Put the prefixes back so the word has the form [DP+[DP+[DP]]] + root word.
    $this->restorePrefix();

    // Snapshot of the state to fall back to after each failed attempt.
    $originalRemovals = $this->removals;
    $originalWord     = $this->getCurrentWord();

    foreach (array_reverse($originalRemovals) as $removal) {
        if (!$this->isSuffixRemoval($removal)) {
            continue;
        }

        if ($removal->getRemovedPart() == 'kan') {
            // For "-kan", first try giving back only the "k".
            $this->setCurrentWord($removal->getResult() . 'k');

            // Steps 4 and 5: strip the prefixes and look the word up.
            $this->removePrefixes();
            $foundWithK = $this->dictionary->contains($this->getCurrentWord());
            if ($foundWithK) {
                return;
            }

            // Otherwise give back the whole "kan".
            $restored = $removal->getResult() . 'kan';
        } else {
            // Any other suffix: go back to the word as it was before this removal.
            $restored = $removal->getSubject();
        }

        $this->setCurrentWord($restored);

        // Steps 4 and 5: strip the prefixes and look the word up.
        $this->removePrefixes();
        $found = $this->dictionary->contains($this->getCurrentWord());
        if ($found) {
            return;
        }

        // Attempt failed: roll back before trying the next removal.
        $this->removals = $originalRemovals;
        $this->setCurrentWord($originalWord);
    }
}
/**
 * Stem a plural word to its common stem form.
 * Asian J. (2007) "Effective Techniques for Indonesian Text Retrieval" page 76-77.
 *
 * The word is split at its last hyphen and both halves are stemmed
 * individually; when the two stems are identical that stem is returned,
 * otherwise the input is returned unchanged. A word without a hyphen is
 * returned unchanged as well.
 *
 * @param string $plural the word to stem, e.g : bersama-sama
 * @return string common stem form, e.g : sama
 * @link http://researchbank.rmit.edu.au/eserv/rmit:6312/Asian.pdf
 */
protected function stemPluralWord($plural)
{
    preg_match('/^(.*)-(.*)$/', $plural, $words);

    if (!isset($words[1]) || !isset($words[2])) {
        return $plural;
    }

    // malaikat-malaikat-nya -> malaikat malaikat-nya
    $suffix = $words[2];
    $particles = array('ku', 'mu', 'nya', 'lah', 'kah', 'tah', 'pun');

    // Match into a separate array: preg_match() resets its $matches argument
    // to an empty array when nothing matches, which would otherwise wipe out
    // $words for inputs such as "buku-nya" and leave $words[1] / $words[2]
    // undefined further down.
    if (in_array($suffix, $particles, true)
        && preg_match('/^(.*)-(.*)$/', $words[1], $innerWords)
    ) {
        $words = $innerWords;
        $words[2] .= '-' . $suffix;
    }

    // berbalas-balasan -> balas
    $rootWord1 = $this->stemSingularWord($words[1]);
    $rootWord2 = $this->stemSingularWord($words[2]);

    // meniru-nirukan -> tiru
    if (!$this->dictionary->contains($words[2]) && $rootWord2 === $words[2]) {
        $rootWord2 = $this->stemSingularWord('me' . $words[2]);
    }

    // Strict comparison: with "==" numeric strings such as "007" and "7"
    // would be compared as numbers and wrongly treated as the same stem.
    if ($rootWord1 === $rootWord2) {
        return $rootWord1;
    }

    return $plural;
}