/**
 * {@inheritdoc}
 *
 * Every token is replaced by the value stemword() returns for it, using
 * this instance's language and charset. Array keys are left untouched.
 */
public function normalize(array $tokens)
{
    // array_map() keeps the keys when it is given exactly one array.
    return array_map(
        function ($token) {
            return stemword($token, $this->lang, $this->charset);
        },
        $tokens
    );
}
/**
 * Filters a list of candidate keywords.
 *
 * The keywords first go through the parent implementation, then through the
 * language-specific `POS\<Lang>\Tagger::get()` when such a class exists.
 * Finally a word is dropped when any of the following holds for its
 * lower-cased form:
 *  - it has a non-empty entry in $this->stopword;
 *  - it contains one of the fragments "www", "@", ".com" or ".fr";
 *  - its stem has a non-empty entry in $this->common_words.
 *
 * @param array $keywords Candidate keywords.
 *
 * @return array The remaining keywords; array keys are preserved by
 *               array_filter(), so the result may have gaps in its keys.
 */
public function filter_keywords(array $keywords)
{
    $keywords = parent::filter_keywords($keywords);

    $tagger = __NAMESPACE__ . '\\POS\\' . ucfirst($this->lang) . '\\Tagger';
    if (class_exists($tagger)) {
        $keywords = $tagger::get($keywords);
    }

    // Stem with the language this instance is configured for instead of a
    // hard-coded one, so the common-word lookup matches the tagger's language.
    // 'french' stays the fallback to preserve the previous behaviour when no
    // language is set.
    // NOTE(review): assumes $this->lang is a name stemword() accepts — confirm.
    $language = empty($this->lang) ? 'french' : $this->lang;

    // Fragments that mark a word as part of a URL or an e-mail address.
    $blockedFragments = ['www', '@', '.com', '.fr'];

    return array_filter($keywords, function ($word) use ($language, $blockedFragments) {
        // Explicit encoding: the stemmer below is told the text is UTF-8, so
        // the case folding must not depend on mb_internal_encoding().
        $word = mb_strtolower($word, 'UTF-8');

        if (!empty($this->stopword[$word])) {
            return false;
        }

        // The word is already lower-cased, so a case-sensitive search suffices.
        foreach ($blockedFragments as $fragment) {
            if (strpos($word, $fragment) !== false) {
                return false;
            }
        }

        // Stemming is the costly step; only do it for words that survived
        // the cheap checks above.
        return empty($this->common_words[stemword($word, $language, 'UTF_8')]);
    });
}
/**
 * Lower-cases the given word (as UTF-8) and returns what stemword() yields
 * for it, using the language that languageDetect() reports for the
 * lower-cased word.
 *
 * @param string $word Word to process.
 *
 * @return mixed Whatever stemword() returns for the lower-cased word.
 */
private function getPhpStemmerPseudoRoot($word)
{
    $lowercased = mb_strtolower($word, 'utf-8');
    $language = $this->languageDetect($lowercased);

    return stemword($lowercased, $language, 'UTF_8');
}