/**
 * Extracts the most frequent keywords from a block of text.
 *
 * The text is first passed through Uteis::cleanTextArea(), then split into
 * word tokens. Tokens shorter than two characters and tokens found in the
 * stop-word list (_data/stopWords_pt-BR3.txt under the configured sys_path)
 * are discarded; the remaining lowercased words are ranked by how often
 * they occur.
 *
 * @param string|null $string    Text to analyse.
 * @param int         $max_count Maximum number of keywords to return.
 *
 * @return string Comma-separated keywords, most frequent first, or an empty
 *                string when the text is empty, cannot be tokenised, or
 *                contains no usable word.
 */
public static function extrairPalavrasChaves($string = NULL, $max_count = 10) {
    $string = Uteis::cleanTextArea($string, true);
    if (Uteis::vazio($string)) {
        return '';
    }

    $stop_words_file = Kernel::get_conf('sys_path') . DIRECTORY_SEPARATOR . '_data' . DIRECTORY_SEPARATOR . 'stopWords_pt-BR3.txt';
    $stop_words = is_readable($stop_words_file)
        ? file($stop_words_file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES)
        : false;

    // A missing or unreadable stop-word file must not break extraction:
    // fall back to an empty list. Flipping the list gives exact-match,
    // constant-time lookups instead of a loose in_array() scan per token.
    $stop_lookup = is_array($stop_words) ? array_flip($stop_words) : array();

    // Pull out runs of word characters directly. preg_match_all() returns
    // false on error (e.g. invalid UTF-8 with the /u modifier) and 0 when
    // nothing matches; both cases yield no keywords.
    $matches = array();
    if (!preg_match_all('/\w+/u', $string, $matches)) {
        return '';
    }

    $words = array();
    foreach ($matches[0] as $token) {
        if (mb_strlen($token, 'UTF-8') < 2) {
            continue;
        }

        // The /u regex above already requires UTF-8 input, so state the
        // encoding explicitly rather than relying on mb_internal_encoding().
        $word = mb_strtolower($token, 'UTF-8');
        if (isset($stop_lookup[$word])) {
            continue;
        }

        $words[] = $word;
    }

    $frequency = array_count_values($words);
    arsort($frequency);

    // preserve_keys = true keeps purely numeric words (stored as integer
    // keys) from being renumbered by array_slice().
    $keywords = array_slice($frequency, 0, $max_count, true);

    return implode(',', array_keys($keywords));
}