/**
 * Splits text into lower-cased word tokens and counts the occurrences of each.
 *
 * The text is passed through relevanssi_thousandsep() (when that function
 * exists) and the 'relevanssi_remove_punctuation' filter, lower-cased, and
 * then split on spaces, tabs and newlines.
 *
 * @param string|array $str             Text to tokenize, or an array of texts that
 *                                      are tokenized one by one.
 * @param bool         $remove_stops    When true, tokens found in the list returned
 *                                      by relevanssi_fetch_stopwords() are dropped.
 * @param int          $min_word_length Tokens shorter than this many characters are
 *                                      skipped. The default of -1 keeps everything.
 *
 * @return array Map of token => number of occurrences. Numeric tokens are stored
 *               with a single leading space (e.g. " 2010") so the key stays a string.
 */
function relevanssi_tokenize($str, $remove_stops = true, $min_word_length = -1) {
	$tokens = array();

	if (is_array($str)) {
		// Tokenize every element on its own and merge the results.
		// NOTE(review): array_merge() keeps the count of the *last* part for a
		// word that appears in several parts; the counts are not summed. Left
		// unchanged because callers may rely on it - confirm the intent.
		foreach ($str as $part) {
			$tokens = array_merge($tokens, relevanssi_tokenize($part, $remove_stops, $min_word_length));
		}
		return $tokens;
	}

	if (function_exists('mb_internal_encoding')) {
		mb_internal_encoding("UTF-8");
	}

	// Build the stopword lookup once so every token is checked with a single
	// exact-match isset() instead of a loose, linear in_array() scan.
	$stopword_lookup = array();
	if ($remove_stops) {
		$stopword_list = relevanssi_fetch_stopwords();
		if (is_array($stopword_list)) {
			foreach ($stopword_list as $stopword) {
				if (is_scalar($stopword)) {
					$stopword_lookup[strval($stopword)] = true;
				}
			}
		}
	}

	if (function_exists('relevanssi_thousandsep')) {
		$str = relevanssi_thousandsep($str);
	}

	$str = apply_filters('relevanssi_remove_punctuation', $str);
	$str = function_exists('mb_strtolower') ? mb_strtolower($str) : strtolower($str);

	// Split up front rather than iterating with strtok(): strtok() keeps global
	// state, and a filter callback run inside the loop that used strtok() itself
	// would silently break the iteration. Empty pieces are discarded, exactly as
	// strtok() skips runs of delimiters.
	$words = preg_split('/[\n\t ]+/', $str, -1, PREG_SPLIT_NO_EMPTY);
	if ($words === false) {
		$words = array();
	}

	$use_mb_strlen = function_exists('mb_strlen');
	$is_premium    = defined('RELEVANSSI_PREMIUM') && RELEVANSSI_PREMIUM;

	foreach ($words as $t) {
		// Measure length in characters, not bytes, so multibyte words are not
		// treated as longer than they are.
		$length = $use_mb_strlen ? mb_strlen($t) : strlen($t);
		if ($length < $min_word_length) {
			continue;
		}

		// The stopword decision is made on the token as it was split, before
		// the premium filter gets a chance to change it.
		$accept = !isset($stopword_lookup[$t]);

		if ($is_premium) {
			$t = apply_filters('relevanssi_premium_tokenizer', $t);
		}

		if (!$accept) {
			continue;
		}

		$t = relevanssi_mb_trim($t);

		// $t ends up as an array index. PHP casts integer-like string keys to
		// integers (and array_merge() renumbers integer keys), so numeric tokens
		// get a leading space to keep the key a string.
		if (is_numeric($t)) {
			$t = " {$t}";
		}

		if (!isset($tokens[$t])) {
			$tokens[$t] = 1;
		} else {
			$tokens[$t]++;
		}
	}

	return $tokens;
}
/**
 * Splits text into lower-cased word tokens and counts the occurrences of each.
 *
 * The text is passed through the 'relevanssi_remove_punctuation' filter,
 * lower-cased, and then split on spaces, tabs and newlines.
 *
 * @param string|array $str          Text to tokenize, or an array of texts that are
 *                                   tokenized one by one.
 * @param bool         $remove_stops When true, tokens found in the list returned by
 *                                   relevanssi_fetch_stopwords() are dropped.
 *
 * @return array Map of token => number of occurrences. Integer-like tokens such
 *               as "2010" end up as integer keys because PHP casts such keys.
 */
function relevanssi_tokenize($str, $remove_stops = true) {
	$tokens = array();

	if (is_array($str)) {
		// Tokenize every element on its own. The results are combined with an
		// explicit key-preserving loop: array_merge() would renumber integer
		// keys, turning a numeric token such as 2010 into 0, 1, 2, ...
		// As with array_merge() on string keys, a later part overwrites the
		// count of an earlier part; the counts are not summed.
		foreach ($str as $part) {
			foreach (relevanssi_tokenize($part, $remove_stops) as $word => $count) {
				$tokens[$word] = $count;
			}
		}
		return $tokens;
	}

	if (function_exists('mb_internal_encoding')) {
		mb_internal_encoding("UTF-8");
	}

	// Build the stopword lookup once so every token is checked with a single
	// exact-match isset() instead of a loose, linear in_array() scan.
	$stopword_lookup = array();
	if ($remove_stops) {
		$stopword_list = relevanssi_fetch_stopwords();
		if (is_array($stopword_list)) {
			foreach ($stopword_list as $stopword) {
				if (is_scalar($stopword)) {
					$stopword_lookup[strval($stopword)] = true;
				}
			}
		}
	}

	$str = apply_filters('relevanssi_remove_punctuation', $str);

	// mbstring is optional (mb_internal_encoding() is guarded above), so fall
	// back to strtolower() instead of failing when it is missing.
	$str = function_exists('mb_strtolower') ? mb_strtolower($str) : strtolower($str);

	// Split up front rather than iterating with strtok(), whose global state
	// can be disturbed by any other code that calls strtok() in between. Empty
	// pieces are discarded, exactly as strtok() skips runs of delimiters.
	$words = preg_split('/[\n\t ]+/', $str, -1, PREG_SPLIT_NO_EMPTY);
	if ($words === false) {
		$words = array();
	}

	foreach ($words as $t) {
		if (isset($stopword_lookup[$t])) {
			continue;
		}

		$t = relevanssi_mb_trim($t);

		if (!isset($tokens[$t])) {
			$tokens[$t] = 1;
		} else {
			$tokens[$t]++;
		}
	}

	return $tokens;
}