/**
 * Cleans a text field value and HTML-escapes it.
 *
 * The value goes through four steps, in this order:
 *   1. CString::stripHTML()       (presumably removes markup - see CString)
 *   2. CString::mergeWhiteSpace() (presumably collapses whitespace runs - see CString)
 *   3. every ";" character is removed
 *   4. htmlspecialchars() with its default flags
 *
 * Because escaping happens last, the result can still contain ";" as part
 * of the entities produced by htmlspecialchars() (e.g. "&amp;").
 *
 * @param string $s Raw field value.
 * @return string The cleaned, HTML-escaped value.
 */
function sterilizeTextField($s)
{
    $helper = new CString();

    $withoutMarkup = $helper->stripHTML($s);
    $collapsed = $helper->mergeWhiteSpace($withoutMarkup);
    $withoutSemicolons = str_replace(";", "", $collapsed);

    return htmlspecialchars($withoutSemicolons);
}
/**
 * Splits a piece of text into words suitable for indexing.
 *
 * Processing order:
 *   1. the text is cut to $max_text_len_to_scan via _ml_substr() - note that
 *      this happens BEFORE markup is stripped, so markup counts towards the limit;
 *   2. CString::stripHTML() is applied;
 *   3. every run of the punctuation characters listed below becomes one space;
 *   4. CString::mergeWhiteSpace() is applied and the text is split on " ";
 *   5. words that are empty, shorter than $min_len_index_word or longer than
 *      $max_len_index_word (measured with _ml_strlen()) are dropped.
 *
 * @param string $text                 Source text, may contain markup.
 * @param int    $max_text_len_to_scan Length limit passed to _ml_substr().
 * @param int    $min_len_index_word   Minimum accepted word length.
 * @param int    $max_len_index_word   Maximum accepted word length.
 * @return array Accepted words in their original order. Keys are the positions
 *               the words had after splitting, so they may be non-contiguous.
 */
function getIndexWordsFromText($text, $max_text_len_to_scan = 1024, $min_len_index_word = 1, $max_len_index_word = 50)
{
    loadCoreFile('cstring.php');
    $helper = new CString();

    $scanned = _ml_substr($text, 0, $max_text_len_to_scan);
    $scanned = $helper->stripHTML($scanned);

    // Punctuation that separates words. The list is kept exactly as-is
    // (including the repeated parentheses) so the generated pattern is unchanged.
    $punctuation = array(
        "\$", ".", "!", "?", "@", ",", "#", "%", "^", "&", "*",
        "(", ")", "_", "+", "=", "(", ")", "{", "}", "[", "]",
        "\\", "|", ";", ":", "\"", "<", ">", "/", "~", "-", "'"
    );

    // Joining with a backslash escapes every character after the first one;
    // the leading "$" needs no escape inside a character class.
    $characterClass = implode("\\", $punctuation);
    $pattern = "/([" . $characterClass . "]+)/";

    $scanned = preg_replace($pattern, " ", $scanned);
    $scanned = $helper->mergeWhiteSpace($scanned);

    // Keep only the words whose length falls inside the allowed range,
    // preserving the key each word had in the split result.
    $accepted = array();
    foreach (explode(" ", $scanned) as $position => $candidate) {
        $length = _ml_strlen($candidate);

        if (0 == $length) {
            continue;
        }
        if ($length < $min_len_index_word) {
            continue;
        }
        if ($length > $max_len_index_word) {
            continue;
        }

        $accepted[$position] = $candidate;
    }

    return $accepted;
}