/**
 * Make sure that a wildcard string is allowed.
 *
 * The word is lower-cased and handed to vB_Api_Search::is_index_word() with
 * its second argument set to true; whatever that call returns is the result.
 *
 * @param string $word -- the word to check for wildcard
 * @return bool
 */
private function verify_wildcard($word)
{
	//Lower-casing probably doesn't do anything for the wildcard itself since *
	//doesn't have an upper case. However the code this was cribbed from does it
	//this way and it doesn't hurt anything.
	$wordlower = strtolower($word);

	return vB_Api_Search::is_index_word($wordlower, true);
}
/**
 * Breaks up the text into words.
 *
 * HTML tags and bbcode are stripped, the text is lower-cased and split on
 * separators, and every piece rejected by vB_Api_Search::is_index_word() is
 * dropped.
 *
 * @param string $text
 * @return array the remaining words; keys are the positions produced by
 *               preg_split() and are NOT re-indexed after filtering, so they
 *               may be non-contiguous
 */
protected static function break_words($text)
{
	$text = strip_tags($text);
	$text = strip_bbcode($text, true, false, false, true);

	// multibyte: either a non-ASCII byte is present, or the byte length
	// differs from the length reported by vB_String::vbStrlen()
	$is_mb = preg_match('/[^\\x00-\\x7F]/', $text);
	if (!$is_mb)
	{
		$is_mb = strlen($text) != vB_String::vbStrlen($text);
	}

	if ($is_mb)
	{
		$text = vB_String::toUtf8($text, vB_String::getCharSet());

		// The 'e' modifier is only meaningful to preg_replace() and was removed
		// in PHP 7.0; with it, preg_match() fails with "Unknown modifier 'e'" and
		// the numeric entities are never decoded.
		if (preg_match('/&#([0-9]+);|[^\\x00-\\x7F]/siU', $text) and function_exists('mb_decode_numericentity'))
		{
			$text = mb_decode_numericentity($text, array(0x0, 0x2ffff, 0, 0xffff), 'UTF-8');
		}

		// split on whitespace and punctuation only, leaving multibyte sequences intact
		$pattern = '/[\\s,.!?@#$%^&*\\(\\)\\/<>"\';:\\[\\]\\{\\}\\+|-]/';
	}
	else
	{
		// plain ASCII text: anything that is not a letter, digit or underscore separates words
		$pattern = '/[^a-z0-9_]+/i';
	}

	$words = preg_split($pattern, vB_String::vBStrToLower($text), -1, PREG_SPLIT_NO_EMPTY);

	foreach ($words as $index => $word)
	{
		// empty() also discards the literal word "0", matching the previous behaviour
		if (!vB_Api_Search::is_index_word($word, true) or empty($word))
		{
			unset($words[$index]);
		}
	}

	return $words;
}