/**
 * Split a string into a clean array of keywords.
 *
 * Processing steps, in order:
 *  - an array argument is joined into one string using newlines;
 *  - a first set of punctuation characters is deleted outright;
 *  - a second set of separator characters is replaced by a space;
 *  - "*" becomes "%" when wildcards are allowed, otherwise a space;
 *  - the text is lower-cased and split on runs of whitespace;
 *  - empty tokens, stopwords, tokens shorter than the configured
 *    "min_word_length" and numeric tokens are dropped;
 *  - each surviving token is truncated to the configured "max_word_length".
 *
 * @param $text string|array text to tokenize; arrays are joined with "\n"
 * @param $allowWildcards boolean if true, "*" is converted to "%" instead of being treated as a separator
 * @return array of keywords
 */
function &filterKeywords($text, $allowWildcards = false) {
	$minLength = Config::getVar('search', 'min_word_length');
	$maxLength = Config::getVar('search', 'max_word_length');
	$stopwords =& SearchIndex::loadStopwords();

	// Collapse array input into a single newline-separated string
	if (is_array($text)) {
		$text = join("\n", $text);
	}

	// Remove punctuation: first group is deleted, second group becomes a space
	$cleanText = String::regexp_replace('/[!"\\#\\$%\'\\(\\)\\.\\?@\\[\\]\\^`\\{\\}~]/', '', $text);
	$cleanText = String::regexp_replace('/[\\+,:;&\\/<=>\\|\\\\]/', ' ', $cleanText);
	// Literal "%" was deleted above, so any "%" left after this step came from a "*"
	$cleanText = String::regexp_replace('/[\\*]/', $allowWildcards ? '%' : ' ', $cleanText);
	$cleanText = String::strtolower($cleanText);

	// Split into words
	$words = String::regexp_split('/\\s+/', $cleanText);

	// FIXME Do not perform further filtering for some fields, e.g., author names?

	// Remove stopwords, too-short words and numbers
	$keywords = array();
	foreach ($words as $k) {
		// Leading/trailing whitespace makes the split produce empty tokens.
		// Reject them explicitly so they cannot slip through as keywords
		// when min_word_length is unset or 0 (0 >= null and 0 >= 0 are true).
		if ($k === '') {
			continue;
		}
		if (!isset($stopwords[$k]) && String::strlen($k) >= $minLength && !is_numeric($k)) {
			$keywords[] = String::substr($k, 0, $maxLength);
		}
	}
	return $keywords;
}