Exemplo n.º 1
0
 /**
  * Split a string into a clean array of keywords
  * @param $text string
  * @param $allowWildcards boolean
  * @return array of keywords
  */
 function &filterKeywords($text, $allowWildcards = false)
 {
     $minLength = Config::getVar('search', 'min_word_length');
     $maxLength = Config::getVar('search', 'max_word_length');
     $stopwords =& SearchIndex::loadStopwords();
     // Remove punctuation
     if (is_array($text)) {
         $text = join("\n", $text);
     }
     $cleanText = String::regexp_replace('/[!"\\#\\$%\'\\(\\)\\.\\?@\\[\\]\\^`\\{\\}~]/', '', $text);
     $cleanText = String::regexp_replace('/[\\+,:;&\\/<=>\\|\\\\]/', ' ', $cleanText);
     $cleanText = String::regexp_replace('/[\\*]/', $allowWildcards ? '%' : ' ', $cleanText);
     $cleanText = String::strtolower($cleanText);
     // Split into words
     $words = String::regexp_split('/\\s+/', $cleanText);
     // FIXME Do not perform further filtering for some fields, e.g., author names?
     // Remove stopwords
     $keywords = array();
     foreach ($words as $k) {
         if (!isset($stopwords[$k]) && String::strlen($k) >= $minLength && !is_numeric($k)) {
             $keywords[] = String::substr($k, 0, $maxLength);
         }
     }
     return $keywords;
 }