Esempio n. 1
0
 /**
  * Parse the user's query and transform it into an SQL fragment which will
  * become part of a WHERE clause.
  *
  * The text is split into terms: either a run of legal search characters
  * (optionally followed by a "*" wildcard) or a double-quoted phrase, each
  * optionally preceded by one of the modifiers - + < > ~. Every term is
  * expanded into its writing-system variants, normalized for the search
  * index, and appended to the match expression.
  *
  * Side effect: $this->searchTerms is reset and then filled with one
  * highlighting regular expression per term (built by $this->regexTerm()).
  *
  * @param string $filteredText Search text to parse
  * @param mixed $fulltext Forwarded unchanged to $this->getIndexField() to
  *   pick the index field
  * @return string Fragment of the form " <field> MATCH '<expression>' ",
  *   where the expression has been escaped with $this->db->strencode()
  */
 function parseQuery($filteredText, $fulltext)
 {
     global $wgContLang;
     // Body of the character class describing what an unquoted term may contain
     $lc = SearchEngine::legalSearchChars();
     $searchon = '';
     $this->searchTerms = array();
     $m = array();
     if (preg_match_all('/([-+<>~]?)(([' . $lc . ']+)(\\*?)|"[^"]*")/', $filteredText, $m, PREG_SET_ORDER)) {
         foreach ($m as $bits) {
             // For a quoted phrase only the second alternative matches, so the
             // trailing groups 3 and 4 are missing from the match set. Pad the
             // set instead of silencing the undefined-offset errors with "@".
             list(, $modifier, $term, $nonQuoted, $wildcard) = array_pad($bits, 5, '');
             // Group 3 needs at least one character to match, so an empty
             // string reliably means the term was a quoted phrase. Comparing
             // against '' (rather than testing truthiness) keeps the term "0"
             // classified as unquoted.
             $isPhrase = $nonQuoted === '';
             if ($isPhrase) {
                 $term = str_replace('"', '', $term);
                 $quote = '"';
             } else {
                 $term = $nonQuoted;
                 $quote = '';
             }
             if ($searchon !== '') {
                 $searchon .= ' ';
             }
             // Some languages such as Serbian store the input form in the search index,
             // so we may need to search for matches in multiple writing system variants.
             $convertedVariants = $wgContLang->autoConvertToAllVariants($term);
             if (is_array($convertedVariants)) {
                 $variants = array_unique(array_values($convertedVariants));
             } else {
                 $variants = array($term);
             }
             // The low-level search index does some processing on input to work
             // around problems with minimum lengths and encoding in MySQL's
             // fulltext engine.
             // For Chinese this also inserts spaces between adjacent Han characters.
             $strippedVariants = array_map(array($wgContLang, 'normalizeForSearch'), $variants);
             // Some languages such as Chinese force all variants to a canonical
             // form when stripping to the low-level search index, so to be sure
             // let's check our variants list for unique items after stripping.
             $strippedVariants = array_unique($strippedVariants);
             // Several distinct variants are wrapped in parentheses so that the
             // modifier applies to the whole group.
             $grouped = count($strippedVariants) > 1;
             $searchon .= $modifier;
             if ($grouped) {
                 $searchon .= '(';
             }
             foreach ($strippedVariants as $stripped) {
                 if (!$isPhrase && strpos($stripped, ' ') !== false) {
                     // Hack for Chinese: we need to toss in quotes for
                     // multiple-character phrases since normalizeForSearch()
                     // added spaces between them to make word breaks.
                     $stripped = '"' . trim($stripped) . '"';
                 }
                 $searchon .= "{$quote}{$stripped}{$quote}{$wildcard} ";
             }
             if ($grouped) {
                 $searchon .= ')';
             }
             // Match individual terms or quoted phrase in result highlighting...
             // Note that variants will be introduced in a later stage for highlighting!
             $this->searchTerms[] = $this->regexTerm($term, $wildcard);
         }
     } else {
         wfDebug(__METHOD__ . ": Can't understand search query '{$filteredText}'\n");
     }
     // Escape the assembled expression before embedding it in the quoted literal
     $searchon = $this->db->strencode($searchon);
     $field = $this->getIndexField($fulltext);
     return " {$field} MATCH '{$searchon}' ";
 }