/**
 * Parse the user's query and transform it into an SQL fragment which will
 * become part of a WHERE clause.
 *
 * The query is split into tokens, each being an optional modifier
 * (one of - + < > ~) followed by either a run of legal search characters
 * with an optional trailing "*" wildcard, or a double-quoted phrase.
 * Every token is expanded to its language variants, normalized for the
 * search index, and appended to the match expression; tokens that yield
 * more than one distinct variant are wrapped in parentheses.
 *
 * Side effects: resets $this->searchTerms and fills it with one
 * highlighting regexp (built by regexTerm()) per token. When the query
 * yields no tokens, a debug message is logged and the returned fragment
 * matches against an empty string.
 *
 * @param string $filteredText Query text to tokenize
 * @param mixed $fulltext Forwarded unchanged to getIndexField(), whose
 *   result names the field used in the MATCH expression
 * @return string SQL fragment of the form " <field> MATCH '<expression>' "
 */
function parseQuery($filteredText, $fulltext) {
    global $wgContLang;

    $lc = SearchEngine::legalSearchChars(); // Minus format chars
    $searchon = '';
    $this->searchTerms = array();

    $m = array();
    if (preg_match_all('/([-+<>~]?)(([' . $lc . ']+)(\\*?)|"[^"]*")/', $filteredText, $m, PREG_SET_ORDER)) {
        foreach ($m as $bits) {
            // preg_match_all() drops trailing subpatterns that did not
            // participate in the match, so a quoted phrase yields only three
            // elements. Pad to the full width instead of silencing the
            // resulting undefined-offset errors with "@".
            list(, $modifier, $term, $nonQuoted, $wildcard) = array_pad($bits, 5, '');

            if ($nonQuoted != '') {
                $term = $nonQuoted;
                $quote = '';
            } else {
                $term = str_replace('"', '', $term);
                $quote = '"';
            }

            if ($searchon !== '') {
                $searchon .= ' ';
            }

            // Some languages such as Serbian store the input form in the search index,
            // so we may need to search for matches in multiple writing system variants.
            $convertedVariants = $wgContLang->autoConvertToAllVariants($term);
            if (is_array($convertedVariants)) {
                $variants = array_unique(array_values($convertedVariants));
            } else {
                $variants = array($term);
            }

            // The low-level search index does some processing on input to work
            // around problems with minimum lengths and encoding in MySQL's
            // fulltext engine.
            // For Chinese this also inserts spaces between adjacent Han characters.
            $strippedVariants = array_map(array($wgContLang, 'normalizeForSearch'), $variants);

            // Some languages such as Chinese force all variants to a canonical
            // form when stripping to the low-level search index, so to be sure
            // let's check our variants list for unique items after stripping.
            $strippedVariants = array_unique($strippedVariants);

            // Alternatives for the same token are grouped in parentheses.
            $hasAlternatives = count($strippedVariants) > 1;

            $searchon .= $modifier;
            if ($hasAlternatives) {
                $searchon .= '(';
            }
            foreach ($strippedVariants as $stripped) {
                if ($nonQuoted && strpos($stripped, ' ') !== false) {
                    // Hack for Chinese: we need to toss in quotes for
                    // multiple-character phrases since normalizeForSearch()
                    // added spaces between them to make word breaks.
                    $stripped = '"' . trim($stripped) . '"';
                }
                $searchon .= "{$quote}{$stripped}{$quote}{$wildcard} ";
            }
            if ($hasAlternatives) {
                $searchon .= ')';
            }

            // Match individual terms or quoted phrase in result highlighting...
            // Note that variants will be introduced in a later stage for highlighting!
            $regexp = $this->regexTerm($term, $wildcard);
            $this->searchTerms[] = $regexp;
        }
    } else {
        wfDebug(__METHOD__ . ": Can't understand search query '{$filteredText}'\n");
    }

    // Escape the assembled expression before embedding it in a quoted SQL literal.
    $searchon = $this->db->strencode($searchon);
    $field = $this->getIndexField($fulltext);

    return " {$field} MATCH '{$searchon}' ";
}