/**
 * Converts some characters for MySQL's indexing to grok it correctly,
 * and pads short words to overcome limitations.
 *
 * Steps, applied in order to the result of parent::normalizeText():
 *  1. Lower-case the text with the content language and pass every byte
 *     sequence that starts with a byte in 0xC0-0xFF through
 *     stripForSearchCallback().
 *  2. Append "u800" to every word shorter than minSearchLength().
 *  3. Replace a period between a word character and a word character
 *     (or "*") with "u82e".
 *
 * The preg_* functions return null when PCRE fails (for instance when the
 * subject is not valid UTF-8 for a /u pattern). In that case the affected
 * step is skipped and the text from the previous step is kept, instead of
 * the whole result silently becoming null.
 *
 * @param string $string Text to normalize
 * @return mixed|string Normalized text
 */
function normalizeText($string) {
    global $wgContLang;

    wfProfileIn(__METHOD__);

    $out = parent::normalizeText($string);

    // MySQL fulltext index doesn't grok utf-8, so we
    // need to fold cases and convert to hex
    $lowered = $wgContLang->lc($out);
    $folded = preg_replace_callback(
        "/([\\xc0-\\xff][\\x80-\\xbf]*)/",
        array($this, 'stripForSearchCallback'),
        $lowered
    );
    // Fall back to the lower-cased text if PCRE reported an error.
    $out = ($folded !== null) ? $folded : $lowered;

    // And to add insult to injury, the default indexing
    // ignores short words... Pad them so we can pass them
    // through without reconfiguring the server...
    $minLength = $this->minSearchLength();
    if ($minLength > 1) {
        $n = $minLength - 1;
        $padded = preg_replace("/\\b(\\w{1,{$n}})\\b/", "\$1u800", $out);
        if ($padded !== null) {
            $out = $padded;
        }
    }

    // Periods within things like hostnames and IP addresses
    // are also important -- we want a search for "example.com"
    // or "192.168.1.1" to work sanely.
    //
    // MySQL's search seems to ignore them, so you'd match on
    // "example.wikipedia.com" and "192.168.83.1" as well.
    $dotted = preg_replace("/(\\w)\\.(\\w|\\*)/u", "\$1u82e\$2", $out);
    if ($dotted !== null) {
        // Only accept the result when the /u match did not fail.
        $out = $dotted;
    }

    wfProfileOut(__METHOD__);

    return $out;
}
/**
 * Returns the parent's legal search characters with a double quote
 * character prepended.
 *
 * @return string
 */
public static function legalSearchChars() {
    $inherited = parent::legalSearchChars();

    return "\"" . $inherited;
}