Esempio n. 1
0
 /**
  * Prepares text so that MySQL's fulltext indexing handles it correctly.
  *
  * The steps run in this order: the parent class normalization, case
  * folding through the content language object, rewriting of every
  * sequence that starts with a 0xC0-0xFF byte via
  * stripForSearchCallback(), padding of words shorter than the minimum
  * search length, and finally encoding of periods that sit between
  * word characters.
  *
  * @param string $string Text to normalize.
  * @return mixed|string The text after the last replacement pass.
  */
 function normalizeText($string)
 {
     global $wgContLang;
     wfProfileIn(__METHOD__);

     // Begin with the generic normalization, then fold case up front so
     // the callback below only ever sees lower-cased input.
     $folded = $wgContLang->lc(parent::normalizeText($string));

     // The MySQL fulltext index does not cope with utf-8, so each
     // lead byte plus its continuation bytes is handed to the callback
     // for conversion to hex.
     $text = preg_replace_callback(
         "/([\\xc0-\\xff][\\x80-\\xbf]*)/",
         array($this, 'stripForSearchCallback'),
         $folded
     );

     // Default indexing drops short words. Rather than requiring a
     // server reconfiguration, append a suffix to any word that is
     // shorter than the minimum length so it passes through.
     $threshold = $this->minSearchLength();
     if ($threshold > 1) {
         // Longest word length that still counts as "too short".
         $n = $threshold - 1;
         $text = preg_replace(
             "/\\b(\\w{1,{$n}})\\b/",
             "\$1u800",
             $text
         );
     }

     // Periods inside hostnames and IP addresses matter: a search for
     // "example.com" or "192.168.1.1" should behave sanely.
     //
     // MySQL's search appears to ignore them, which would otherwise
     // also match "example.wikipedia.com" and "192.168.83.1", so a
     // period between word characters (or before "*") is replaced
     // with a literal marker.
     $result = preg_replace(
         "/(\\w)\\.(\\w|\\*)/u",
         "\$1u82e\$2",
         $text
     );

     wfProfileOut(__METHOD__);
     return $result;
 }