Esempio n. 1
0
 /**
  * Build the search-index form of this update's title.
  *
  * The title text is run through the content language's search
  * normalization, stripped of every character outside the engine's legal
  * search set (plus '&', '#' and ';'), lower-cased, given possessive
  * handling, whitespace-collapsed and finally handed to the engine's own
  * normalizeText().
  *
  * @param SearchEngine $search Engine supplying the legal character set and the final text normalization
  * @return string A stripped-down title string ready for the search index
  */
 private function getNormalizedTitle(SearchEngine $search)
 {
     global $wgContLang;

     $namespace = $this->title->getNamespace();
     $rawTitle = $this->title->getText();
     $lc = $search->legalSearchChars() . '&#;';

     // Normalize, blank out each run of non-searchable characters, then fold case.
     $text = $wgContLang->normalizeForSearch($rawTitle);
     $text = preg_replace("/[^{$lc}]+/", ' ', $text);
     $text = $wgContLang->lc($text);

     // Ordered (pattern, replacement) pairs; each runs on the output of the previous one.
     $rewrites = array(
         // "foo's" becomes "foo foo's" so both forms end up in the index text
         array("/([{$lc}]+)'s( |\$)/", "\\1 \\1's "),
         // "foos'" loses its trailing apostrophe
         array("/([{$lc}]+)s'( |\$)/", "\\1s "),
         // Squash whitespace runs down to a single space
         array("/\\s+/", ' '),
     );
     if ($namespace == NS_FILE) {
         // In the file namespace a trailing " png", " gif", " jpg", " jpeg" or " ogg" is removed
         $rewrites[] = array("/ (png|gif|jpg|jpeg|ogg)\$/", "");
     }
     foreach ($rewrites as $rewrite) {
         $text = preg_replace($rewrite[0], $rewrite[1], $text);
     }

     return $search->normalizeText(trim($text));
 }
Esempio n. 2
0
 /**
  * Rewrites text so MySQL's fulltext indexing handles it correctly:
  * multi-byte sequences are passed to stripForSearchCallback, short words
  * are padded, and periods inside tokens are replaced with a marker.
  *
  * @param string $string Text to normalize
  * @return string The text after the parent normalization and the MySQL-specific rewrites
  */
 function normalizeText($string)
 {
     global $wgContLang;
     wfProfileIn(__METHOD__);

     // MySQL's fulltext index does not cope with utf-8, so the text is
     // lower-cased and every lead byte plus its continuation bytes is
     // handed to the callback (per the original note: converted to hex).
     $multiByteRun = "/([\\xc0-\\xff][\\x80-\\xbf]*)/";
     $callback = array($this, 'stripForSearchCallback');
     $lowered = $wgContLang->lc(parent::normalizeText($string));
     $text = preg_replace_callback($multiByteRun, $callback, $lowered);

     // Default indexing drops short words; append "u800" to any word
     // shorter than the minimum length so it survives without having to
     // reconfigure the server.
     $minLength = $this->minSearchLength();
     if ($minLength > 1) {
         $n = $minLength - 1;
         $text = preg_replace("/\\b(\\w{1,{$n}})\\b/", "\$1u800", $text);
     }

     // A period between word characters (hostnames, IP addresses) matters:
     // a search for "example.com" or "192.168.1.1" should not also match
     // "example.wikipedia.com" or "192.168.83.1". MySQL's search seems to
     // ignore the period, so it is swapped for the "u82e" marker.
     $innerPeriod = "/(\\w)\\.(\\w|\\*)/u";
     $text = preg_replace($innerPeriod, "\$1u82e\$2", $text);

     wfProfileOut(__METHOD__);

     return $text;
 }