/**
  * Automatically convert the text to one variant (zh-hans by default)
  * and normalize special characters.
  *
  * @param string $string Text to normalize
  * @param string $autoVariant Variant to convert to; defaults to 'zh-hans'
  * @return string
  */
 function normalizeForSearch($string, $autoVariant = 'zh-hans')
 {
     wfProfileIn(__METHOD__);

     // Collapse the text onto $autoVariant (zh-hans unless the caller
     // overrides it) before indexing. Traditional -> Simplified conversion
     // is less ambiguous than the reverse direction, which makes the
     // Simplified form the better search key.
     $converted = $this->mConverter->autoConvert($string, $autoVariant);

     // Delegate the remaining normalization to the parent class
     // (noted in the original as LanguageZh_hans::normalizeForSearch).
     $normalized = parent::normalizeForSearch($converted);

     wfProfileOut(__METHOD__);
     return $normalized;
 }
 /**
  * Prepare text for search indexing: put a space in front of every
  * multi-byte UTF-8 sequence, convert the result to zh-hans, then pass it
  * to the parent class's stripForSearch().
  *
  * @param string $string Text to prepare
  * @return mixed Whatever parent::stripForSearch() returns for the
  *               converted text (presumably a string)
  */
 function stripForSearch($string)
 {
     $fname = "LanguageZh::stripForSearch";
     wfProfileIn($fname);
     // Eventually this should be real word segmentation; for now treat each
     // character as a word by inserting a space before every multi-byte
     // sequence (lead byte 0xC0-0xFF followed by continuation bytes 0x80-0xBF).
     //
     // A plain replacement string is used instead of the /e (eval) modifier:
     // /e was deprecated in PHP 5.5 and removed in PHP 7.0, where
     // preg_replace() emits a warning and returns null. The evaluated
     // expression only ever produced ' ' . $1, so the output is unchanged.
     $t = preg_replace("/([\\xc0-\\xff][\\x80-\\xbf]*)/", ' $1', $string);
     // Always convert to zh-hans before indexing. zh-hans is the better
     // choice for search because conversion from Traditional to Simplified
     // is less ambiguous than the other way around.
     $t = $this->mConverter->autoConvert($t, 'zh-hans');
     $t = parent::stripForSearch($t);
     wfProfileOut($fname);
     return $t;
 }
Example #3
0
 /**
  * Prepare text for search indexing: put a space in front of every
  * multi-byte UTF-8 sequence, convert the result to zh-hans, then pass it
  * to the parent class's stripForSearch().
  *
  * @param string $string Text to prepare
  * @return mixed Whatever parent::stripForSearch() returns for the
  *               converted text (presumably a string)
  */
 function stripForSearch($string)
 {
     wfProfileIn(__METHOD__);

     // Stand-in for proper word segmentation: every multi-byte sequence
     // (lead byte 0xC0-0xFF plus its 0x80-0xBF continuation bytes) gets a
     // leading space so that each character is treated as its own word.
     // @fixme only do this for Han characters...
     $separated = preg_replace("/([\\xc0-\\xff][\\x80-\\xbf]*)/", " \$1", $string);

     // Index the zh-hans form: Traditional -> Simplified conversion is
     // less ambiguous than the reverse, so it makes the better search key.
     // The converted text then goes through the parent's stripping.
     $stripped = parent::stripForSearch(
         $this->mConverter->autoConvert($separated, 'zh-hans')
     );

     wfProfileOut(__METHOD__);
     return $stripped;
 }