/**
 * Convert text to one Chinese variant, then hand it to the parent class's
 * search normalization.
 *
 * @param $string String text to normalize
 * @param $autoVariant String variant code given to the converter, default 'zh-hans'
 * @return String whatever parent::normalizeForSearch() returns for the converted text
 */
function normalizeForSearch($string, $autoVariant = 'zh-hans') {
	wfProfileIn(__METHOD__);

	// Text is converted (to zh-hans by default) before indexing: going from
	// Traditional to Simplified is less ambiguous than the reverse, so
	// Simplified is the better form to search on. The converted text is then
	// passed to the parent implementation (noted in the original source as
	// LanguageZh_hans::normalizeForSearch).
	$normalized = parent::normalizeForSearch(
		$this->mConverter->autoConvert($string, $autoVariant)
	);

	wfProfileOut(__METHOD__);
	return $normalized;
}
/**
 * Prepare text for the search index: put a space before every multibyte
 * character, convert to zh-hans, then apply the parent class's stripping.
 *
 * @param $string String text to process
 * @return String whatever parent::stripForSearch() returns for the converted text
 */
function stripForSearch($string) {
	$fname = "LanguageZh::stripForSearch";
	wfProfileIn($fname);

	// eventually this should be a word segmentation
	// for now just treat each character as a word
	//
	// The pattern matches a byte in 0xC0-0xFF followed by any number of
	// bytes in 0x80-0xBF and prefixes the match with a space. A plain
	// replacement string is used instead of the /e (eval) modifier, which
	// was deprecated in PHP 5.5 and removed in PHP 7.0; the evaluated
	// expression only ever produced ' ' followed by the match, so the
	// result is unchanged.
	$t = preg_replace("/([\\xc0-\\xff][\\x80-\\xbf]*)/", " \$1", $string);

	// always convert to zh-hans before indexing. it should be
	// better to use zh-hans for search, since conversion from
	// Traditional to Simplified is less ambiguous than the
	// other way around
	$t = $this->mConverter->autoConvert($t, 'zh-hans');
	$t = parent::stripForSearch($t);

	wfProfileOut($fname);
	return $t;
}
/**
 * Prepare text for the search index in three steps: space-separate every
 * multibyte character, convert to zh-hans, then run the parent class's
 * stripForSearch().
 *
 * @param $string String text to process
 * @return String whatever parent::stripForSearch() returns for the converted text
 */
function stripForSearch($string) {
	wfProfileIn(__METHOD__);

	// No real word segmentation yet, so each character is treated as its
	// own word: a space is inserted before every sequence made of a byte
	// in 0xC0-0xFF followed by bytes in 0x80-0xBF.
	// @fixme only do this for Han characters...
	$spaced = preg_replace("/([\\xc0-\\xff][\\x80-\\xbf]*)/", " \$1", $string);

	// Index in zh-hans: converting Traditional to Simplified is less
	// ambiguous than the other direction, so Simplified is the better
	// form to search on.
	$simplified = $this->mConverter->autoConvert($spaced, 'zh-hans');

	$stripped = parent::stripForSearch($simplified);

	wfProfileOut(__METHOD__);
	return $stripped;
}