/**
 * Split a string into words.
 *
 * @param string $string         Text to split.
 * @param string $source_charset Character encoding of the input. When empty, the value of
 *                               C("CHARSET") is used, with "utf8"/"utf-8" normalised to "utf-8".
 * @param string $target_charset Character encoding of the output.
 * @param bool   $load_all       Stored in self::$isLoadAll before the dictionary is loaded.
 * @return mixed When the analysis yields an array: an array keyed by word whose value is the
 *               number of occurrences, without entries whose key is empty or a single
 *               character after punctuation removal. Any non-array result is returned as is.
 */
public static function split_word($string, $source_charset = '', $target_charset = 'utf-8', $load_all = TRUE)
{
    $configured_charset = C("CHARSET");
    if (empty($source_charset)) {
        $source_charset = preg_replace("/utf8|utf-8/i", "utf-8", $configured_charset);
    }

    // Strip PATH_ORG before prepending it, so repeated calls do not stack the prefix
    // on these static properties.
    self::$addonDicFile = PATH_ORG . str_replace(PATH_ORG, '', self::$addonDicFile);
    self::$mainDicFileZip = PATH_ORG . str_replace(PATH_ORG, '', self::$mainDicFileZip);
    self::$mainDicFile = PATH_ORG . str_replace(PATH_ORG, '', self::$mainDicFile);

    self::SetSource($string, $source_charset, $target_charset);
    self::$isLoadAll = $load_all;

    if (file_exists(self::$mainDicFile)) {
        self::$isUnpacked = true;
    }
    if (self::$loadInit) {
        self::LoadDict();
    }

    self::StartAnalysis();
    $index = self::GetFinallyIndex();
    if (!is_array($index)) {
        return $index;
    }

    $words = array();
    foreach ($index as $word => $count) {
        $word = string::remove_punctuation($word);
        // NOTE(review): length is measured with $source_charset; if the keys come back
        // encoded in $target_charset this may miscount when the two differ - confirm.
        if (!empty($word) && mb_strlen($word, $source_charset) != 1) {
            $words[$word] = $count;
        }
    }

    return $words;
}
/**
 * Split a string into words.
 *
 * @param string $string         Text to split.
 * @param string $source_charset Character encoding of the input. When empty, the value of
 *                               C("CHARSET") is used, with "utf8"/"utf-8" normalised to "utf-8".
 * @param string $target_charset Character encoding of the output.
 * @param bool   $load_all       Stored in self::$isLoadAll before the dictionary is loaded.
 * @return mixed When the analysis yields an array: an array keyed by word whose value is the
 *               number of occurrences, without entries whose key is empty or a single
 *               character after punctuation removal. Any non-array result is returned as is.
 */
public static function splitWord($string, $source_charset = '', $target_charset = 'utf-8', $load_all = TRUE)
{
    self::$splitword_path = HDPHP_EXTEND_PATH . 'Org/SplitWord/';

    $charset = C("CHARSET");
    if (empty($source_charset)) {
        $source_charset = preg_replace("/utf8|utf-8/i", "utf-8", $charset);
    }

    // The dictionary paths live in static properties that survive between calls.
    // Remove a prefix added by an earlier call before prepending it again; otherwise
    // the second call would produce "<path><path>file" and the dictionary lookup fails.
    $base = self::$splitword_path;
    self::$addonDicFile = $base . str_replace($base, '', self::$addonDicFile);
    self::$mainDicFileZip = $base . str_replace($base, '', self::$mainDicFileZip);
    self::$mainDicFile = $base . str_replace($base, '', self::$mainDicFile);

    self::SetSource($string, $source_charset, $target_charset);
    self::$isLoadAll = $load_all;

    if (file_exists(self::$mainDicFile)) {
        self::$isUnpacked = true;
    }
    if (self::$loadInit) {
        self::LoadDict();
    }

    self::StartAnalysis();
    $index = self::GetFinallyIndex();
    if (!is_array($index)) {
        return $index;
    }

    $words = array();
    foreach ($index as $word => $count) {
        $word = string::removePunctuation($word);
        // NOTE(review): length is measured with $source_charset; if the keys come back
        // encoded in $target_charset this may miscount when the two differ - confirm.
        if (empty($word) || mb_strlen($word, $source_charset) == 1) {
            continue;
        }
        $words[$word] = $count;
    }

    return $words;
}