/**
 * Split a string into words.
 *
 * Configures the dictionary file paths, feeds the text to the analyser,
 * runs the analysis and post-processes the resulting index: punctuation is
 * stripped from every key, and keys that end up empty or exactly one
 * character long are discarded.
 *
 * @param string $string         Text to split.
 * @param string $source_charset Character encoding of the input. When empty, the
 *                               configured C("CHARSET") value is used, with
 *                               "utf8"/"utf-8" normalised to "utf-8".
 * @param string $target_charset Character encoding of the output.
 * @param bool   $load_all       Stored in self::$isLoadAll before the dictionary is loaded.
 * @return mixed The value of self::GetFinallyIndex(). When that is an array it is
 *               returned filtered; per the original documentation its keys are the
 *               words and its values are the occurrence counts.
 */
public static function splitWord($string, $source_charset = '', $target_charset = 'utf-8', $load_all = TRUE)
{
    $base = HDPHP_EXTEND_PATH . 'Org/SplitWord/';
    self::$splitword_path = $base;

    if (empty($source_charset)) {
        $source_charset = preg_replace("/utf8|utf-8/i", "utf-8", C("CHARSET"));
    }

    // The dictionary paths live in static properties, so they survive between
    // calls. Prefix them with the base directory only once; unconditionally
    // prepending (as before) produced "<base><base>file" on the second call.
    if (strpos((string) self::$addonDicFile, $base) !== 0) {
        self::$addonDicFile = $base . self::$addonDicFile;
    }
    if (strpos((string) self::$mainDicFileZip, $base) !== 0) {
        self::$mainDicFileZip = $base . self::$mainDicFileZip;
    }
    if (strpos((string) self::$mainDicFile, $base) !== 0) {
        self::$mainDicFile = $base . self::$mainDicFile;
    }

    self::SetSource($string, $source_charset, $target_charset);
    self::$isLoadAll = $load_all;

    if (file_exists(self::$mainDicFile)) {
        self::$isUnpacked = TRUE;
    }
    if (self::$loadInit) {
        self::LoadDict();
    }

    self::StartAnalysis();
    $index = self::GetFinallyIndex();

    if (!is_array($index)) {
        return $index;
    }

    $words = array();
    foreach ($index as $word => $count) {
        $word = string::removePunctuation($word);
        // NOTE(review): the length is measured with $source_charset, although the
        // words may already be encoded in $target_charset - confirm against
        // SetSource()/GetFinallyIndex().
        if (empty($word) || mb_strlen($word, $source_charset) === 1) {
            continue;
        }
        $words[$word] = $count;
    }

    return $words;
}