/**
 * Configure the SCWS handle stored in $this->tool.
 *
 * Applies, in order: the UTF-8 charset, the dictionary and rule files located
 * under self::ROOT, punctuation ignoring, the self::SCWS_MULTI_DUALITY
 * multi-segmentation mode, and finally switches the duality option off.
 */
public function init()
{
    $handle   = $this->tool;
    $dictPath = self::ROOT . "etc/dict.utf8.xdb";
    $rulePath = self::ROOT . "etc/rules.utf8.ini";

    // The charset is set first, before the dictionary and rule files are loaded.
    scws_set_charset($handle, "utf8");
    scws_set_dict($handle, $dictPath);
    scws_set_rule($handle, $rulePath);

    // Strip punctuation from the segmentation output.
    scws_set_ignore($handle, true);

    // Alternative multi mode that was left disabled: self::SCWS_MULTI_SHORT.
    scws_set_multi($handle, self::SCWS_MULTI_DUALITY);
    scws_set_duality($handle, false);
}
/**
 * Segment a text with SCWS and return its highest-frequency words,
 * optionally restricted to (or excluding) given parts of speech.
 *
 * All whitespace is removed from $str before it is handed to SCWS.
 *
 * @param string      $str   Text to analyse.
 * @param int         $limit Optional. Maximum number of words to return; defaults to 10.
 * @param string|null $attr  Optional. Part-of-speech tags separated by ASCII commas;
 *                           returned words must carry one of the listed tags. When the
 *                           list starts with "~" the filter is inverted (the tag must
 *                           NOT be in the list). Defaults to null: no filtering.
 * @return array|false False when the scws extension is not loaded or no SCWS handle
 *                     could be opened; an empty array when nothing is left of $str
 *                     after whitespace removal; otherwise whatever scws_get_tops()
 *                     returns.
 */
function scwsTop($str, $limit = 10, $attr = null)
{
    if (!function_exists('scws_open')) {
        return false;
    }

    // Drop every whitespace run; a failed preg_replace() yields null, which
    // empty() also treats as "nothing to segment".
    $str = preg_replace("/[\\s\t\r\n]+/", '', $str);
    if (empty($str)) {
        return array();
    }

    $sh = scws_open();
    if (!$sh) {
        // Could not obtain a handle: report it the same way as a missing
        // extension instead of passing an invalid handle to the calls below.
        return false;
    }

    scws_set_charset($sh, 'utf8');
    scws_set_ignore($sh, true);
    scws_set_multi($sh, SCWS_MULTI_SHORT | SCWS_MULTI_DUALITY);
    scws_set_duality($sh, true);
    scws_send_text($sh, $str);

    $rst = scws_get_tops($sh, $limit, $attr);
    scws_close($sh);

    return $rst;
}