/**
 * Segment a search key into words and return them as a quoted, pipe-separated list.
 *
 * Uses the native SCWS extension when it is loaded and falls back to the bundled
 * pure-PHP PSCWS4 class otherwise. Whitespace is stripped from the key before
 * it is handed to the segmenter.
 *
 * @param string $key Raw text to segment.
 * @return string Words formatted as "w1"|"w2"|..., or an empty string when no word was produced.
 */
public function scws($key)
{
    // The extension exposes scws_new(); no function called "scws" exists, so the
    // previous check could never select the native implementation.
    if (function_exists('scws_new')) {
        $so = scws_new();
        $so->set_charset('utf-8');
        $so->add_dict(ini_get('scws.default.fpath') . '/dict.utf8.xdb');
        // Custom dictionary (disabled)
        //$so->add_dict(APP_PATH . '/library/dict/scws.txt', SCWS_XDICT_TXT);
        $so->set_rule(ini_get('scws.default.fpath') . '/rules.utf8.ini');
        $so->set_ignore(true);
        $so->set_multi(true);
        $so->set_duality(true);
    } else {
        require_once APP_PATH . '/library/scws/pscws4.class.php';
        $so = new PSCWS4('utf-8');
        $so->set_dict(APP_PATH . '/library/scws/etc/dict.utf8.xdb');
        $so->set_rule(APP_PATH . '/library/scws/etc/rules.utf8.ini');
        $so->set_multi(true);
        $so->set_ignore(true);
        $so->set_duality(true);
    }

    $keys = str_replace(array(" ", " ", "\t", "\n", "\r"), '', $key);
    $so->send_text($keys);

    // get_result() hands back one chunk per call; keep reading until it is
    // exhausted so that words after the first chunk are not silently dropped.
    $quoted = array();
    while ($chunk = $so->get_result()) {
        foreach ($chunk as $item) {
            $quoted[] = '"' . $item['word'] . '"';
        }
    }
    $so->close();

    return implode('|', $quoted);
}
/**
 * Extract the top keywords from a text, optionally restricted by word attribute.
 *
 * @param string $text   Text to analyse.
 * @param string $filter Attribute filter; passed through makeFilter() before use.
 * @return array Keyword entries as produced by makeAttr(); an empty array when the
 *               filter matches nothing.
 */
public function keyword($text, $filter = '')
{
    $filter = $this->makeFilter($filter);

    vendor('pscws4.class#pscws4');
    $pscws = new PSCWS4();
    $pscws->set_charset('utf-8');
    $pscws->send_text($text);
    $some = $pscws->get_tops(10, $filter);
    $_result = $this->makeAttr($some);

    // An empty filter or a bare "~" means "no attribute restriction"
    if (empty($filter) || $filter == '~') {
        return $_result;
    }

    $allowed = explode(',', str_replace('~', '', $filter));

    // Start from an empty list so that "nothing matched" yields array()
    // instead of an undefined variable.
    $_res = array();
    foreach ($_result as $v) {
        if (in_array($v['attr'], $allowed)) {
            $_res[] = $v;
        }
    }

    return $_res;
}
/**
 * Segment a string and return its top-ranked words.
 *
 * @param string $str Text to analyse.
 * @param int    $num Number of words requested from get_tops().
 * @return array List of word strings.
 */
public function run($str, $num)
{
    $segmenter = new \PSCWS4();
    $segmenter->set_dict(APP_ROOT . '/dict.utf8.xdb');
    $segmenter->set_rule(APP_ROOT . '/rules.utf8.ini');
    $segmenter->set_ignore(true);

    $segmenter->send_text($str);
    $ranked = $segmenter->get_tops($num);
    $segmenter->close();

    // Keep only the word text of every ranked entry
    $result = array();
    foreach ($ranked as $entry) {
        $result[] = $entry['word'];
    }

    return $result;
}
/**
 * Return up to ten top-ranked words of a title as a list of strings.
 *
 * @param string $title Text to analyse.
 * @return array List of word strings.
 */
function get_tags_arr($title)
{
    $segmenter = new PSCWS4();
    $segmenter->set_dict(APP_ROOT . '/scws/dict.utf8.xdb');
    $segmenter->set_rule(APP_ROOT . '/scws/rules.utf8.ini');
    $segmenter->set_ignore(true);

    $segmenter->send_text($title);
    $ranked = $segmenter->get_tops(10);
    $segmenter->close();

    // Keep only the word text of every ranked entry
    $result = array();
    foreach ($ranked as $entry) {
        $result[] = $entry['word'];
    }

    return $result;
}
/**
 * Echo the top keywords of a title as a JSON object.
 *
 * The output has two members: "num" (how many keywords were found) and
 * "list" (the keyword strings).
 *
 * @param string $title Text to analyse.
 * @param int    $num   Number of keywords requested from get_tops().
 * @return void
 */
public function ajax_getKeywords($title, $num = 5)
{
    import('ORG.Pscws.Pscws4');

    $dataDir = LIBRARY_PATH . 'ORG/Pscws/';

    $segmenter = new PSCWS4();
    $segmenter->set_dict($dataDir . 'dict.utf8.xdb');
    $segmenter->set_rule($dataDir . 'rules.utf8.ini');
    $segmenter->set_ignore(true);
    $segmenter->send_text($title);
    $ranked = $segmenter->get_tops($num);
    $segmenter->close();

    $keywords = array();
    foreach ($ranked as $entry) {
        $keywords[] = $entry['word'];
    }

    $payload = array(
        'num' => count($keywords),
        'list' => $keywords,
    );
    echo json_encode($payload);
}
/**
 * Derive tag words from a title.
 *
 * @param string $title Text to analyse.
 * @param int    $num   Number of words requested from get_tops().
 * @return array List of word strings.
 */
public function get_tags_by_title($title, $num = 10)
{
    vendor('pscws4.pscws4', '', '.class.php');

    $dataDir = PIN_DATA_PATH . 'scws/';

    $segmenter = new PSCWS4();
    $segmenter->set_dict($dataDir . 'dict.utf8.xdb');
    $segmenter->set_rule($dataDir . 'rules.utf8.ini');
    $segmenter->set_ignore(true);
    $segmenter->send_text($title);
    $ranked = $segmenter->get_tops($num);
    $segmenter->close();

    // Keep only the word text of every ranked entry
    $result = array();
    foreach ($ranked as $entry) {
        $result[] = $entry['word'];
    }

    return $result;
}
/**
 * Return the top-ranked words of a title joined with "|".
 *
 * @param string $title Text to analyse.
 * @return string Pipe-separated words; an empty string when no word was found.
 */
function getKeyword($title)
{
    // require_once: a plain require would redeclare the PSCWS4 class (fatal error)
    // as soon as this function is called a second time in the same request.
    require_once './pscws.class.php';

    $pscws = new PSCWS4();
    $pscws->set_dict('./scws/dict.utf8.xdb');
    $pscws->set_rule('./scws/rules.utf8.ini');
    $pscws->set_ignore(true);
    $pscws->send_text($title);
    $words = $pscws->get_tops();

    $parts = array();
    foreach ($words as $val) {
        $parts[] = $val['word'];
    }
    $pscws->close();

    // implode() yields '' for an empty list, where substr('', 1) returned false on PHP < 8
    return implode('|', $parts);
}
/**
 * Build a space-separated keyword string from a title.
 *
 * @param string $title    Text to analyse.
 * @param string $contents Currently unused.
 * @return string A single leading space followed by up to ten words separated by spaces.
 */
function getkeyword($title, $contents = '')
{
    // Load the segmenter class
    require_once 'pscws4.class.php';

    // Create the segmenter; the argument is the charset (the class defaults to gbk)
    // and can be changed afterwards with set_charset()
    $pscws = new PSCWS4('utf8');
    $pscws->set_dict('../function/etc/dict.xdb');
    $pscws->set_rule('../function/etc/rules.ini');
    $pscws->set_duality(true);

    $pscws->send_text($title);
    $tops = $pscws->get_tops(10, '');

    // The key must be the quoted string 'word'; the bare constant form raises an
    // Error on PHP 8. Collecting into an array also avoids appending to an
    // undefined variable.
    $words = array();
    foreach ($tops as $k) {
        $words[] = $k['word'];
    }

    return ' ' . trim(implode(' ', $words));
}
/**
 * Extract keywords from several texts and merge them into one ranked list.
 *
 * For every text in $arr the top $num words are requested from the segmenter.
 * The per-text lists are then interleaved by rank (all first-ranked words, then
 * all second-ranked words, ...) and the first $num distinct words are returned.
 *
 * @param array $arr Texts to analyse.
 * @param int   $num Number of words requested per text and returned overall.
 * @return array List of distinct word strings, at most $num entries.
 */
public static function segments($arr, $num = 10)
{
    $list = array();
    // The guard has to look at the input; it previously tested an undefined
    // variable and therefore always returned an empty list.
    if (empty($arr)) {
        return $list;
    }

    $words = array();
    // Prefer the native php_scws extension when it is installed
    if (function_exists("scws_open")) {
        $sh = scws_open();
        scws_set_charset($sh, 'utf8');
        scws_set_dict($sh, APP_ROOT_PATH . 'system/scws/dict.utf8.xdb');
        scws_set_rule($sh, APP_ROOT_PATH . 'system/scws/rules.utf8.ini');
        scws_set_ignore($sh, true);
        foreach ($arr as $text) {
            scws_send_text($sh, $text);
            $words[] = scws_get_tops($sh, $num);
        }
        scws_close($sh);
    } else {
        require_once APP_ROOT_PATH . 'system/scws/pscws4.class.php';
        $pscws = new PSCWS4();
        $pscws->set_dict(APP_ROOT_PATH . 'system/scws/dict.utf8.xdb');
        $pscws->set_rule(APP_ROOT_PATH . 'system/scws/rules.utf8.ini');
        $pscws->set_ignore(true);
        foreach ($arr as $text) {
            $pscws->send_text($text);
            $words[] = $pscws->get_tops($num);
        }
        $pscws->close();
    }

    // Walk the result lists rank by rank so that higher-ranked words come first
    for ($i = 0; $i < $num; $i++) {
        foreach ($words as $item) {
            if (!isset($item[$i])) {
                continue;
            }
            $word = $item[$i]['word'];
            if (!isset($list[$word])) {
                $list[$word] = 0;
            }
            $list[$word]++;
        }
    }

    // preserve_keys keeps purely numeric words (PHP stores them as integer keys)
    // from being renumbered to 0, 1, 2, ... by array_slice()
    $list = array_slice($list, 0, $num, true);

    return array_map('strval', array_keys($list));
}
/**
 * Derive tag words from a title.
 *
 * @param string $title Text to analyse.
 * @param int    $num   Number of words requested from get_tops().
 * @return array List of word strings.
 */
public function get_tags_by_title($title, $num = 10)
{
    //vendor('pscws4.pscws4', '', '.class.php');
    import("Extend.pscws4.pscws4", APP_PATH . 'Lib/'); // layout after module grouping
    // layout before module grouping: import("@.Extend.pscws4.pscws4");
    // dump(APP_PATH . 'Extend/pscws4/scws/dict.utf8.xdb');

    $segmenter = new \PSCWS4();
    $segmenter->set_dict(APP_PATH . 'Lib/' . 'Extend/pscws4/scws/dict.utf8.xdb');
    $segmenter->set_rule(APP_PATH . 'Lib/' . 'Extend/pscws4/scws/rules.utf8.ini');
    $segmenter->set_ignore(true);

    $segmenter->send_text($title);
    $ranked = $segmenter->get_tops($num);
    $segmenter->close();

    // Keep only the word text of every ranked entry
    $result = array();
    foreach ($ranked as $entry) {
        $result[] = $entry['word'];
    }

    return $result;
}
/**
 * Split a string into words.
 *
 * The dictionary and rule files are chosen by the upper-cased "charset"
 * configuration item; only an entry for UTF-8 is defined.
 *
 * @param string $source_string Text to segment.
 * @return array List of word strings in the order the segmenter produced them.
 */
private function wordSplit($source_string)
{
    $base = APPPATH . 'third_party/pscws4/';
    require_once $base . 'pscws4.class.php';

    $dictByCharset = array('UTF-8' => $base . 'dict.utf8.xdb');
    $ruleByCharset = array('UTF-8' => $base . 'etc/rules.utf8.ini');
    $charset = strtoupper($this->config->item('charset'));

    $segmenter = new PSCWS4($charset);
    $segmenter->set_dict($dictByCharset[$charset]);
    $segmenter->set_rule($ruleByCharset[$charset]);
    $segmenter->set_ignore(true);
    $segmenter->send_text($source_string);

    // Results arrive in batches; a strict false marks the end
    $found = array();
    while (($batch = $segmenter->get_result()) !== false) {
        foreach ($batch as $entry) {
            $found[] = $entry['word'];
        }
    }
    $segmenter->close();

    return $found;
}
/**
 * Build a comma-separated tag string from a title.
 *
 * @param string $title Text to analyse.
 * @param int    $num   Number of words requested from get_tops().
 * @return string Words joined with ",".
 */
public function get_tags($title, $num = 10)
{
    vendor('Pscws.Pscws4', '', '.class.php');

    $dataDir = CONF_PATH . 'etc/';

    $segmenter = new PSCWS4();
    $segmenter->set_dict($dataDir . 'dict.utf8.xdb');
    $segmenter->set_rule($dataDir . 'rules.utf8.ini');
    $segmenter->set_ignore(true);
    $segmenter->send_text($title);
    $ranked = $segmenter->get_tops($num);
    $segmenter->close();

    $collected = array();
    foreach ($ranked as $entry) {
        $collected[] = $entry['word'];
    }

    return implode(',', $collected);
}
/**
 * Return up to five top-ranked words of a title.
 *
 * @param string $title Text to analyse.
 * @return array List of word strings.
 */
function get_keywords($title)
{
    // require_once: a plain require would redeclare the PSCWS4 class (fatal error)
    // as soon as this function is called a second time in the same request.
    require_once './Common/scws/pscws4.class.php';

    $pscws = new PSCWS4();
    $pscws->set_dict('./Common/scws/scws/dict.utf8.xdb');
    $pscws->set_rule('./Common/scws/scws/rules.utf8.ini');
    $pscws->set_ignore(true);
    $pscws->send_text($title);
    $words = $pscws->get_tops(5);

    $tags = array();
    foreach ($words as $val) {
        $tags[] = $val['word'];
    }
    $pscws->close();

    return $tags;
}
if (strpos($text, "\n") === false && is_file($text)) { $text = file_get_contents($text); } } elseif (isset($_SERVER['QUERY_STRING']) && !empty($_SERVER['QUERY_STRING'])) { $text = $_SERVER['QUERY_STRING']; } // require 'pscws4.class.php'; $cws = new PSCWS4('gbk'); $cws->set_dict(ini_get('scws.default.fpath') . '/dict.xdb'); $cws->set_rule('etc/rules.ini'); //$cws->set_multi(3); //$cws->set_ignore(true); //$cws->set_debug(true); //$cws->set_duality(true); $cws->send_text($text); if (php_sapi_name() != 'cli') { header('Content-Type: text/plain'); } echo "pscws version: ", $cws->version(), "\n"; echo "Segment result:\n\n"; while ($tmp = $cws->get_result()) { $line = ''; foreach ($tmp as $w) { if ($w['word'] == "\r") { continue; } if ($w['word'] == "\n") { $line = rtrim($line, ' ') . "\n"; } else { $line .= $w['word'] . " ";
/**
 * Segment a text into all of its words.
 *
 * Uses the native php_scws extension when available and the bundled PSCWS4
 * class otherwise.
 *
 * @param string $text Text to segment.
 * @return array List of word strings; empty when $text is empty.
 */
public static function segmentAll($text)
{
    $list = array();
    if (empty($text)) {
        return $list;
    }

    // Prefer the native php_scws extension when it is installed
    if (function_exists("scws_open")) {
        $sh = scws_open();
        scws_set_charset($sh, 'utf8');
        scws_set_dict($sh, APP_ROOT_PATH . 'system/scws/dict.utf8.xdb');
        // The rule file sits next to the dictionary (same path as the fallback
        // branch below); the former path lacked the "scws/" directory.
        scws_set_rule($sh, APP_ROOT_PATH . 'system/scws/rules.utf8.ini');
        scws_set_ignore($sh, true);
        scws_send_text($sh, $text);
        while ($words = scws_get_result($sh)) {
            foreach ($words as $word) {
                $list[] = $word['word'];
            }
        }
        scws_close($sh);
    } else {
        require_once APP_ROOT_PATH . 'system/scws/pscws4.class.php';
        $pscws = new PSCWS4();
        $pscws->set_dict(APP_ROOT_PATH . 'system/scws/dict.utf8.xdb');
        $pscws->set_rule(APP_ROOT_PATH . 'system/scws/rules.utf8.ini');
        $pscws->set_ignore(true);
        $pscws->send_text($text);
        while ($words = $pscws->get_result()) {
            foreach ($words as $word) {
                $list[] = $word['word'];
            }
        }
        $pscws->close();
    }

    return $list;
}
/**
 * Demo action: segments a fixed sample sentence and prints the input, a dump
 * of the resulting words and the words joined by spaces.
 *
 * @return void
 */
function pscws()
{
    $base = APPPATH . 'third_party/pscws4/';
    require_once $base . 'pscws4.class.php';

    $segmenter = new PSCWS4('utf8');
    $segmenter->set_dict($base . 'dict.utf8.xdb');
    $segmenter->set_rule($base . 'etc/rules.utf8.ini');
    $segmenter->set_ignore(true);

    $text = '我是华东政法大学的学生';
    echo $text;
    $segmenter->send_text($text);

    // Drain the segmenter batch by batch
    $words = array();
    $batch = $segmenter->get_result();
    while ($batch) {
        foreach ($batch as $entry) {
            $words[] = $entry['word'];
        }
        $batch = $segmenter->get_result();
    }

    var_dump($words);
    echo implode(' ', $words);

    $segmenter->close();
    $this->load->sidebar_loaded = true;
}
/**
 * Build a comma-separated tag string from a title.
 *
 * @param string $title Text to analyse.
 * @param int    $num   Number of words requested from get_tops().
 * @return string Words joined with ",".
 */
private function get_tags($title, $num = 10)
{
    vendor("Pscws.Pscws4", "", ".class.php");

    $dataDir = CONF_PATH . "etc/";

    $segmenter = new PSCWS4();
    $segmenter->set_dict($dataDir . "dict.utf8.xdb");
    $segmenter->set_rule($dataDir . "rules.utf8.ini");
    $segmenter->set_ignore(true);
    $segmenter->send_text($title);
    $ranked = $segmenter->get_tops($num);
    $segmenter->close();

    $collected = array();
    foreach ($ranked as $entry) {
        $collected[] = $entry["word"];
    }

    return implode(",", $collected);
}
/**
 * Segment a text with the Traditional Chinese UTF-8 dictionary and dump every
 * resulting word entry with var_dump().
 *
 * @param string $text Text to segment.
 * @return void
 */
public function chinese_split($text)
{
    // Create the segmenter; the argument is the charset (the class defaults to gbk)
    // and can be changed afterwards with set_charset()
    $segmenter = new PSCWS4('utf8');

    // Configure the segmenter. set_dict is mandatory; set_rule is needed for smart
    // recognition of personal names and the like. Available setters include
    // set_charset, set_dict, set_rule, set_ignore, set_multi, set_debug, set_duality ...
    $segmenter->set_charset('utf8');
    $segmenter->set_dict('./pscws4/etc/dict.ct.utf8.xdb'); // Traditional Chinese, UTF-8
    $segmenter->set_rule('./pscws4/etc/rules_cht.utf8.ini'); // Traditional Chinese, UTF-8
    //$pscws->set_dict('./etc/dict.sc.utf8.xdb'); // Simplified Chinese, UTF-8
    //$pscws->set_rule('./etc/rules.utf8.ini'); // Simplified Chinese, UTF-8
    //$pscws->set_dict('./etc/dict.sc.gbk.xdb'); // Simplified Chinese, GBK
    //$pscws->set_rule('./etc/rules.ini'); // Simplified Chinese, GBK

    // Pass the text in with send_text(), then call get_result() repeatedly until it
    // returns false. Each returned word is an associative array with: word (the word
    // itself), idf (inverse document frequency weight), off (offset in the text),
    // len (length) and attr (part of speech).
    $segmenter->send_text($text);
    $batch = $segmenter->get_result();
    while ($batch) {
        foreach ($batch as $entry) {
            // Per-word processing of the article text
            var_dump($entry);
        }
        $batch = $segmenter->get_result();
    }

    // After send_text(), get_tops() can return the top N words ranked by weight,
    // which is commonly used for keyword extraction. Each element carries:
    // word (the word itself), weight, times (occurrences) and attr (part of speech).
    //$tops = $pscws->get_tops(10, 'n,v');
    //var_dump($tops);
}
/**
 * Extract a comma-separated keyword string from a (possibly URL-encoded, HTML) title.
 *
 * The title is decoded with urldecode_utf8(), stripped with trim_html() and, when
 * longer than 2400 bytes, shortened with cutstr() before it is segmented.
 *
 * @param string $title Text to analyse.
 * @param int    $num   Number of words requested from get_tops().
 * @return string Words joined with ",".
 */
function get_keywords3($title, $num = 10)
{
    $title = urldecode_utf8($title);
    $title = trim_html($title, 1);
    if (strlen($title) > 2400) {
        $title = cutstr($title, 800, '');
    }

    include_once ROOT_PATH . 'web/lib/pscws4/pscws4.class.php';
    $cws = new PSCWS4('utf8');
    $cws->set_dict(ROOT_PATH . 'web/lib/pscws4/dict.utf8.xdb');
    //$cws->set_dict(ROOT_PATH.'web/lib/pscws4/a.xdb');
    $cws->set_rule(ROOT_PATH . 'web/lib/pscws4/rules.ini');
    //$cws->set_multi(3);
    $cws->set_ignore(true);
    //$cws->set_debug(true);
    //$cws->set_duality(true);
    $cws->send_text($title);

    // Honour the caller's $num; the limit used to be hard-coded to 10, which made
    // the parameter a no-op.
    // NOTE(review): the attribute list 'r,v,p' has no leading '~' - confirm whether
    // including or excluding these attributes is intended.
    $words = $cws->get_tops($num, 'r,v,p');
    $cws->close();

    $tags = array();
    foreach ($words as $val) {
        $tags[] = $val['word'];
    }

    return implode(',', $tags);
}
// Search script section: normalises paging values, records the keyword,
// segments it into words and builds the SQL condition handed to getLeftNav().
if (isset($totalresult)) {
    $pageno = Helper_Archive::pregReplace($pageno, 2);
    $totalresult = Helper_Archive::pregReplace($totalresult, 2);
}
addSearchkey($keyword); // record the keyword as a hot search term

require dirname(__FILE__) . "/cloudsearch/pscws4.class.php";
$pscws = new PSCWS4('utf-8');
// Configure the segmenter. set_dict is mandatory; set_rule is needed for smart
// recognition of personal names and the like. Available setters include
// set_charset, set_dict, set_rule, set_ignore, set_multi, set_debug, set_duality ...
$pscws->set_charset('utf-8');
$pscws->set_rule(dirname(__FILE__) . '/cloudsearch/rules.utf8.ini');
$pscws->set_dict(dirname(__FILE__) . '/cloudsearch/dict.utf8.xdb');
$pscws->send_text($keyword);

// Start from an empty list so that a keyword yielding no words does not leave
// $words undefined for the loop below.
$words = array();
while ($some = $pscws->get_result()) {
    foreach ($some as $word) {
        $words[] = $word['word'];
    }
}

// Strict condition: every word must occur in the title.
// Loose condition: any word longer than one character occurs in the title.
$where = "ishidden=0";
$whereorParts = array();
foreach ($words as $k => $v) {
    // The words come from user input and end up inside a quoted SQL literal.
    // NOTE(review): addslashes() is a stop-gap; switch to the database layer's own
    // escaping or bound parameters if getLeftNav() can accept them.
    $like = "title like '%" . addslashes($v) . "%'";
    $where .= " and {$like}";
    if (mb_strlen($v, 'utf-8') > 1) {
        $whereorParts[] = $like;
    }
}
// Join explicitly instead of stripping a leading "or" with a character-list trim
$whereor = implode(' or ', $whereorParts);
$wh = !empty($whereor) ? "({$where}) or ({$whereor})" : $where;
$leftnavinfo = getLeftNav($wh, $typeid);