Beispiel #1
0
 /**
  * Extract the top keywords of a text with the PSCWS4 segmenter.
  *
  * The raw filter is normalised by makeFilter() and handed to
  * PSCWS4::get_tops() together with a limit of 10 words; the result is then
  * passed through makeAttr(). When the normalised filter is a concrete
  * attribute list (non-empty and not just "~"), only entries whose 'attr'
  * value appears in that comma-separated list (with "~" stripped) are kept.
  *
  * @param  string $text   Text to segment.
  * @param  string $filter Raw filter condition; normalised by makeFilter().
  * @return array  The makeAttr() result when no attribute list applies,
  *                otherwise the matching entries (an empty array when
  *                nothing matches).
  */
 public function keyword($text, $filter = '')
 {
     $filter = $this->makeFilter($filter);
     vendor('pscws4.class#pscws4');
     $pscws = new PSCWS4();
     $pscws->set_charset('utf-8');
     $pscws->send_text($text);
     $some = $pscws->get_tops(10, $filter);
     $_result = $this->makeAttr($some);

     // No filter, or a bare "~": nothing to narrow down, return everything.
     if (empty($filter) || $filter == "~") {
         return $_result;
     }

     // Attribute names to keep, e.g. "~n,v" becomes array('n', 'v').
     $filter_reg = explode(',', str_replace('~', '', $filter));

     // Start from an empty list so that "no match" yields array() instead of
     // reading an undefined variable.
     $_res = array();
     foreach ($_result as $v) {
         if (in_array($v['attr'], $filter_reg)) {
             $_res[] = $v;
         }
     }
     return $_res;
 }
Beispiel #2
0
发展中国家
上海大学城书店
表面的东西
今天我买了一辆面的,于是我坐着面的去上班
化妆和服装
这个门把手坏了,请把手拿开
将军任命了一名中将,产量三年中将增长两倍
王军虎去广州了,王军虎头虎脑的
欧阳明练功很厉害可是马明练不厉害
毛泽东北京华烟云
人中出吕布 马中出赤兔Q1,中我要买Q币充值
EOF;
//
// Load the PSCWS4 class, then point the segmenter at the UTF-8 dictionary
// and rule files before feeding it the text.
require 'pscws4.class.php';
$cws = new PSCWS4();
$cws->set_charset('utf8');
$cws->set_dict('etc/dict.utf8.xdb');
$cws->set_rule('etc/rules.utf8.ini');
// Optional switches, left disabled in this example:
//$cws->set_multi(3);
//$cws->set_ignore(true);
//$cws->set_debug(true);
//$cws->set_duality(true);
$cws->send_text($text);
// Outside the command line, declare the output as plain text.
if (PHP_SAPI !== 'cli') {
    header('Content-Type: text/plain');
}
printf("pscws version: %s\n", $cws->version());
echo "Segment result:\n\n";
while ($tmp = $cws->get_result()) {
    $line = '';
    foreach ($tmp as $w) {
Beispiel #3
0
// Only Chinese, English and digits may be searched.
$keyword = Helper_Archive::pregReplace($keyword, 6);
$typeid = Helper_Archive::pregReplace($typeid, 2);
if (isset($totalresult)) {
    $pageno = Helper_Archive::pregReplace($pageno, 2);
    $totalresult = Helper_Archive::pregReplace($totalresult, 2);
}
// Add the keyword to the hot search terms.
addSearchkey($keyword);
require dirname(__FILE__) . "/cloudsearch/pscws4.class.php";
$pscws = new PSCWS4('utf-8');
// Next, configure the segmenter: set_dict is mandatory, and set_rule is
// needed for smart recognition of personal names and the like.
// Available setters include: set_charset, set_dict, set_rule, set_ignore,
// set_multi, set_debug, set_duality, ...
$pscws->set_charset('utf-8');
$pscws->set_rule(dirname(__FILE__) . '/cloudsearch/rules.utf8.ini');
$pscws->set_dict(dirname(__FILE__) . '/cloudsearch/dict.utf8.xdb');
$pscws->send_text($keyword);
// Make sure the word list exists even when the segmenter returns nothing
// (e.g. an empty keyword); otherwise the foreach below runs on an undefined
// variable. isset() keeps any list prepared earlier in the script.
if (!isset($words)) {
    $words = array();
}
while ($some = $pscws->get_result()) {
    foreach ($some as $word) {
        $words[] = $word['word'];
    }
}
$where = "ishidden=0";
// Same guard for the OR clause, which is only ever appended to below.
if (!isset($whereor)) {
    $whereor = '';
}
// NOTE(review): the words are interpolated straight into the SQL text; this
// presumably relies on the pregReplace() whitelist above to keep quotes and
// LIKE wildcards out - confirm, or switch to bound parameters.
foreach ($words as $k => $v) {
    $where .= " and title like '%{$v}%'";
    // Words longer than one character are also added to the OR clause.
    if (mb_strlen($v, 'utf-8') > 1) {
        $whereor .= " or title like '%{$v}%'";
    }
}
Beispiel #4
0
 /**
  * Segment a text with PSCWS4 and var_dump() every word entry it yields.
  *
  * The segmenter is configured with the Traditional Chinese UTF-8 dictionary
  * and rule files under ./pscws4/etc/. Nothing is returned; the entries are
  * only dumped.
  *
  * @param string $text Text to segment.
  */
 public function chinese_split($text)
 {
     // The constructor argument is the charset (gbk by default according to
     // the library notes); set_charset() can change it afterwards.
     $segmenter = new PSCWS4('utf8');

     // Configuration: set_dict is mandatory, set_rule is needed for smart
     // recognition of personal names and the like. Other setters include
     // set_ignore, set_multi, set_debug and set_duality.
     $segmenter->set_charset('utf8');
     // Traditional Chinese, UTF-8. Alternatives noted by the original author:
     //   Simplified UTF-8: ./etc/dict.sc.utf8.xdb with ./etc/rules.utf8.ini
     //   Simplified GBK:   ./etc/dict.sc.gbk.xdb  with ./etc/rules.ini
     $segmenter->set_dict('./pscws4/etc/dict.ct.utf8.xdb');
     $segmenter->set_rule('./pscws4/etc/rules_cht.utf8.ini');

     // Hand over the text, then pull batches with get_result() until it
     // returns a falsy value. Per the library notes each entry is an
     // associative array with: word, idf, off (offset in the text), len and
     // attr (part of speech).
     $segmenter->send_text($text);
     while (true) {
         $batch = $segmenter->get_result();
         if (!$batch) {
             break;
         }
         foreach ($batch as $token) {
             // Per-word processing goes here; the example just dumps it.
             var_dump($token);
         }
     }

     // After send_text(), get_tops() can instead return the N highest-weighted
     // words (typical for keyword extraction), e.g. get_tops(10, 'n,v'); each
     // entry then carries word, weight, times and attr.
 }