Example #1
0
 /**
  * Extract the top keywords from a piece of text.
  *
  * The text is segmented with PSCWS4 (UTF-8), the 10 highest ranked words are
  * fetched with get_tops() and passed through makeAttr(). When a filter is
  * given, only entries whose 'attr' value is listed in the filter are kept.
  *
  * @param  string $text   Text to segment.
  * @param  string $filter Filter expression; normalised by makeFilter() before
  *                        use. An empty filter or a bare "~" disables the
  *                        attribute post-filtering.
  * @return array          With an attribute filter: the matching entries, or an
  *                        empty array when nothing matches. Without one: the
  *                        result of makeAttr() unchanged.
  */
 public function keyword($text, $filter = '')
 {
     $filter = $this->makeFilter($filter);
     vendor('pscws4.class#pscws4');
     $pscws = new PSCWS4();
     $pscws->set_charset('utf-8');
     $pscws->send_text($text);
     $some = $pscws->get_tops(10, $filter);
     $_result = $this->makeAttr($some);

     // No filter, or only the "~" marker: there is nothing to post-filter.
     if (empty($filter) || $filter == "~") {
         return $_result;
     }

     // Attribute names listed in the filter, with the "~" marker stripped.
     $filter_reg = explode(',', str_replace('~', '', $filter));

     // Start from an empty list so a filter that matches nothing yields an
     // empty array instead of an undefined variable.
     $_res = array();
     foreach ($_result as $v) {
         if (in_array((string) $v['attr'], $filter_reg, true)) {
             $_res[] = $v;
         }
     }
     return $_res;
 }
Example #2
0
 /**
  * Segment a search phrase and return the words as a quoted alternation.
  *
  * Uses the native scws extension when it is available and falls back to the
  * bundled pure-PHP PSCWS4 class otherwise. Whitespace is removed from the
  * input before segmentation.
  *
  * @param  string $key Phrase to segment.
  * @return string      Words joined as "w1"|"w2"|...; an empty string when the
  *                     segmenter yields no words.
  */
 public function scws($key)
 {
     // The extension exposes scws_new(); probing for a function called "scws"
     // never succeeded, which left this branch unreachable.
     if (function_exists('scws_new')) {
         $so = scws_new();
         $so->set_charset('utf-8');
         $so->add_dict(ini_get('scws.default.fpath') . '/dict.utf8.xdb');
         // Custom dictionary (disabled):
         //$so->add_dict(APP_PATH . '/library/dict/scws.txt', SCWS_XDICT_TXT);
         $so->set_rule(ini_get('scws.default.fpath') . '/rules.utf8.ini');
         $so->set_ignore(true);
         $so->set_multi(true);
         $so->set_duality(true);
     } else {
         require_once APP_PATH . '/library/scws/pscws4.class.php';
         $so = new PSCWS4('utf-8');
         $so->set_dict(APP_PATH . '/library/scws/etc/dict.utf8.xdb');
         $so->set_rule(APP_PATH . '/library/scws/etc/rules.utf8.ini');
         $so->set_multi(true);
         $so->set_ignore(true);
         $so->set_duality(true);
     }
     $keys = str_replace(array(" ", " ", "\t", "\n", "\r"), array("", "", "", "", ""), $key);
     $so->send_text($keys);

     // get_result() hands back the words in batches and returns false once the
     // text is exhausted, so keep reading until then instead of taking only
     // the first batch.
     $quoted = array();
     while ($batch = $so->get_result()) {
         foreach ($batch as $v) {
             $quoted[] = '"' . $v['word'] . '"';
         }
     }
     $so->close();

     return implode('|', $quoted);
 }
Example #3
0
 /**
  * Collect the most prominent words across several texts.
  *
  * Each text is segmented separately and its top $num words are fetched with
  * get_tops(). The per-text rankings are then merged rank by rank (rank 0 of
  * every text, then rank 1, ...) and the first $num distinct words are kept.
  *
  * @param  array $arr List of texts to segment.
  * @param  int   $num Maximum number of words per text and in the result.
  * @return array     Up to $num distinct words, as strings.
  */
 public static function segments($arr, $num = 10)
 {
     $list = array();
     // The guard previously tested an undefined $text, which is always
     // "empty", so the method returned early for every input.
     if (empty($arr)) {
         return $list;
     }
     $words = array();
     // Prefer the native php_scws extension when it is installed.
     if (function_exists("scws_open")) {
         $sh = scws_open();
         scws_set_charset($sh, 'utf8');
         scws_set_dict($sh, APP_ROOT_PATH . 'system/scws/dict.utf8.xdb');
         scws_set_rule($sh, APP_ROOT_PATH . 'system/scws/rules.utf8.ini');
         scws_set_ignore($sh, true);
         foreach ($arr as $text) {
             scws_send_text($sh, $text);
             $words[] = scws_get_tops($sh, $num);
         }
         scws_close($sh);
     } else {
         require_once APP_ROOT_PATH . 'system/scws/pscws4.class.php';
         $pscws = new PSCWS4();
         $pscws->set_dict(APP_ROOT_PATH . 'system/scws/dict.utf8.xdb');
         $pscws->set_rule(APP_ROOT_PATH . 'system/scws/rules.utf8.ini');
         $pscws->set_ignore(true);
         foreach ($arr as $text) {
             $pscws->send_text($text);
             $words[] = $pscws->get_tops($num);
         }
         $pscws->close();
     }
     // Only the order of first appearance matters for the result, so the
     // array is used as an insertion-ordered set keyed by word.
     for ($i = 0; $i < $num; $i++) {
         foreach ($words as $item) {
             if (isset($item[$i])) {
                 $list[$item[$i]['word']] = true;
             }
         }
     }
     // Slice the key list rather than the map: array_slice() renumbers
     // integer keys, which would replace all-digit words by 0, 1, 2, ...
     return array_map('strval', array_slice(array_keys($list), 0, $num));
 }
Example #4
0
 /**
  * Split a string into words with PSCWS4.
  *
  * The dictionary and rule files are selected by the upper-cased 'charset'
  * configuration item; only 'UTF-8' has entries in the lookup tables.
  *
  * @param string $source_string Text to segment.
  * @return array Flat list of the segmented words, in the order returned.
  */
 private function wordSplit($source_string)
 {
     $base_dir = APPPATH . 'third_party/pscws4/';
     require_once $base_dir . 'pscws4.class.php';

     $dict_by_charset = array('UTF-8' => $base_dir . 'dict.utf8.xdb');
     $rule_by_charset = array('UTF-8' => $base_dir . 'etc/rules.utf8.ini');
     $encoding = strtoupper($this->config->item('charset'));

     $segmenter = new PSCWS4($encoding);
     $dict_file = $dict_by_charset[$encoding];
     $rule_file = $rule_by_charset[$encoding];
     $segmenter->set_dict($dict_file);
     $segmenter->set_rule($rule_file);
     $segmenter->set_ignore(true);
     $segmenter->send_text($source_string);

     // Read batch after batch until get_result() reports false.
     $collected = array();
     while (($batch = $segmenter->get_result()) !== false) {
         foreach ($batch as $entry) {
             $collected[] = $entry['word'];
         }
     }
     $segmenter->close();

     return $collected;
 }
Example #5
0
/**
 * Build a tag list from a title.
 *
 * Segments the title with PSCWS4 and returns the 'word' field of each of the
 * (at most 10) entries reported by get_tops().
 *
 * @param string $title Title to segment.
 * @return array List of tag words.
 */
function get_tags_arr($title)
{
    $segmenter = new PSCWS4();
    $segmenter->set_dict(APP_ROOT . '/scws/dict.utf8.xdb');
    $segmenter->set_rule(APP_ROOT . '/scws/rules.utf8.ini');
    $segmenter->set_ignore(true);
    $segmenter->send_text($title);

    $top_entries = $segmenter->get_tops(10);
    $result = array();
    foreach ($top_entries as $entry) {
        $result[] = $entry['word'];
    }

    $segmenter->close();
    return $result;
}
Example #6
0
 /**
  * Segment a string and return its top words.
  *
  * @param string $str Text to analyse.
  * @param int    $num Number of words requested from get_tops().
  * @return array List of words.
  */
 public function run($str, $num)
 {
     $segmenter = new \PSCWS4();
     $segmenter->set_dict(APP_ROOT . '/dict.utf8.xdb');
     $segmenter->set_rule(APP_ROOT . '/rules.utf8.ini');
     $segmenter->set_ignore(true);
     $segmenter->send_text($str);

     $found = array();
     foreach ($segmenter->get_tops($num) as $entry) {
         $found[] = $entry['word'];
     }
     $segmenter->close();

     return $found;
 }
Example #7
0
File: Init.php Project: impakho/DHT
/**
 * Return the top words of a title as a '|'-separated string.
 *
 * @param string $title Title to segment.
 * @return string Words joined with '|'; an empty string when there are none.
 */
function getKeyword($title)
{
    // require_once: a plain require would load the class file again on a
    // second call and abort with a "cannot redeclare class" fatal error.
    require_once './pscws.class.php';
    $pscws = new PSCWS4();
    $pscws->set_dict('./scws/dict.utf8.xdb');
    $pscws->set_rule('./scws/rules.utf8.ini');
    $pscws->set_ignore(true);
    $pscws->send_text($title);
    $words = $pscws->get_tops();
    $pscws->close();

    $list = array();
    // Guard against a non-array result so an empty title does not trigger a
    // foreach warning.
    if (is_array($words)) {
        foreach ($words as $val) {
            $list[] = $val['word'];
        }
    }

    // implode() yields '' for an empty list on every PHP version, whereas
    // substr('', 1) returned false before PHP 8.
    return implode('|', $list);
}
Example #8
0
 /**
  * Derive tags from a title using PSCWS4.
  *
  * @param string $title Title to segment.
  * @param int    $num   Number of words requested from get_tops().
  * @return array List of tag words.
  */
 public function get_tags_by_title($title, $num = 10)
 {
     vendor('pscws4.pscws4', '', '.class.php');

     $segmenter = new PSCWS4();
     $segmenter->set_dict(PIN_DATA_PATH . 'scws/dict.utf8.xdb');
     $segmenter->set_rule(PIN_DATA_PATH . 'scws/rules.utf8.ini');
     $segmenter->set_ignore(true);
     $segmenter->send_text($title);
     $top_entries = $segmenter->get_tops($num);
     $segmenter->close();

     $result = array();
     foreach ($top_entries as $entry) {
         $result[] = $entry['word'];
     }
     return $result;
 }
Example #9
0
 /**
  * Echo the top keywords of a title as JSON.
  *
  * Output shape: {"num": <number of words>, "list": [<word>, ...]}.
  *
  * @param string $title Title to segment.
  * @param int    $num   Number of words requested from get_tops().
  * @return void
  */
 public function ajax_getKeywords($title, $num = 5)
 {
     import('ORG.Pscws.Pscws4');

     $segmenter = new PSCWS4();
     $segmenter->set_dict(LIBRARY_PATH . 'ORG/Pscws/dict.utf8.xdb');
     $segmenter->set_rule(LIBRARY_PATH . 'ORG/Pscws/rules.utf8.ini');
     $segmenter->set_ignore(true);
     $segmenter->send_text($title);
     $top_entries = $segmenter->get_tops($num);
     $segmenter->close();

     $keywords = array();
     foreach ($top_entries as $entry) {
         $keywords[] = $entry['word'];
     }

     $payload = array('num' => count($keywords), 'list' => $keywords);
     echo json_encode($payload);
 }
Example #10
0
 /**
  * Derive tags from a title using the PSCWS4 copy under Lib/Extend.
  *
  * @param string $title Title to segment.
  * @param int    $num   Number of words requested from get_tops().
  * @return array List of tag words.
  */
 public function get_tags_by_title($title, $num = 10)
 {
     // Loader used with module grouping enabled. Earlier variants were
     // vendor('pscws4.pscws4', '', '.class.php') and, before grouping,
     // import("@.Extend.pscws4.pscws4").
     import("Extend.pscws4.pscws4", APP_PATH . 'Lib/');

     $segmenter = new \PSCWS4();
     $segmenter->set_dict(APP_PATH . 'Lib/' . 'Extend/pscws4/scws/dict.utf8.xdb');
     $segmenter->set_rule(APP_PATH . 'Lib/' . 'Extend/pscws4/scws/rules.utf8.ini');
     $segmenter->set_ignore(true);
     $segmenter->send_text($title);
     $top_entries = $segmenter->get_tops($num);
     $segmenter->close();

     $result = array();
     foreach ($top_entries as $entry) {
         $result[] = $entry['word'];
     }
     return $result;
 }
Example #11
0
/**
 * Build a space-separated keyword string from a title.
 *
 * @param string $title    Title to segment.
 * @param string $contents Unused; kept so existing callers keep working.
 * @return string A single leading space followed by up to 10 words separated
 *                by spaces (just ' ' when no words are found).
 */
function getkeyword($title, $contents = '')
{
    // Load the segmenter class.
    require_once 'pscws4.class.php';
    // The constructor argument is the charset; it can also be changed later
    // through set_charset().
    $pscws = new PSCWS4('utf8');
    $pscws->set_dict('../function/etc/dict.xdb');
    $pscws->set_rule('../function/etc/rules.ini');
    $pscws->set_duality(true);
    $pscws->send_text($title);
    $tops = $pscws->get_tops(10, '');
    // Release the segmenter's resources once the result has been fetched.
    $pscws->close();

    // Initialise the accumulator; appending to an undefined variable raises a
    // warning.
    $keywords = '';
    if (is_array($tops)) {
        foreach ($tops as $k) {
            // Quote the key: a bare word is an undefined constant, which is
            // an Error on PHP 8.
            $keywords .= $k['word'] . ' ';
        }
    }
    return ' ' . trim($keywords);
}
 /**
  * Derive a comma-separated tag string from a title.
  *
  * @param string $title Title to segment.
  * @param int    $num   Number of words requested from get_tops().
  * @return string Tag words joined with ','.
  */
 public function get_tags($title, $num = 10)
 {
     vendor('Pscws.Pscws4', '', '.class.php');

     $segmenter = new PSCWS4();
     $segmenter->set_dict(CONF_PATH . 'etc/dict.utf8.xdb');
     $segmenter->set_rule(CONF_PATH . 'etc/rules.utf8.ini');
     $segmenter->set_ignore(true);
     $segmenter->send_text($title);
     $top_entries = $segmenter->get_tops($num);
     $segmenter->close();

     $collected = array();
     foreach ($top_entries as $entry) {
         $collected[] = $entry['word'];
     }

     $joined = implode(',', $collected);
     return $joined;
 }
Example #13
0
/**
 * Return the top five words of a title.
 *
 * @param string $title Title to segment.
 * @return array List of words (empty when none are found).
 */
function get_keywords($title)
{
    // require_once: a plain require would load the class file again on a
    // second call and abort with a "cannot redeclare class" fatal error.
    require_once './Common/scws/pscws4.class.php';
    $pscws = new PSCWS4();
    $pscws->set_dict('./Common/scws/scws/dict.utf8.xdb');
    $pscws->set_rule('./Common/scws/scws/rules.utf8.ini');
    $pscws->set_ignore(true);
    $pscws->send_text($title);
    $words = $pscws->get_tops(5);
    $pscws->close();

    $tags = array();
    // Guard against a non-array result so an empty title does not trigger a
    // foreach warning.
    if (is_array($words)) {
        foreach ($words as $val) {
            $tags[] = $val['word'];
        }
    }
    return $tags;
}
Example #14
0
File: test.php Project: lamphp/scws
王军虎去广州了,王军虎头虎脑的
欧阳明练功很厉害可是马明练不厉害
毛泽东北京华烟云
人中出吕布 马中出赤兔Q1,中我要买Q币充值
EOF;
// Choose the text to segment. The first script argument takes precedence over
// a non-empty query string; when neither is present $text keeps whatever value
// it already has (presumably the sample text assigned above this fragment).
if (isset($_SERVER['argv'][1])) {
    $text = $_SERVER['argv'][1];
    // An argument without a newline that names an existing file is replaced
    // by that file's contents.
    if (strpos($text, "\n") === false && is_file($text)) {
        $text = file_get_contents($text);
    }
} elseif (isset($_SERVER['QUERY_STRING']) && !empty($_SERVER['QUERY_STRING'])) {
    $text = $_SERVER['QUERY_STRING'];
}
//
// Set up a GBK segmenter: the dictionary is read from the directory given by
// the scws.default.fpath ini setting, the rule file from the local etc/ folder.
require 'pscws4.class.php';
$cws = new PSCWS4('gbk');
$cws->set_dict(ini_get('scws.default.fpath') . '/dict.xdb');
$cws->set_rule('etc/rules.ini');
// Optional segmenter switches, currently disabled:
//$cws->set_multi(3);
//$cws->set_ignore(true);
//$cws->set_debug(true);
//$cws->set_duality(true);
$cws->send_text($text);
// Outside the CLI, declare the output as plain text.
if (php_sapi_name() != 'cli') {
    header('Content-Type: text/plain');
}
echo "pscws version: ", $cws->version(), "\n";
echo "Segment result:\n\n";
while ($tmp = $cws->get_result()) {
    $line = '';
    foreach ($tmp as $w) {
Example #15
0
 /**
  * Segment a text into words.
  *
  * Uses the native php_scws extension when it is installed and the bundled
  * PSCWS4 class otherwise. Both branches load the dictionary and the rule
  * file from system/scws/.
  *
  * @param string $text Text to segment.
  * @return array Flat list of words in the order returned by the segmenter;
  *               empty when $text is empty.
  */
 public static function segmentAll($text)
 {
     $list = array();
     if (empty($text)) {
         return $list;
     }
     // Check whether the php_scws extension is installed.
     if (function_exists("scws_open")) {
         $sh = scws_open();
         scws_set_charset($sh, 'utf8');
         scws_set_dict($sh, APP_ROOT_PATH . 'system/scws/dict.utf8.xdb');
         // The rule file sits next to the dictionary in system/scws/, as in
         // the PSCWS4 branch below; this branch used to point at
         // system/rules.utf8.ini.
         scws_set_rule($sh, APP_ROOT_PATH . 'system/scws/rules.utf8.ini');
         scws_set_ignore($sh, true);
         scws_send_text($sh, $text);
         // Results arrive in batches until a falsy value is returned.
         while ($words = scws_get_result($sh)) {
             foreach ($words as $word) {
                 $list[] = $word['word'];
             }
         }
         scws_close($sh);
     } else {
         require_once APP_ROOT_PATH . 'system/scws/pscws4.class.php';
         $pscws = new PSCWS4();
         $pscws->set_dict(APP_ROOT_PATH . 'system/scws/dict.utf8.xdb');
         $pscws->set_rule(APP_ROOT_PATH . 'system/scws/rules.utf8.ini');
         $pscws->set_ignore(true);
         $pscws->send_text($text);
         // Results arrive in batches until a falsy value is returned.
         while ($words = $pscws->get_result()) {
             foreach ($words as $word) {
                 $list[] = $word['word'];
             }
         }
         $pscws->close();
     }
     return $list;
 }
Example #16
0
王军虎去广州了,王军虎头虎脑的
欧阳明练功很厉害可是马明练不厉害
毛泽东北京华烟云
人中出吕布 马中出赤兔Q1,中我要买Q币充值
EOF;
// Choose the text to segment. The first script argument takes precedence over
// the query string; when neither is present $text keeps whatever value it
// already has (presumably the sample text assigned above this fragment).
if (isset($_SERVER['argv'][1])) {
    $text = $_SERVER['argv'][1];
    // An argument without a newline that names an existing file is replaced
    // by that file's contents.
    if (strpos($text, "\n") === false && is_file($text)) {
        $text = file_get_contents($text);
    }
} elseif (isset($_SERVER['QUERY_STRING'])) {
    $text = $_SERVER['QUERY_STRING'];
}
//
// Set up a GBK segmenter with the dictionary and rule file from the local
// etc/ folder.
require 'pscws4.class.php';
$cws = new PSCWS4('gbk');
$cws->set_dict('etc/dict.xdb');
$cws->set_rule('etc/rules.ini');
// Optional segmenter switches, currently disabled:
//$cws->set_multi(3);
//$cws->set_ignore(true);
//$cws->set_debug(true);
//$cws->set_duality(true);
$cws->send_text($text);
// Outside the CLI, declare the output as plain text.
if (php_sapi_name() != 'cli') {
    header('Content-Type: text/plain');
}
echo "pscws version: " . $cws->version() . "\n";
echo "Segment result:\n\n";
while ($tmp = $cws->get_result()) {
    $line = '';
    foreach ($tmp as $w) {
Example #17
0
 /**
  * Demo: segment a fixed sample sentence and print the outcome.
  *
  * Echoes the sample text, dumps the word list with var_dump(), echoes the
  * words joined by spaces, and finally sets $this->load->sidebar_loaded.
  */
 function pscws()
 {
     $lib_dir = APPPATH . 'third_party/pscws4/';
     require_once $lib_dir . 'pscws4.class.php';

     $segmenter = new PSCWS4('utf8');
     $segmenter->set_dict($lib_dir . 'dict.utf8.xdb');
     $segmenter->set_rule($lib_dir . 'etc/rules.utf8.ini');
     $segmenter->set_ignore(true);

     $sample = '我是华东政法大学的学生';
     echo $sample;
     $segmenter->send_text($sample);

     // Read batches until get_result() returns a falsy value.
     $tokens = array();
     while ($batch = $segmenter->get_result()) {
         foreach ($batch as $entry) {
             $tokens[] = $entry['word'];
         }
     }

     var_dump($tokens);
     echo implode(' ', $tokens);
     $segmenter->close();
     $this->load->sidebar_loaded = true;
 }
Example #18
0
 /**
  * Segment a text with the Traditional Chinese UTF-8 dictionary and dump
  * every resulting word entry with var_dump().
  *
  * @param string $text Text to segment.
  * @return void
  */
 public function chinese_split($text)
 {
     // The constructor argument is the charset (the library defaults to gbk);
     // set_charset() can change it afterwards.
     $segmenter = new PSCWS4('utf8');

     // Configuration. set_dict is mandatory; set_rule is needed for smart
     // recognition of names and similar. Other available setters include
     // set_ignore, set_multi, set_debug and set_duality.
     $segmenter->set_charset('utf8');
     // Traditional Chinese, UTF-8.
     $segmenter->set_dict('./pscws4/etc/dict.ct.utf8.xdb');
     $segmenter->set_rule('./pscws4/etc/rules_cht.utf8.ini');
     // Alternatives (disabled):
     //   Simplified Chinese, UTF-8: './etc/dict.sc.utf8.xdb' + './etc/rules.utf8.ini'
     //   Simplified Chinese, GBK:   './etc/dict.sc.gbk.xdb'  + './etc/rules.ini'

     // Pass the text in with send_text(), then call get_result() repeatedly
     // until it returns false. Each word is an associative array with:
     // word (the word), idf (inverse document frequency weight), off (offset
     // in the text), len (length), attr (part of speech).
     $segmenter->send_text($text);
     while ($batch = $segmenter->get_result()) {
         foreach ($batch as $entry) {
             // Per-word processing goes here.
             var_dump($entry);
         }
     }

     // After send_text(), get_tops() can return the top N words ranked by
     // weight, which is commonly used for keyword extraction. Each element
     // holds: word, weight, times (occurrences), attr (part of speech).
     //$tops = $segmenter->get_tops(10, 'n,v');
     //var_dump($tops);
 }
Example #19
0
 /**
  * Derive a comma-separated tag string from a title.
  *
  * @param string $title Title to segment.
  * @param int    $num   Number of words requested from get_tops().
  * @return string Tag words joined with ",".
  */
 private function get_tags($title, $num = 10)
 {
     vendor("Pscws.Pscws4", "", ".class.php");

     $segmenter = new PSCWS4();
     $segmenter->set_dict(CONF_PATH . "etc/dict.utf8.xdb");
     $segmenter->set_rule(CONF_PATH . "etc/rules.utf8.ini");
     $segmenter->set_ignore(true);
     $segmenter->send_text($title);
     $top_entries = $segmenter->get_tops($num);
     $segmenter->close();

     $collected = array();
     foreach ($top_entries as $entry) {
         $collected[] = $entry["word"];
     }
     return implode(",", $collected);
 }
Example #20
0
// Read the raw search keyword from the query string.
$keyword = $_GET['keyword'];
// If the keyword consists solely of bytes 0xA1-0xFF, ASCII letters, digits and
// underscores, convert it from GBK to UTF-8.
// NOTE(review): presumably this detects GBK-encoded input; confirm that valid
// UTF-8 keywords cannot match this byte range as well.
if (preg_match("/^[" . chr(0xa1) . "-" . chr(0xff) . "A-Za-z0-9_]+\$/", $keyword)) {
    $keyword = iconv('gbk', 'utf-8', $keyword);
}
$keyword = RemoveXSS($keyword);
$keyword = Helper_Archive::pregReplace($keyword, 6);
// Only Chinese, English and digits can be searched.
// $typeid, $pageno and $totalresult are defined outside this fragment.
$typeid = Helper_Archive::pregReplace($typeid, 2);
if (isset($totalresult)) {
    $pageno = Helper_Archive::pregReplace($pageno, 2);
    $totalresult = Helper_Archive::pregReplace($totalresult, 2);
}
addSearchkey($keyword);
// Add the keyword to the hot search terms.
require dirname(__FILE__) . "/cloudsearch/pscws4.class.php";
$pscws = new PSCWS4('utf-8');
// Configure the segmenter. set_dict is mandatory; set_rule is needed for smart
// recognition of names and similar. Other available setters include
// set_charset, set_ignore, set_multi, set_debug and set_duality.
$pscws->set_charset('utf-8');
$pscws->set_rule(dirname(__FILE__) . '/cloudsearch/rules.utf8.ini');
$pscws->set_dict(dirname(__FILE__) . '/cloudsearch/dict.utf8.xdb');
$pscws->send_text($keyword);
// Start from an empty list: when the segmenter yields nothing, the foreach
// over $words that follows would otherwise run on an undefined variable.
$words = array();
// Read batches until get_result() returns a falsy value.
while ($some = $pscws->get_result()) {
    foreach ($some as $word) {
        $words[] = $word['word'];
    }
}
$where = "ishidden=0";
foreach ($words as $k => $v) {
Example #21
0
/**
 * Extract keywords from a title as a comma-separated string.
 *
 * The title is URL-decoded and passed through trim_html(); when it is longer
 * than 2400 bytes it is shortened with cutstr($title, 800, '') before being
 * segmented.
 *
 * @param string $title Title (or text) to analyse.
 * @param int    $num   Number of words requested from get_tops(); defaults
 *                      to 10, the value that used to be hard-coded.
 * @return string Words joined with ','; an empty string when none are found.
 */
function get_keywords3($title, $num = 10)
{
    $title = urldecode_utf8($title);
    $title = trim_html($title, 1);
    if (strlen($title) > 2400) {
        $title = cutstr($title, 800, '');
    }
    include_once ROOT_PATH . 'web/lib/pscws4/pscws4.class.php';
    $cws = new PSCWS4('utf8');
    $cws->set_dict(ROOT_PATH . 'web/lib/pscws4/dict.utf8.xdb');
    $cws->set_rule(ROOT_PATH . 'web/lib/pscws4/rules.ini');
    $cws->set_ignore(true);
    $cws->send_text($title);
    // Honour the caller's $num; the limit was previously fixed at 10, which
    // silently ignored the parameter. 'r,v,p' is the attribute filter handed
    // to get_tops().
    $words = $cws->get_tops($num, 'r,v,p');
    $cws->close();

    $tags = array();
    // Guard against a non-array result so an empty text does not trigger a
    // foreach warning.
    if (is_array($words)) {
        foreach ($words as $val) {
            $tags[] = $val['word'];
        }
    }
    return implode(',', $tags);
}