public static function segments($arr, $num = 10)
 {
     /*
      * Collects the top keywords of several texts into one list.
      *
      * Each text in $arr is segmented separately (php_scws extension when
      * available, bundled PSCWS4 class otherwise) and its $num top words are
      * fetched. The per-text rankings are then interleaved rank by rank
      * (all rank-0 words first, then rank-1, ...) and the first $num distinct
      * words are returned.
      *
      * @param array|string $arr texts to analyse (a single string is accepted too)
      * @param int          $num maximum number of words fetched per text and returned
      * @return array list of distinct words, at most $num entries
      */
     $list = array();
     // Fix: the original tested an undefined $text here, so empty() was always
     // true and the method returned an empty list for every input.
     if (empty($arr)) {
         return $list;
     }
     $arr = (array) $arr;
     $words = array();
     // Prefer the native php_scws extension when it is installed.
     if (function_exists("scws_open")) {
         $sh = scws_open();
         scws_set_charset($sh, 'utf8');
         scws_set_dict($sh, APP_ROOT_PATH . 'system/scws/dict.utf8.xdb');
         scws_set_rule($sh, APP_ROOT_PATH . 'system/scws/rules.utf8.ini');
         scws_set_ignore($sh, true);
         foreach ($arr as $text) {
             scws_send_text($sh, $text);
             $words[] = scws_get_tops($sh, $num);
         }
         scws_close($sh);
     } else {
         require_once APP_ROOT_PATH . 'system/scws/pscws4.class.php';
         $pscws = new PSCWS4();
         // Keep the fallback consistent with the extension branch: the utf8
         // dictionary and rule files are loaded below, so the charset must match.
         $pscws->set_charset('utf8');
         $pscws->set_dict(APP_ROOT_PATH . 'system/scws/dict.utf8.xdb');
         $pscws->set_rule(APP_ROOT_PATH . 'system/scws/rules.utf8.ini');
         $pscws->set_ignore(true);
         foreach ($arr as $text) {
             $pscws->send_text($text);
             $words[] = $pscws->get_tops($num);
         }
         $pscws->close();
     }
     // Walk the rankings rank-major so that insertion order of $list reflects
     // "best rank first" across all texts.
     // NOTE(review): the occurrence counts are collected but never used for
     // ordering; only the insertion order decides which words survive the slice.
     for ($i = 0; $i < $num; $i++) {
         foreach ($words as $item) {
             if (!isset($item[$i])) {
                 continue;
             }
             $word = $item[$i]['word'];
             $list[$word] = isset($list[$word]) ? $list[$word] + 1 : 1;
         }
     }
     // preserve_keys = true: purely numeric words (e.g. "2008") become integer
     // keys and would otherwise be renumbered to 0, 1, 2... by array_slice().
     $list = array_slice($list, 0, $num, true);
     return array_map('strval', array_keys($list));
 }
Example #2
0
 /**
  * Extract the top-weighted words of a text with the SCWS extension.
  *
  * @param string $content input text to segment
  * @param string $type    dictionary name, looked up as C('dictionay.' . $type);
  *                        when empty, C('dictionay.defaultdic') is used
  * @param int    $tops    maximum number of words to return
  * @return mixed whatever scws_get_tops() returns for the text
  */
 public static function keywords($content, $type = '', $tops = 10)
 {
     // Resolve dictionary and rule paths first, so that a configuration error
     // is reported before any SCWS handle has been allocated.
     $dicpath = empty($type) ? C('dictionay.defaultdic') : C('dictionay.' . $type);
     $rules = C('dictionay.defaultrule');
     if (empty($dicpath) || empty($rules)) {
         throw_exception("Load dictionary node: dic file Error");
     }
     $obj = scws_open();
     scws_set_charset($obj, 'utf8');
     // The dictionary is loaded in plain-text mode (SCWS_XDICT_TXT).
     scws_set_dict($obj, $dicpath, SCWS_XDICT_TXT);
     scws_set_duality($obj, true);
     scws_set_rule($obj, $rules);
     scws_send_text($obj, $content);
     $result = scws_get_tops($obj, $tops);
     // Fix: the original returned without ever releasing the handle.
     scws_close($obj);
     return $result;
 }
Example #3
0
<?php

require 'global.php';
require '../include/db_config.php';
// Keyword index builder.
// Repeatedly fetches batches of 10 rows from ve123_links that are still marked
// key_status=0, segments their combined text with SCWS, stores the top 20
// words in ve123_links_keys and marks the row as processed (key_status=1).
// The script ends via die() once no unprocessed rows are left.
set_time_limit(0);
$con = @mysql_connect($dbhost, $dbuser, $dbpw) or die('error');
mysql_select_db($dbname);
// Prefer mysql_set_charset(): it also informs the client library of the
// connection charset, which mysql_real_escape_string() needs in order to escape
// multi-byte GBK input correctly. Fall back to the plain query otherwise.
if (!function_exists('mysql_set_charset') || !mysql_set_charset('gbk')) {
    mysql_query('set names gbk');
}
while (1) {
    $sql = 'select link_id,title,keywords,description,fulltxt from ve123_links where key_status=0 limit 0,10';
    $result = mysql_query($sql);
    if ($result === false) {
        // A failed query must not be reported as "finished".
        die('error');
    }
    $num = mysql_num_rows($result);
    if ($num == 0) {
        die('“创建索引” 完成');
    }
    while ($row = mysql_fetch_assoc($result)) {
        $content = strip_tags(htmlspecialchars_decode($row['title'] . $row['keywords'] . $row['description'] . $row['fulltxt']));
        $content = preg_replace("/[\\s  ]+/i", '', $content);
        $content = str_replace('&nbsp;', '', $content);
        $sh = scws_open();
        scws_set_charset($sh, 'gbk');
        scws_send_text($sh, $content);
        $top = scws_get_tops($sh, 20);
        // Fix: release the handle; the original leaked one handle per row.
        scws_close($sh);
        $linkId = mysql_real_escape_string($row['link_id']);
        if (is_array($top)) {
            foreach ($top as $keys) {
                // Fix: words come from arbitrary page text and may contain
                // quotes, so they must be escaped before being put into SQL.
                $word = mysql_real_escape_string($keys['word']);
                $sql2 = "insert into ve123_links_keys(keywords,link_id) values('" . $word . "','" . $linkId . "')";
                mysql_query($sql2);
            }
        }
        // If the row cannot be marked as processed, the outer loop would pick
        // it up again forever and keep inserting duplicates; stop instead.
        mysql_query("update ve123_links set key_status=1 where link_id='" . $linkId . "'") or die('error');
    }
}
Example #4
0
/**
 * Segments a string with SCWS and returns its top-ranked words, optionally
 * restricted to (or excluding) given part-of-speech attributes.
 *
 * @param string $str
 *            text to analyse; all whitespace is removed before segmentation
 * @param int $limit
 *            optional, maximum number of words to return, defaults to 10
 * @param string $attr
 *            optional, a comma-separated list of part-of-speech attributes;
 *            returned words must carry one of the listed attributes. When the
 *            list starts with ~ the filter is inverted and the attribute must
 *            NOT be in the list. Defaults to NULL: no filtering.
 * @return array|false false when the scws extension is not loaded, an empty
 *            array when the text is empty after whitespace removal, otherwise
 *            the result of scws_get_tops()
 */
function scwsTop($str, $limit = 10, $attr = null)
{
    // Without the extension there is nothing we can do.
    if (!function_exists('scws_open')) {
        return false;
    }

    $text = preg_replace("/[\\s\t\r\n]+/", '', $str);
    if (empty($text)) {
        return array();
    }

    $handle = scws_open();
    scws_set_charset($handle, 'utf8');
    scws_set_ignore($handle, true);
    scws_set_multi($handle, SCWS_MULTI_SHORT | SCWS_MULTI_DUALITY);
    scws_set_duality($handle, true);
    scws_send_text($handle, $text);
    $tops = scws_get_tops($handle, $limit, $attr);
    scws_close($handle);

    return $tops;
}
Example #5
0
File: scws.php Project: lamphp/scws
著作权和版权在我国是同一个概念,是法律赋予作品创作者的专有权利。所谓专有权利就是没有经过权利人许可又不是法律规定的例外,要使用这个作品,就必须经过作者授权,没有授权就是侵权。

一九八零年春天
EOF;
// Open a segmenter handle and configure it for UTF-8 input; the dictionary and
// rule files are taken from the directory named by the scws.default.fpath ini
// setting.
$cws = scws_open();
scws_set_charset($cws, "utf8");
scws_set_dict($cws, ini_get('scws.default.fpath') . '/dict.utf8.xdb');
scws_set_rule($cws, ini_get('scws.default.fpath') . '/rules.utf8.ini');
//scws_set_ignore($cws, true);
//scws_set_multi($cws, true);
// $text is assigned before this section (not visible here).
scws_send_text($cws, $text);
echo "\n";
// top words
printf("No. WordString               Attr  Weight(times)\n");
printf("-------------------------------------------------\n");
// Fetch at most 10 top words; the "~v" filter excludes words with attribute
// "v" (presumably verbs; confirm against the SCWS attribute table).
$list = scws_get_tops($cws, 10, "~v");
// 1-based row number used in the printed table.
$cnt = 1;
foreach ($list as $tmp) {
    // One table row per word: rank, word, attribute, weight and occurrence count.
    printf("%02d. %-24.24s %-4.2s  %.2f(%d)\n", $cnt, $tmp['word'], $tmp['attr'], $tmp['weight'], $tmp['times']);
    $cnt++;
}
echo "\n\n-------------------------------------------------\n";
// segment
while ($res = scws_get_result($cws)) {
    foreach ($res as $tmp) {
        if ($tmp['len'] == 1 && $tmp['word'] == "\r") {
            continue;
        }
        if ($tmp['len'] == 1 && $tmp['word'] == "\n") {
            echo $tmp['word'];
        } else {
Example #6
0
<?php

require 'global.php';
require '../include/db_config.php';
// Search-keyword segmenter.
// Repeatedly fetches batches of 10 rows from ve123_search_keyword that are
// still marked ks=0, segments each keyword with SCWS, stores the top 3 words in
// ve123_search_keys and marks the row as processed (ks=1).
// The script ends via die() once no unprocessed rows are left.
set_time_limit(0);
$con = @mysql_connect($dbhost, $dbuser, $dbpw) or die('error');
mysql_select_db($dbname);
// Prefer mysql_set_charset(): it also informs the client library of the
// connection charset, which mysql_real_escape_string() needs in order to escape
// multi-byte GBK input correctly. Fall back to the plain query otherwise.
if (!function_exists('mysql_set_charset') || !mysql_set_charset('gbk')) {
    mysql_query('set names gbk');
}
while (1) {
    $sql = 'select kid,keyword from ve123_search_keyword where ks=0 limit 0,10';
    $result = mysql_query($sql);
    if ($result === false) {
        // A failed query must not be reported as "finished".
        die('error');
    }
    $num = mysql_num_rows($result);
    if ($num == 0) {
        die('“生成分词” 完成');
    }
    while ($row = mysql_fetch_assoc($result)) {
        $content = strip_tags(htmlspecialchars_decode($row['keyword']));
        $content = preg_replace("/[\\s  ]+/i", '', $content);
        $content = str_replace('&nbsp;', '', $content);
        $sh = scws_open();
        // The connection delivers GBK text, so state the segmenter charset
        // explicitly instead of relying on the extension's configured default.
        scws_set_charset($sh, 'gbk');
        scws_send_text($sh, $content);
        $top = scws_get_tops($sh, 3);
        // Fix: release the handle; the original leaked one handle per row.
        scws_close($sh);
        $kid = mysql_real_escape_string($row['kid']);
        if (is_array($top)) {
            foreach ($top as $keys) {
                // Fix: words come from user search input and may contain
                // quotes, so they must be escaped before being put into SQL.
                $word = mysql_real_escape_string($keys['word']);
                $sql2 = "insert into ve123_search_keys(keyscn,kid) values('" . $word . "','" . $kid . "')";
                mysql_query($sql2);
            }
        }
        // If the row cannot be marked as processed, the outer loop would pick
        // it up again forever and keep inserting duplicates; stop instead.
        $sq = "update ve123_search_keyword set ks=1 where kid='" . $kid . "'";
        mysql_query($sq) or die('error');
    }
}
Example #7
0
 private function _do($content, $len = 20)
 {
     // Feed the text to the SCWS handle held in $this->_scws and hand back
     // up to $len top words exactly as scws_get_tops() reports them.
     $handle = $this->_scws;
     scws_send_text($handle, $content);

     return scws_get_tops($handle, $len);
 }