Beispiel #1
0
 //2.Array: 配置选项,使用NULL来选择默认的配置(friso.ini中的配置)。
 //3.long: 可选参数,自定义切分返回选项,查看下面的$_rargs
 //1.完整的配置:
 //array('max_len'=>5, 'r_name'=>0, 'mix_len'=>2, 'lna_len'=>1, 'add_syn'=>1,
 //	'clr_stw'=>1, 'keep_urec'=>0, 'spx_out'=>0, 'en_sseg'=> 1, 'st_minl'=>2, 'kpuncs'=>'.+#', 'mode'=>RB_CMODE);
 //1.在不了解friso内核的情况下, 请不要随便更改nthreshold
 //2.使用NULL来使用php.ini中指定的friso.ini文件中的配置
 //2.返回选项:
 //词条: RB_RET_WORD, 类别:RB_RET_TYPE, 长度:RB_RET_LEN, 真实长度:RB_RET_RLEN, 偏移量:RB_RET_OFF
 //词性:RB_RET_POS(待实现)
 $_rargs = RB_RET_TYPE | RB_RET_LEN | RB_RET_RLEN | RB_RET_OFF | RB_RET_POS;
 //$_rargs = 0;
 //3.切分类别:
 //CJK词条:RB_TYP_CJK, 英中混合词(b超):RB_TYP_ECM,中英混合词(卡拉ok):RB_TYP_CEM,
 //英文标点混合词(c++):RB_TYP_EPUN,标点:RB_TYP_PUN,未知类别:RB_TYP_UNK,其他类别(同义词):RB_TYP_OTR
 $_result = rb_split($_str, array('mode' => RB_CMODE), $_rargs);
 unset($_str);
 foreach ($_result as $_val) {
     $_str = $_val['word'];
     if ($_rargs != 0) {
         $_str .= '[';
         if (($_rargs & RB_RET_TYPE) != 0) {
             $_str .= ', type: ' . $_val['type'];
         }
         //获取词条类别
         if (($_rargs & RB_RET_LEN) != 0) {
             $_str .= ', len: ' . $_val['len'];
         }
         //词条长度
         if (($_rargs & RB_RET_RLEN) != 0) {
             $_str .= ', rlen: ' . $_val['rlen'];
Beispiel #2
0
        $_cfg['add_syn'] = 0;
    }
    if (!isset($_cfg['clr_stw'])) {
        $_cfg['clr_stw'] = 0;
    }
    if (!isset($_cfg['keep_urec'])) {
        $_cfg['keep_urec'] = 0;
    }
    if (!isset($_cfg['spx_out'])) {
        $_cfg['spx_out'] = 0;
    }
    if (!isset($_cfg['en_sseg'])) {
        $_cfg['en_sseg'] = 0;
    }
    $s_time = timer();
    $_ret = rb_split($text, $_cfg);
    $_timer = timer() - $s_time;
}
/**
 * Returns the current Unix timestamp as a float, including the
 * fractional (sub-second) part.
 *
 * Two readings can be subtracted to obtain elapsed wall-clock time
 * in seconds.
 *
 * @return float Seconds since the Unix epoch, with sub-second fraction.
 */
function timer()
{
    // microtime(true) returns the timestamp as a float directly, so there is
    // no need to split the "msec sec" string form and cast each half.
    return microtime(true);
}
?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
	"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">

<head>
	<title>GBK - robbe分词测试程序 </title>
	<meta http-equiv="content-type" content="text/html;charset=GBK" />