Beispiel #1
0
<?php

// Declare the response as GBK-encoded HTML. The charset must be given as a
// "charset=VALUE" media-type parameter; the previous "charset:GBK" form is not
// a valid parameter, so clients would not pick up the intended encoding.
header('Content-Type: text/html; charset=GBK');
// NOTE(review): magic_quotes_gpc is documented as not changeable at runtime
// and was removed in PHP 5.4, so this call is presumably a no-op on current
// PHP versions. Kept unchanged to preserve existing behaviour - confirm and drop.
ini_set('magic_quotes_gpc', 0);
// Abort with setup instructions unless the extension reports the GBK charset.
if (rb_charset() != "GBK") {
    $_str = "Error: GBK charset required. <br />";
    $_str .= "1. Modified friso.charset = 1 in your friso.ini .<br />";
    $_str .= "2. Modified friso.lex_dir = GBK lexicon abusolute path to load your GBK lexicon. <br />";
    exit($_str);
}
// Initial page state; the request handling that follows replaces $text and
// $_cfg with the posted values when a 'split' action is submitted.
$text = '';
$_timer = 0;
$_act = '';
$_cfg = array('mode' => RB_CMODE);
if (isset($_POST['_act']) && ($_act = $_POST['_act']) == 'split') {
    $text =& $_POST['text'];
    $_cfg =& $_POST['config'];
    if (!isset($_cfg['add_syn'])) {
        $_cfg['add_syn'] = 0;
    }
    if (!isset($_cfg['clr_stw'])) {
        $_cfg['clr_stw'] = 0;
    }
    if (!isset($_cfg['keep_urec'])) {
        $_cfg['keep_urec'] = 0;
    }
    if (!isset($_cfg['spx_out'])) {
        $_cfg['spx_out'] = 0;
    }
    if (!isset($_cfg['en_sseg'])) {
Beispiel #2
0
// Send the content-type header before any output is produced.
header("Content-Type:text/html;charset=utf-8;");

// Print the two mode constants provided by the extension.
echo "constant access:<br />";
$_cmode = RB_CMODE;
$_smode = RB_SMODE;
echo "complex mode: {$_cmode}, simple mode: {$_smode}<br />";

// Call each UTF-8 helper once and print its result next to a label.
$_utf8 = rb_ucode_utf8(20013);
echo "rb_ucode_utf8(20013)={$_utf8}<br />";
$_ucode = rb_utf8_ucode("中");
echo "rb_utf8_ucode(中)={$_ucode}<br />";
$_bytes = rb_utf8_bytes("中");
echo "rb_utf8_bytes(中)={$_bytes}<p />";

// Dictionary functions: query the CJK lexicon for two sample words.
echo "词库函数:<br />";
$_exists = rb_dic_exist(RB_LEX_CJK, "研究");
echo "rb_dic_exist(研究) ? {$_exists}<br />";
// NOTE(review): assumes rb_dic_get() returns an array with "length" and
// "freq" keys for this word - confirm what it returns on a miss.
$_entry = rb_dic_get(RB_LEX_CJK, "你");
echo "rb_dic_get(你):<br />";
echo "|——length: {$_entry['length']}, freq: {$_entry['freq']}<p />";

// Version information; each piece is echoed in turn so that labels are
// written before the corresponding function is called.
echo "friso_version(): ";
echo friso_version();
echo "<br />";
echo "rb_version(): ";
echo rb_version();
echo ", rb_charset(): ";
echo rb_charset();
echo "<p />";

// Heading for the segmentation demo that follows.
echo "分词函数:<br />";
if (rb_charset() == 'UTF-8') {
    $_str = "研究生命起源,robbe高性能php中文分词组件。";
    echo "rb_split(\"" . $_str . "\"):<br />";
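    //API:
    //rb_split(string, Array, [long])
    //1.string: the string to be segmented.
    //2.Array: configuration options; pass NULL to use the default configuration (the settings in friso.ini).
    //3.long: optional argument selecting which fields are returned; see $_rargs below.
    //Configuration options - the full set:
    //array('max_len'=>5, 'r_name'=>0, 'mix_len'=>2, 'lna_len'=>1, 'add_syn'=>1,
    //	'clr_stw'=>1, 'keep_urec'=>0, 'spx_out'=>0, 'en_sseg'=> 1, 'st_minl'=>2, 'kpuncs'=>'.+#', 'mode'=>RB_CMODE);
    //1.Do not change nthreshold unless you understand the friso internals.
    //2.Pass NULL to use the configuration from the friso.ini file specified in php.ini.
    //Return options:
    //word: RB_RET_WORD, type: RB_RET_TYPE, length: RB_RET_LENGTH, real length: RB_RET_RLEN, offset: RB_RET_OFF
    //part of speech: RB_RET_POS (not yet implemented)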
Beispiel #3
0
<?php

// Declare the response as UTF-8-encoded HTML. The charset must be given as a
// "charset=VALUE" media-type parameter; the previous "charset:utf-8" form is
// not a valid parameter, so clients would not pick up the intended encoding.
header('Content-Type: text/html; charset=utf-8');
// NOTE(review): magic_quotes_gpc is documented as not changeable at runtime
// and was removed in PHP 5.4, so this call is presumably a no-op on current
// PHP versions. Kept unchanged to preserve existing behaviour - confirm and drop.
ini_set('magic_quotes_gpc', 0);
// Abort with setup instructions unless the extension reports the UTF-8 charset.
if (rb_charset() != "UTF-8") {
    $_str = "Error: UTF-8 charset required. <br />";
    $_str .= "1. Modified friso.charset = 0 in your friso.ini .<br />";
    $_str .= "2. Modified friso.lex_dir = UTF-8 lexicon abusolute path to load your UTF-8 lexicon. <br />";
    exit($_str);
}
// Initial page state; the request handling that follows replaces $text and
// $_cfg with the posted values when a 'split' action is submitted.
$text = '';
$_timer = 0;
$_act = '';
$_cfg = array('mode' => RB_CMODE);
if (isset($_POST['_act']) && ($_act = $_POST['_act']) == 'split') {
    $text =& $_POST['text'];
    $_cfg =& $_POST['config'];
    if (!isset($_cfg['add_syn'])) {
        $_cfg['add_syn'] = 0;
    }
    if (!isset($_cfg['clr_stw'])) {
        $_cfg['clr_stw'] = 0;
    }
    if (!isset($_cfg['keep_urec'])) {
        $_cfg['keep_urec'] = 0;
    }
    if (!isset($_cfg['spx_out'])) {
        $_cfg['spx_out'] = 0;
    }
    if (!isset($_cfg['en_sseg'])) {