<?php
/*
 * Request bootstrap for the GBK segmentation demo page.
 *
 * 1. Declares the response as GBK-encoded HTML.
 * 2. Aborts with a help message unless rb_charset() reports "GBK".
 * 3. Initialises the page state ($text, $_timer, $_act, $_cfg) and, when the
 *    form was submitted with _act=split, binds the posted text/config and
 *    fills in 0 for every switch the form did not send.
 */

// The charset parameter must be written "charset=GBK". The previous
// "charset:GBK" form is not a valid Content-Type parameter, so clients
// ignored it and had to guess the page encoding.
header('Content-Type: text/html; charset=GBK');

// NOTE(review): magic_quotes_gpc is not changeable at runtime and was removed
// in PHP 5.4, so this call has no effect; kept only for legacy parity.
ini_set('magic_quotes_gpc', 0);

// Check the charset: this page only works when rb_charset() reports GBK.
if (rb_charset() != "GBK") {
    $_str = "Error: GBK charset required. <br />";
    $_str .= "1. Modified friso.charset = 1 in your friso.ini .<br />";
    $_str .= "2. Modified friso.lex_dir = GBK lexicon abusolute path to load your GBK lexicon. <br />";
    exit($_str);
}

// Page state defaults used when no form has been submitted.
$text = '';
$_timer = 0;
$_act = '';
$_cfg = array('mode' => RB_CMODE);

// $_act is assigned as a side effect of the condition so it is available
// afterwards. POST values are strings or arrays, so the strict comparison
// accepts exactly the same requests as a loose one.
if (isset($_POST['_act']) && ($_act = $_POST['_act']) === 'split') {
    // Bound by reference: the defaults written below are also stored back
    // into $_POST['config'].
    $text =& $_POST['text'];
    $_cfg =& $_POST['config'];

    // Switches missing from the submitted config default to 0.
    if (!isset($_cfg['add_syn'])) {
        $_cfg['add_syn'] = 0;
    }
    if (!isset($_cfg['clr_stw'])) {
        $_cfg['clr_stw'] = 0;
    }
    if (!isset($_cfg['keep_urec'])) {
        $_cfg['keep_urec'] = 0;
    }
    if (!isset($_cfg['spx_out'])) {
        $_cfg['spx_out'] = 0;
    }
    if (!isset($_cfg['en_sseg'])) {
// API smoke test: prints the mode constants, the UTF-8 helper functions,
// two lexicon lookups and the version/charset info, then (UTF-8 builds only)
// starts the rb_split() demonstration.

// Well-formed Content-Type value (no dangling ';' after the charset parameter).
header("Content-Type: text/html; charset=utf-8");

echo "constant access:<br />";
echo "complex mode: " . RB_CMODE . ", simple mode: " . RB_SMODE . "<br />";
echo "rb_ucode_utf8(20013)=" . rb_ucode_utf8(20013) . "<br />";
echo "rb_utf8_ucode(中)=" . rb_utf8_ucode("中") . "<br />";
echo "rb_utf8_bytes(中)=" . rb_utf8_bytes("中") . "<p />";

echo "词库函数:<br />";
echo "rb_dic_exist(研究) ? " . rb_dic_exist(RB_LEX_CJK, "研究") . "<br />";

$_entry = rb_dic_get(RB_LEX_CJK, "你");
echo "rb_dic_get(你):<br />";
// Only index the result when it really is an array; a non-array result
// (presumably returned when the word is not in the lexicon - TODO confirm)
// would otherwise trigger "array offset on value of type ..." warnings.
if (is_array($_entry)) {
    echo "|——length: " . $_entry["length"] . ", freq: " . $_entry["freq"] . "<p />";
} else {
    echo "|——(entry not found)<p />";
}

// Version information.
echo "friso_version(): ", friso_version(), "<br />";
echo "rb_version(): ", rb_version(), ", rb_charset(): ", rb_charset(), "<p />";

echo "分词函数:<br />";
if (rb_charset() == 'UTF-8') {
    $_str = "研究生命起源,robbe高性能php中文分词组件。";
    echo "rb_split(\"" . $_str . "\"):<br />";

    // API:
    // rb_split(string, Array, [long])
    // 1. string: the string to be segmented.
    // 2. Array:  configuration options; pass NULL to use the default
    //            configuration (the settings in friso.ini).
    // 3. long:   optional; custom return-field options for the result,
    //            see $_rargs below.

    // 1. Full configuration:
    // array('max_len'=>5, 'r_name'=>0, 'mix_len'=>2, 'lna_len'=>1, 'add_syn'=>1,
    //     'clr_stw'=>1, 'keep_urec'=>0, 'spx_out'=>0, 'en_sseg'=> 1, 'st_minl'=>2, 'kpuncs'=>'.+#', 'mode'=>RB_CMODE);
    //   1. Unless you understand the friso core, do not change nthreshold casually.
    //   2. Pass NULL to use the configuration from the friso.ini file named in php.ini.

    // 2. Return options:
    // word: RB_RET_WORD, type: RB_RET_TYPE, length: RB_RET_LENGTH, real length: RB_RET_RLEN, offset: RB_RET_OFF
    // part of speech: RB_RET_POS (to be implemented)
<?php
/*
 * Request bootstrap for the UTF-8 segmentation demo page.
 *
 * 1. Declares the response as UTF-8-encoded HTML.
 * 2. Aborts with a help message unless rb_charset() reports "UTF-8".
 * 3. Initialises the page state ($text, $_timer, $_act, $_cfg) and, when the
 *    form was submitted with _act=split, binds the posted text/config and
 *    fills in 0 for every switch the form did not send.
 */

// The charset parameter must be written "charset=utf-8". The previous
// "charset:utf-8" form is not a valid Content-Type parameter, so clients
// ignored it and had to guess the page encoding.
header('Content-Type: text/html; charset=utf-8');

// NOTE(review): magic_quotes_gpc is not changeable at runtime and was removed
// in PHP 5.4, so this call has no effect; kept only for legacy parity.
ini_set('magic_quotes_gpc', 0);

// Charset check: this page only works when rb_charset() reports UTF-8.
if (rb_charset() != "UTF-8") {
    $_str = "Error: UTF-8 charset required. <br />";
    $_str .= "1. Modified friso.charset = 0 in your friso.ini .<br />";
    $_str .= "2. Modified friso.lex_dir = UTF-8 lexicon abusolute path to load your UTF-8 lexicon. <br />";
    exit($_str);
}

// Page state defaults used when no form has been submitted.
$text = '';
$_timer = 0;
$_act = '';
$_cfg = array('mode' => RB_CMODE);

// $_act is assigned as a side effect of the condition so it is available
// afterwards. POST values are strings or arrays, so the strict comparison
// accepts exactly the same requests as a loose one.
if (isset($_POST['_act']) && ($_act = $_POST['_act']) === 'split') {
    // Bound by reference: the defaults written below are also stored back
    // into $_POST['config'].
    $text =& $_POST['text'];
    $_cfg =& $_POST['config'];

    // Switches missing from the submitted config default to 0.
    if (!isset($_cfg['add_syn'])) {
        $_cfg['add_syn'] = 0;
    }
    if (!isset($_cfg['clr_stw'])) {
        $_cfg['clr_stw'] = 0;
    }
    if (!isset($_cfg['keep_urec'])) {
        $_cfg['keep_urec'] = 0;
    }
    if (!isset($_cfg['spx_out'])) {
        $_cfg['spx_out'] = 0;
    }
    if (!isset($_cfg['en_sseg'])) {