示例#1
0
<?php

// Declare the response as UTF-8 HTML. The charset parameter has to be written
// as "charset=<name>"; the previous "charset:utf-8" form is not a valid
// media-type parameter, so no charset was actually declared to the client.
header('Content-Type: text/html; charset=utf-8');
// NOTE(review): per the PHP manual magic_quotes_gpc cannot be changed at
// runtime and was removed in PHP 5.4, so this call is most likely a no-op.
// It is kept to preserve the original behavior - confirm before deleting.
ini_set('magic_quotes_gpc', 0);

// Charset guard: this demo page is written for UTF-8. If the friso extension
// reports any other charset, print setup instructions and stop the script.
if (friso_charset() != "UTF-8") {
    $_str = "Error: UTF-8 charset required. <br />";
    $_str .= "1. Modified friso.charset = 0 in your friso.ini .<br />";
    $_str .= "2. Modified friso.lex_dir = UTF-8 lexicon abusolute path to load your UTF-8 lexicon. <br />";
    // exit() with a string argument prints the message before terminating.
    exit($_str);
}

// Defaults used when the page is requested without a submitted "split" form.
$text = '';    // text to segment (replaced by the posted text on submit)
$_timer = 0;   // presumably the elapsed segmentation time; its use is outside this excerpt
$_act = '';    // requested action taken from the form, empty when nothing was posted
$_cfg = array('mode' => FRISO_COMPLEX);    // segmentation options, complex mode by default
if (isset($_POST['_act']) && ($_act = $_POST['_act']) == 'split') {
    $text =& $_POST['text'];
    $_cfg =& $_POST['config'];
    if (!isset($_cfg['add_syn'])) {
        $_cfg['add_syn'] = 0;
    }
    if (!isset($_cfg['clr_stw'])) {
        $_cfg['clr_stw'] = 0;
    }
    if (!isset($_cfg['keep_urec'])) {
        $_cfg['keep_urec'] = 0;
    }
    if (!isset($_cfg['spx_out'])) {
        $_cfg['spx_out'] = 0;
    }
    if (!isset($_cfg['en_sseg'])) {
示例#2
0
// Send the content type first so the multi-byte output below is decoded as UTF-8.
header("Content-Type:text/html;charset=utf-8;");

// --- Constants exported by the friso extension ---
echo "constant access:<br />";
printf("complex mode: %s, simple mode: %s<br />", FRISO_COMPLEX, FRISO_SIMPLE);

// --- Code point / UTF-8 conversion helpers ---
printf("friso_ucode_utf8(20013)=%s<br />", friso_ucode_utf8(20013));
printf("friso_utf8_ucode(中)=%s<br />", friso_utf8_ucode("中"));
printf("friso_utf8_bytes(中)=%s<p />", friso_utf8_bytes("中"));

// --- Dictionary (lexicon) functions ---
echo "词库函数:<br />";
printf("friso_dic_exist(研究) ? %s<br />", friso_dic_exist(FRISO_LEX_CJK, "研究"));
// Look up one CJK entry and show the "length" and "freq" fields of the result.
$_entry = friso_dic_get(FRISO_LEX_CJK, "你");
echo "friso_dic_get(你):<br />";
printf("|——length: %s, freq: %s<p />", $_entry["length"], $_entry["freq"]);

// --- Version and active charset of the extension ---
echo "friso_version(): ";
echo friso_version();
echo ", friso_charset(): ";
echo friso_charset();
echo "<p />";

// --- Segmentation functions ---
echo "分词函数:<br />";
if (friso_charset() == 'UTF-8') {
    $_str = "歧义和同义词:研究生命起源,混合词: 做B超检查身体,x射线本质是什么,今天去奇都ktv唱卡拉ok去,哆啦a梦是一个动漫中的主角,单位和全角: 2009年8月6日开始大学之旅,岳阳今天的气温为38.6℃, 也就是101.48℉, 英文数字: bug report chenxin619315@gmail.com or visit http://code.google.com/p/jcseg, we all admire the hacker spirit!特殊数字: ① ⑩ ⑽ ㈩.";
    echo "<p>friso_split(\"" . $_str . "\"):<p />";
    //API:
    //friso_split(string, Array, [long])
    //1.string: 要被切分的字符串。
    //2.Array: 配置选项,使用NULL来选择默认的配置(friso.ini中的配置)。
    //3.long: 可选参数,自定义切分返回选项,查看下面的$_rargs
    //1.完整的配置:
    //array('max_len'=>5, 'r_name'=>0, 'mix_len'=>2, 'lna_len'=>1, 'add_syn'=>1,
    //    'clr_stw'=>1, 'keep_urec'=>0, 'spx_out'=>0, 'en_sseg'=> 1, 'st_minl'=>2, 'kpuncs'=>'.+#', 'mode'=>FRISO_COMPLEX);
    //1.在不了解friso内核的情况下, 请不要随便更改nthreshold
    //2.使用NULL来使用php.ini中指定的friso.ini文件中的配置
    //2.返回选项:
    //词条: FRISO_RET_WORD, 类别:FRISO_RET_TYPE, 长度:FRISO_RET_LENGTH, 真实长度:FRISO_RET_RLEN, 偏移量:FRISO_RET_OFF
    //词性:FRISO_RET_POS(待实现)
示例#3
0
<?php

// Declare the response as GBK-encoded HTML. The charset parameter has to be
// written as "charset=<name>"; the previous "charset:GBK" form is not a valid
// media-type parameter, so no charset was actually declared to the client.
header('Content-Type: text/html; charset=GBK');
// NOTE(review): per the PHP manual magic_quotes_gpc cannot be changed at
// runtime and was removed in PHP 5.4, so this call is most likely a no-op.
// It is kept to preserve the original behavior - confirm before deleting.
ini_set('magic_quotes_gpc', 0);

// Charset guard: this demo page is written for GBK. If the friso extension
// reports any other charset, print setup instructions and stop the script.
if (friso_charset() != "GBK") {
    $_str = "Error: GBK charset required. <br />";
    $_str .= "1. Modified friso.charset = 1 in your friso.ini .<br />";
    $_str .= "2. Modified friso.lex_dir = GBK lexicon abusolute path to load your GBK lexicon. <br />";
    // exit() with a string argument prints the message before terminating.
    exit($_str);
}

// Defaults used when the page is requested without a submitted "split" form.
$text = '';    // text to segment (replaced by the posted text on submit)
$_timer = 0;   // presumably the elapsed segmentation time; its use is outside this excerpt
$_act = '';    // requested action taken from the form, empty when nothing was posted
$_cfg = array('mode' => FRISO_COMPLEX);    // segmentation options, complex mode by default
if (isset($_POST['_act']) && ($_act = $_POST['_act']) == 'split') {
    $text =& $_POST['text'];
    $_cfg =& $_POST['config'];
    if (!isset($_cfg['add_syn'])) {
        $_cfg['add_syn'] = 0;
    }
    if (!isset($_cfg['clr_stw'])) {
        $_cfg['clr_stw'] = 0;
    }
    if (!isset($_cfg['keep_urec'])) {
        $_cfg['keep_urec'] = 0;
    }
    if (!isset($_cfg['spx_out'])) {
        $_cfg['spx_out'] = 0;
    }
    if (!isset($_cfg['en_sseg'])) {