예제 #1
0
파일: setbwd.php 프로젝트: jechiy/PHPWind
/**
 * Regenerates the combined sensitive-word dictionary.
 *
 * Every pw_wordfb row whose classid is not in the list produced by
 * getCloseClass() becomes one CRLF-terminated line of dict_all.txt in the
 * form "word|weight" (rows of type 3 additionally carry "|replacement").
 * The text file is then handed to Trie::build() together with dict_all.dat.
 *
 * Reads the globals $db (database handle) and $score (weight lookup keyed by
 * word type).
 */
function setAllDictionary()
{
    global $db, $score;
    L::loadClass('filterutil', 'filter', false);
    $bin_file = D_P . 'data/bbscache/dict_all.dat';
    $source_file = D_P . 'data/bbscache/dict_all.txt';

    // First run: when neither file is present yet, create both as empty files.
    if (!(file_exists($bin_file) || file_exists($source_file))) {
        // plain-text dictionary
        pwCache::setData($source_file, '');
        // binary dictionary
        pwCache::setData($bin_file, '');
    }

    $classid = S::sqlImplode(getCloseClass());
    $result = $db->query("SELECT word, type, wordreplace FROM pw_wordfb WHERE classid NOT IN ({$classid})");

    $lines = array();
    while ($row = $db->fetch_array($result)) {
        $weight = $score[$row['type']];
        // Only rows of type 3 contribute a replacement column.
        if ($row['type'] == 3) {
            $suffix = '|' . $row['wordreplace'];
        } else {
            $suffix = '';
        }
        $lines[] = strtolower($row['word']) . "|" . $weight . $suffix . "\r\n";
    }

    // plain-text dictionary
    pwCache::setData($source_file, implode('', $lines));
    // binary dictionary: reset to empty before it is rebuilt
    pwCache::setData($bin_file, '');

    // Rebuild the binary dictionary from the text dictionary.
    $builder = new Trie();
    $builder->build($source_file, $bin_file);
}
예제 #2
0
/**
 * Brings the per-timestamp sensitive-word dictionaries in line with pw_wordfb.
 *
 * For each distinct wordtime found in pw_wordfb (restricted by the classid
 * filter) the function:
 *   - inserts a pw_filter_dictionary row for that timestamp if none exists;
 *   - rewrites dict_<wordtime>.txt with one "word|weight" line per word and
 *     has Trie::build() regenerate dict_<wordtime>.php from it (skipped when
 *     the timestamp has no words).
 * Afterwards it removes pw_filter_dictionary rows whose title is not one of
 * those timestamps, and deletes dict_*.php / dict_*.txt files whose title is
 * neither one of those timestamps nor "all".
 *
 * Reads the globals $db (database handle) and $score (weight lookup keyed by
 * word type).
 */
function checkDictionary()
{
    global $db, $score;
    require_once R_P . "lib/filterutil.class.php";
    // NOTE(review): $classid is interpolated as-is into the NOT IN (...) lists
    // below, so getCloseClass() is assumed to return a ready-to-use SQL value
    // list here — confirm against its definition.
    $classid = getCloseClass();
    $db_dictlist = array();
    $dict_folder = D_P . 'data/bbscache/';

    // Read every distinct sensitive-word timestamp.
    $sql = "SELECT wordtime FROM pw_wordfb WHERE classid NOT IN ({$classid}) GROUP BY wordtime";
    $query = $db->query($sql);
    while ($rt = $db->fetch_array($query)) {
        // The timestamp doubles as the dictionary title.
        $title = $rt['wordtime'];
        $db_dictlist[] = $title;
        // Escape once and reuse, so both lookups treat the title the same way.
        $escapedTitle = pwEscape($title);
        $bin_path = 'dict_' . $title . '.php';
        $source_path = 'dict_' . $title . '.txt';
        $bin_file = $dict_folder . $bin_path;
        $source_file = $dict_folder . $source_path;

        // Register the dictionary for this timestamp if it is not recorded yet.
        $sql = "SELECT id FROM pw_filter_dictionary WHERE title =" . $escapedTitle;
        $dictionary = $db->get_value($sql);
        if (!$dictionary) {
            $fields = array('title' => $title, 'bin' => $bin_path, 'source' => $source_path);
            $fields = pwSqlSingle($fields);
            $sql = "INSERT INTO pw_filter_dictionary SET  {$fields} ";
            $db->update($sql);
        }

        // Collect the sensitive words that belong to this dictionary.
        $querys = $db->query("SELECT word, type FROM pw_wordfb WHERE classid NOT IN ({$classid}) AND wordtime =" . $escapedTitle);
        $content = "";
        while ($word = $db->fetch_array($querys)) {
            $weighing = $score[$word['type']];
            $content .= "" . $word['word'] . "|" . $weighing . "\r\n";
        }

        // Rewrite the dictionary files only when there is something to write.
        if ($content) {
            // plain-text dictionary
            writeover($source_file, $content);
            // binary dictionary: reset to empty before it is rebuilt
            writeover($bin_file, '');
            $trie = new Trie();
            $trie->build($source_file, $bin_file);
        }
    }

    // Build the escaped title list for the cleanup query; -1 stands in for an
    // empty list so that "NOT IN (...)" stays valid SQL.
    $escapedTitles = array();
    foreach ($db_dictlist as $dictTitle) {
        $escapedTitles[] = pwEscape($dictTitle);
    }
    $dict_str = $escapedTitles ? implode(", ", $escapedTitles) : -1;

    // Delete dictionary records that no longer match any timestamp.
    $sql = "DELETE FROM pw_filter_dictionary WHERE title NOT IN ({$dict_str})";
    $db->update($sql);

    // glob() can return false on error; treat that as "no files".
    $files = glob($dict_folder . 'dict_*.php');
    if (!is_array($files)) {
        $files = array();
    }

    // "all" is added to the keep-list so the dict_all files are never removed.
    $db_dictlist[] = 'all';
    $db_dictlist = array_flip($db_dictlist);
    foreach ($files as $path) {
        // Parse the title from the file name only: an underscore or dot in a
        // parent directory name must not affect where the title starts/ends.
        $name = basename($path);
        $title = substr($name, strpos($name, '_') + 1);
        $title = substr($title, 0, strpos($title, '.'));
        if (!array_key_exists($title, $db_dictlist)) {
            $bin_path = 'dict_' . $title . '.php';
            $source_path = 'dict_' . $title . '.txt';
            $bin_file = $dict_folder . $bin_path;
            $source_file = $dict_folder . $source_path;
            // Remove the stale dictionary files.
            deldir($bin_file);
            deldir($source_file);
        }
    }
}