/**
 * @desc Check the per-timestamp dictionary files and records.
 *
 * For every distinct wordtime found in pw_wordfb (rows whose classid is not in
 * the list returned by getCloseClass()), this function:
 *   1. inserts a pw_filter_dictionary row for that title if none exists,
 *   2. rewrites the text dictionary (dict_<title>.txt) and rebuilds the
 *      binary dictionary (dict_<title>.php) through Trie::build().
 * Afterwards it deletes pw_filter_dictionary rows whose title is no longer
 * present, and removes dict_*.php / dict_*.txt files in the cache folder that
 * belong to neither a current title nor the reserved title 'all'.
 *
 * Reads the globals $db (database handle) and $score (weight per word type).
 *
 * @return void
 */
function checkDictionary() {
    global $db, $score;
    require_once R_P . "lib/filterutil.class.php";

    $classid = getCloseClass();
    $db_dictlist = array();
    $dict_folder = D_P . 'data/bbscache/';

    // Read every sensitive-word timestamp; each one is used as a dictionary title.
    $sql = "SELECT wordtime FROM pw_wordfb WHERE classid NOT IN ({$classid}) GROUP BY wordtime";
    $query = $db->query($sql);
    while ($rt = $db->fetch_array($query)) {
        // Dictionary name
        $title = $rt['wordtime'];
        $db_dictlist[] = $title;

        $bin_path = 'dict_' . $title . '.php';
        $source_path = 'dict_' . $title . '.txt';
        $bin_file = $dict_folder . $bin_path;
        $source_file = $dict_folder . $source_path;

        // Look up the dictionary record for this timestamp and insert it when missing.
        // The title is escaped here the same way as in the other queries of this function.
        $sql = "SELECT id FROM pw_filter_dictionary WHERE title =" . pwEscape($title);
        $dictionary = $db->get_value($sql);
        if (!$dictionary) {
            // Prepare the column data
            $value = array('title' => $title, 'bin' => $bin_path, 'source' => $source_path);
            $value = pwSqlSingle($value);
            // Insert the record
            $sql = "INSERT INTO pw_filter_dictionary SET {$value} ";
            $db->update($sql);
        }

        // Collect the sensitive words that belong to this dictionary, one "word|weight" per line.
        $querys = $db->query("SELECT word, type FROM pw_wordfb WHERE classid NOT IN ({$classid}) AND wordtime =" . pwEscape($title));
        $content = "";
        while ($value = $db->fetch_array($querys)) {
            $weighing = $score[$value['type']];
            $content .= "" . $value['word'] . "|" . $weighing . "\r\n";
        }

        // Update the dictionary files only when there is something to write.
        if ($content) {
            writeover($source_file, $content); // text-form dictionary
            writeover($bin_file, '');          // binary dictionary (emptied before the rebuild)
            // Rebuild the binary dictionary from the text one
            $trie = new Trie();
            $trie->build($source_file, $bin_file);
        }
    }

    // Build the escaped title list for the NOT IN clause; -1 stands in for an empty list.
    $escaped_titles = array();
    foreach ($db_dictlist as $value) {
        $escaped_titles[] = pwEscape($value);
    }
    $dict_str = $escaped_titles ? implode(', ', $escaped_titles) : -1;

    // Delete dictionary records that are no longer needed.
    $sql = "DELETE FROM pw_filter_dictionary WHERE title NOT IN ({$dict_str})";
    $db->update($sql);

    // glob() may return false instead of an empty array; normalise before iterating.
    $files = glob($dict_folder . 'dict_*.php');
    if (!is_array($files)) {
        $files = array();
    }

    // 'all' is always kept in addition to the titles that are still in use.
    $db_dictlist[] = 'all';
    $db_dictlist = array_flip($db_dictlist);
    foreach ($files as $value) {
        // Derive the title from the file name only, so that '_' or '.' elsewhere
        // in the directory path cannot distort it.
        $title = (string) substr(basename($value, '.php'), strlen('dict_'));
        if (!array_key_exists($title, $db_dictlist)) {
            $bin_path = 'dict_' . $title . '.php';
            $source_path = 'dict_' . $title . '.txt';
            $bin_file = $dict_folder . $bin_path;
            $source_file = $dict_folder . $source_path;
            // Remove the leftover files
            deldir($bin_file);
            deldir($source_file);
        }
    }
}
/**
 * @desc Generate the combined ("all") dictionary files.
 *
 * Selects every pw_wordfb row whose classid is not in the list returned by
 * getCloseClass(), writes one line per word to data/bbscache/dict_all.txt in
 * the form "word|weight" (plus "|replacement" when the word type is 3, with
 * the word lower-cased via strtolower()), then empties
 * data/bbscache/dict_all.dat and hands both paths to Trie::build().
 *
 * Reads the globals $db (database handle) and $score (weight per word type).
 *
 * @return void
 */
function setAllDictionary() {
    global $db, $score;
    L::loadClass('filterutil', 'filter', false);

    $binPath = D_P . 'data/bbscache/dict_all.dat';
    $textPath = D_P . 'data/bbscache/dict_all.txt';

    // When neither file exists yet, seed both with empty content first.
    if (!(file_exists($binPath) || file_exists($textPath))) {
        pwCache::setData($textPath, ''); // text-form dictionary
        pwCache::setData($binPath, '');  // binary dictionary
    }

    $excludedClasses = S::sqlImplode(getCloseClass());
    $result = $db->query("SELECT word, type, wordreplace FROM pw_wordfb WHERE classid NOT IN ({$excludedClasses})");

    $dictionaryText = "";
    while ($row = $db->fetch_array($result)) {
        $weight = $score[$row['type']];
        $line = strtolower($row['word']) . "|" . $weight;
        // Only type 3 entries carry their replacement text as a third field.
        if ($row['type'] == 3) {
            $line .= '|' . $row['wordreplace'];
        }
        $dictionaryText .= $line . "\r\n";
    }

    pwCache::setData($textPath, $dictionaryText); // text-form dictionary
    pwCache::setData($binPath, '');               // binary dictionary (emptied before the rebuild)

    // Rebuild the binary dictionary from the text one.
    $builder = new Trie();
    $builder->build($textPath, $binPath);
}
/**
 * Build the dictionary.
 *
 * @param array|null $path Storage paths for the serialized dictionary, passed to the
 *                         Trie constructor. When the argument compares loosely equal
 *                         to null, an array with the keys 'bin' and 'source' is built
 *                         from $this->dict_bin_path and $this->dict_source_path.
 * @return mixed The result of Trie::build(); described by the original author as the
 *               number of lines produced by a successful build (not verifiable here).
 */
function buildDict($path = null) {
    $dictPaths = ($path == null)
        ? array('bin' => $this->dict_bin_path, 'source' => $this->dict_source_path)
        : $path;

    $builder = new Trie($dictPaths);

    return $builder->build();
}