$query = "SELECT arc.*, addt.* From `#@__archives` arc LEFT JOIN `#@__addonarticle` addt ON addt.aid=arc.id WHERE arc.id in({$arcids}) AND arc.channel=1 "; $dsql->SetQuery($query); $dsql->Execute(); while ($row = $dsql->GetArray()) { //跳过已经有关键字的内容 if (trim($row['keywords']) != '') { continue; } $aid = $row['id']; $keywords = ''; $title = $row['title']; $description = $row['description']; $body = cn_substr($row['body'], 5000); $sp->SetSource($title, $cfg_soft_lang, $cfg_soft_lang); $sp->StartAnalysis(); $titleindexs = preg_replace("/#p#|#e#/", '', $sp->GetFinallyIndex()); $sp->SetSource(Html2Text($body), $cfg_soft_lang, $cfg_soft_lang); $sp->StartAnalysis(); $allindexs = preg_replace("/#p#|#e#/", '', $sp->GetFinallyIndex()); if (is_array($allindexs) && is_array($titleindexs)) { foreach ($titleindexs as $k => $v) { if (strlen($keywords . $k) >= 60) { break; } else { if (strlen($k) <= 2) { continue; } $keywords .= $k . ','; } } foreach ($allindexs as $k => $v) {
/**
 * Return the segmented form of a search keyword and record the search in the database.
 *
 * The keyword is first truncated with cn_substr($keyword, 50). When
 * `#@__search_keywords` already holds a row for it, that row's counter and
 * timestamp are updated and its stored `spwords` value is returned. Otherwise
 * the segmented form is built, inserted as a new row, and returned.
 *
 * @access public
 * @param  string $keyword  Search keyword
 * @return string
 */
function GetKeywords($keyword)
{
    global $cfg_soft_lang;

    $keyword = cn_substr($keyword, 50);
    $escaped = addslashes($keyword);

    $cached = $this->dsql->GetOne("SELECT spwords FROM `#@__search_keywords` WHERE keyword='" . $escaped . "'; ");

    // Keyword seen before: bump its statistics and reuse the stored segmentation.
    if (is_array($cached)) {
        $this->dsql->ExecuteNoneQuery("UPDATE `#@__search_keywords` SET count=count+1,lasttime='" . time() . "' WHERE keyword='" . $escaped . "'; ");
        return $cached['spwords'];
    }

    // Short keywords (at most 7 bytes) are stored unsegmented.
    $spwords = $keyword;

    if (strlen($keyword) > 7) {
        $splitter = new SplitWord($cfg_soft_lang, $cfg_soft_lang);
        $splitter->SetSource($keyword, $cfg_soft_lang, $cfg_soft_lang);
        $splitter->SetResultType(2);
        $splitter->StartAnalysis(TRUE);
        // Return value is not used; the call is retained to keep the same call sequence on the splitter.
        $splitter->GetFinallyResult();
        $wordIndex = $splitter->GetFinallyIndex();
        ksort($wordIndex);

        // The segmented words are the array keys; only keys longer than 3 bytes are appended.
        $spwords = $keyword . ' ';
        foreach (array_keys($wordIndex) as $word) {
            if (strlen($word) > 3) {
                $spwords .= ' ' . $word;
            }
        }

        // Collapse runs of spaces into a single space.
        $spwords = preg_replace("/[ ]{1,}/", " ", $spwords);
        unset($splitter);
    }

    $insertSql = "INSERT INTO `#@__search_keywords`(`keyword`,`spwords`,`count`,`result`,`lasttime`)\n VALUES ('" . $escaped . "', '" . addslashes($spwords) . "', '1', '0', '" . time() . "'); ";
    $this->dsql->ExecuteNoneQuery($insertSql);

    return $spwords;
}
/**
 * Process the HTML text of an article.
 * Removes links that are not on-site/allowed, builds an automatic summary,
 * and automatically picks a thumbnail and keywords.
 *
 * Which steps run is controlled by the globals $remote, $dellink,
 * $cfg_auot_description, $autolitpic and $autokey.
 *
 * @access public
 * @param  string $body         Content (slash-escaped; it is passed through stripslashes() first)
 * @param  string $description  Description; filled in by reference when empty and auto-summary is enabled
 * @param  string $litpic       Thumbnail; filled in by reference when empty and $autolitpic is 1
 * @param  string $keywords     Keywords; filled in by reference when empty and $autokey is 1
 * @param  string $dtype        Type, passed on to GetFieldValueA()
 * @return string               The processed body, escaped with addslashes()
 */
function AnalyseHtmlBody($body, &$description, &$litpic, &$keywords, $dtype = '')
{
    global $autolitpic, $remote, $dellink, $autokey, $cfg_basehost, $cfg_auot_description, $id, $title, $cfg_soft_lang;

    $autolitpic = empty($autolitpic) ? '' : $autolitpic;
    $body = stripslashes($body);

    // Localise remote images
    if ($remote == 1) {
        $body = GetCurContent($body);
    }

    // Remove links that do not point to this site
    if ($dellink == 1) {
        $allow_urls = array($_SERVER['HTTP_HOST']);
        // Read the configured list of additionally allowed hyperlinks
        if (file_exists(DEDEDATA . "/admin/allowurl.txt")) {
            $allow_urls = array_merge($allow_urls, file(DEDEDATA . "/admin/allowurl.txt"));
        }
        $body = Replace_Links($body, $allow_urls);
    }

    // Automatic summary
    if ($description == '' && $cfg_auot_description > 0) {
        $description = cn_substr(html2text($body), $cfg_auot_description);
        $description = trim(preg_replace('/#p#|#e#/', '', $description));
        $description = addslashes($description);
    }

    // Automatic thumbnail
    if ($autolitpic == 1 && $litpic == '') {
        $litpic = GetDDImgFromBody($body);
    }

    // Automatic keywords
    if ($autokey == 1 && $keywords == '') {
        include_once DEDEINC . '/splitword.class.php';
        $keywords = '';

        $sp = new SplitWord($cfg_soft_lang, $cfg_soft_lang);

        $sp->SetSource($title, $cfg_soft_lang, $cfg_soft_lang);
        $sp->StartAnalysis();
        $titleindexs = preg_replace("/#p#|#e#/", '', $sp->GetFinallyIndex());

        $sp->SetSource(Html2Text($body), $cfg_soft_lang, $cfg_soft_lang);
        $sp->StartAnalysis();
        $allindexs = preg_replace("/#p#|#e#/", '', $sp->GetFinallyIndex());

        if (is_array($allindexs) && is_array($titleindexs)) {
            // Words from the title come first. The words are the array KEYS.
            foreach ($titleindexs as $k => $v) {
                // Stop once the keyword string would reach 60 bytes.
                if (strlen($keywords . $k) >= 60) {
                    break;
                }
                // Skip very short tokens (2 bytes or fewer).
                if (strlen($k) <= 2) {
                    continue;
                }
                $keywords .= $k . ',';
            }

            // Then words from the body that the title did not already supply.
            foreach ($allindexs as $k => $v) {
                if (strlen($keywords . $k) >= 60) {
                    break;
                }
                // The duplicate check must look at the KEYS of $titleindexs,
                // because that is where the words live. The previous
                // in_array() call searched the values instead, so title
                // words were appended a second time.
                if (array_key_exists($k, $titleindexs)) {
                    continue;
                }
                if (strlen($k) <= 2) {
                    continue;
                }
                $keywords .= $k . ',';
            }
        }
        $sp = null;
    }

    $body = GetFieldValueA($body, $dtype, $id);
    $body = addslashes($body);

    return $body;
}
if ($limitSql != '') { $fquery = "SELECT arc.id,arc.title,arc.keywords,addon.body FROM `#@__archives` arc\n LEFT JOIN `#@__addonarticle` addon ON addon.aid=arc.id WHERE arc.channel='1' {$limitSql} "; $dsql->SetQuery($fquery); $dsql->Execute(); $sp = new SplitWord($cfg_soft_lang, $cfg_soft_lang); while ($row = $dsql->GetObject()) { if ($row->keywords != '') { continue; } $tjnum++; $id = $row->id; $keywords = ""; $sp->SetSource($row->title, $cfg_soft_lang, $cfg_soft_lang); $sp->SetResultType(2); $sp->StartAnalysis(TRUE); $titleindexs = $sp->GetFinallyIndex(); $sp->SetSource(Html2Text($row->body), $cfg_soft_lang, $cfg_soft_lang); $sp->SetResultType(2); $sp->StartAnalysis(TRUE); $allindexs = $sp->GetFinallyIndex(); if (is_array($allindexs) && is_array($titleindexs)) { foreach ($titleindexs as $k => $v) { if (strlen($keywords) >= 30) { break; } else { if (strlen($k) <= 2) { continue; } $keywords .= $k . ","; } }