$description = stripslashes(cn_substr(html2text($body),$cfg_auot_description)); $description = trim(preg_replace("/#p#|#e#/","",$description)); $description = addslashes($description); } //把内容中远程的图片资源本地化 //------------------------------------ if($cfg_isUrlOpen && $remote==1){ $body = GetCurContent($body); } //自动获取关键字 //---------------------------------- if($autokey==1){ require_once(DEDEADMIN."/../include/pub_splitword_www.php"); $keywords = ""; $sp = new SplitWord(); $titleindexs = explode(" ",trim($sp->GetIndexText($sp->SplitRMM($title)))); $allindexs = explode(" ",trim($sp->GetIndexText($sp->SplitRMM(Html2Text($body)),200))); if(is_array($allindexs) && is_array($titleindexs)){ foreach($titleindexs as $k){ if(strlen($keywords)>=50) break; else $keywords .= $k." "; } foreach($allindexs as $k){ if(strlen($keywords)>=50) break; else if(!in_array($k,$titleindexs)) $keywords .= $k." "; } } $sp->Clear(); unset($sp); $keywords = preg_replace("/#p#|#e#/","",$keywords); $keywords = addslashes($keywords);
$dsql->Execute(); while ($row = $dsql->GetArray()) { //跳过已经有关键字的内容 if (trim($row['keywords']) != '') { continue; } $aid = $row['id']; $keywords = ''; $title = $row['title']; $description = $row['description']; $body = cn_substr($row['body'], 5000); if ($cfg_soft_lang == 'utf-8') { $title = utf82gb($title); $body = utf82gb($body); } $titleindexs = explode(' ', preg_replace("/#p#|#e#/", '', $sp->GetIndexText($title))); $allindexs = explode(' ', preg_replace("/#p#|#e#/", '', $sp->GetIndexText(Html2Text($body), 500))); if (is_array($allindexs) && is_array($titleindexs)) { foreach ($titleindexs as $k) { if (strlen($keywords . $k) >= 30) { break; } else { $keywords .= $k . ','; } } foreach ($allindexs as $k) { if (strlen($keywords . $k) >= 30) { break; } else { if (!in_array($k, $titleindexs)) { $keywords .= $k . ',';
$fquery = "Select arc.id,arc.title,arc.keywords,addon.body From `#@__archives` arc\r\n\t left join `#@__addonarticle` addon on addon.aid=arc.id where arc.channel='1' {$limitSql} "; $dsql->SetQuery($fquery); $dsql->Execute(); $sp = new SplitWord(); while ($row = $dsql->GetObject()) { if ($row->keywords != '') { continue; } $tjnum++; $id = $row->id; $keywords = ""; if ($cfg_soft_lang == 'utf-8') { $row->title = utf82gb($row->title); $row->body = utf82gb($row->body); } $titleindexs = explode(' ', trim($sp->GetIndexText($row->title))); $allindexs = explode(' ', trim($sp->GetIndexText(Html2Text($row->body), 500))); if (is_array($allindexs) && is_array($titleindexs)) { foreach ($titleindexs as $k) { if (strlen($keywords) >= 30) { break; } else { $keywords .= $k . ","; } } foreach ($allindexs as $k) { if (strlen($keywords) >= 30) { break; } else { if (!in_array($k, $titleindexs)) { $keywords .= $k . ",";
/**
 * Post-processes submitted article HTML and fills in missing metadata.
 *
 * The incoming body is un-escaped with stripslashes(), then, depending on
 * global switches, the following steps run in order:
 *   - $remote == 1   : the body is passed through GetCurContent()
 *                      (remote image localisation).
 *   - $dellink == 1  : anchors whose href is an absolute http(s) URL outside
 *                      this site are unwrapped (the link text is kept).
 *   - $description is empty and $cfg_auot_description > 0 :
 *                      a summary is cut from the plain text of the body.
 *   - $autolitpic == 1 and $litpic is empty :
 *                      a thumbnail is taken via GetDDImgFromBody().
 *   - $autokey == 1 and $keywords is empty :
 *                      keywords are built from the word-split title and body.
 * Finally the body goes through GetFieldValueA() and addslashes().
 *
 * @param string $body        Slash-escaped HTML body as submitted.
 * @param string $description In/out. Filled (addslashes-escaped) only when empty.
 * @param string $litpic      In/out. Filled only when empty.
 * @param string $keywords    In/out. Filled only when empty; each keyword is
 *                            followed by a comma (the list ends with ',').
 * @param string $dtype       Field type forwarded to GetFieldValueA().
 * @return string The processed body, escaped with addslashes().
 */
function AnalyseHtmlBody($body, &$description, &$litpic, &$keywords, $dtype = '')
{
    global $autolitpic, $remote, $dellink, $autokey, $cfg_basehost, $cfg_auot_description, $id, $title, $cfg_soft_lang;

    $autolitpic = empty($autolitpic) ? '' : $autolitpic;
    $body = stripslashes($body);

    // Localise remote images.
    if ($remote == 1) {
        $body = GetCurContent($body);
    }

    // Remove links that point outside this site.
    if ($dellink == 1) {
        // Hide on-site absolute URLs behind placeholders so the patterns below
        // cannot match them. Empty values are skipped: an empty host would
        // otherwise turn the search string into a bare "http://" and shield
        // every link in the body.
        $protected = array();
        if ($cfg_basehost != '') {
            $protected['#basehost#'] = $cfg_basehost;
        }
        $host = isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : '';
        if ($host != '') {
            $protected['#2basehost2#'] = 'http://' . $host;
            $protected['#3basehost3#'] = 'https://' . $host;
        }
        $body = str_replace(array_values($protected), array_keys($protected), $body);

        // An opening <a> tag whose first attribute is an absolute http(s) href.
        $externalOpener = '<a[ \t\r\n]+href=["\']*https?://[^/][^>]*>';
        $patterns = array(
            // External opener + its own closing tag: keep only the inner HTML.
            // The look-ahead stops the content from running into another <a>,
            // so the </a> of a following on-site link is never consumed.
            '#' . $externalOpener . '((?:(?!<a[ \t\r\n>]).)*?)</a>#is',
            // External opener that has no closing tag of its own.
            '#' . $externalOpener . '#is',
        );
        $stripped = preg_replace($patterns, array('$1', ''), $body);
        // preg_replace() yields null on a PCRE failure; keep the body intact then.
        if ($stripped !== null) {
            $body = $stripped;
        }

        $body = str_replace(array_keys($protected), array_values($protected), $body);
    }

    // Automatic summary.
    if ($description == '' && $cfg_auot_description > 0) {
        $description = cn_substr(html2text($body), $cfg_auot_description);
        $description = trim(preg_replace('/#p#|#e#/', '', $description));
        $description = addslashes($description);
    }

    // Automatic thumbnail.
    if ($autolitpic == 1 && $litpic == '') {
        $litpic = GetDDImgFromBody($body);
    }

    // Automatic keywords.
    if ($autokey == 1 && $keywords == '') {
        $subject = $title;
        $message = $body;
        if ($cfg_soft_lang == 'utf-8') {
            // NOTE(review): the tokens below come from this GB-converted text and
            // are not converted back here - confirm what encoding callers expect.
            $subject = utf82gb($title);
            $message = utf82gb($message);
        }
        include_once DEDEINC . '/splitword.class.php';

        $keywords = '';
        $maxKeywordBytes = 60; // byte budget for the assembled keyword string
        $sp = new SplitWord();
        $titleindexs = explode(' ', preg_replace("/#p#|#e#/", '', $sp->GetIndexText($subject)));
        $allindexs = explode(' ', preg_replace("/#p#|#e#/", '', $sp->GetIndexText(Html2Text($message), 500)));

        $picked = array(); // tokens already appended, to avoid duplicates

        // Title words take priority.
        foreach ($titleindexs as $k) {
            $k = trim($k);
            // explode() produces '' for empty input and repeated separators;
            // appending those would leave stray commas in the result.
            if ($k === '' || isset($picked[$k])) {
                continue;
            }
            if (strlen($keywords . $k) >= $maxKeywordBytes) {
                break;
            }
            $keywords .= $k . ',';
            $picked[$k] = true;
        }

        // Then body words that are not title words. The second loop runs even
        // if the first one stopped early, so a shorter body word can still fit.
        foreach ($allindexs as $k) {
            $k = trim($k);
            // Strict comparison: tokens are strings, and loose comparison would
            // treat numeric look-alikes such as "10" and "1e1" as equal.
            if ($k === '' || isset($picked[$k]) || in_array($k, $titleindexs, true)) {
                continue;
            }
            if (strlen($keywords . $k) >= $maxKeywordBytes) {
                break;
            }
            $keywords .= $k . ',';
            $picked[$k] = true;
        }

        $sp->Clear();
        $sp = null;
        // NOTE(review): unlike $description and the returned body, $keywords is
        // not passed through addslashes() here - confirm the caller escapes it
        // before it reaches SQL.
    }

    $body = GetFieldValueA($body, $dtype, $id);
    $body = addslashes($body);

    return $body;
}