$description = stripslashes(cn_substr(html2text($body),$cfg_auot_description));
	$description = trim(preg_replace("/#p#|#e#/","",$description));
	$description = addslashes($description);
}
//Localize remote image resources referenced in the content.
//Runs only when $cfg_isUrlOpen is truthy and $remote equals 1; the body is
//replaced by whatever GetCurContent() returns (helper defined elsewhere).
//------------------------------------
if($cfg_isUrlOpen && $remote==1){
	$body = GetCurContent($body);
}
//自动获取关键字
//----------------------------------
if($autokey==1){
	require_once(DEDEADMIN."/../include/pub_splitword_www.php");
	$keywords = "";
	$sp = new SplitWord();
	$titleindexs = explode(" ",trim($sp->GetIndexText($sp->SplitRMM($title))));
	$allindexs = explode(" ",trim($sp->GetIndexText($sp->SplitRMM(Html2Text($body)),200)));
	if(is_array($allindexs) && is_array($titleindexs)){
		foreach($titleindexs as $k){
			if(strlen($keywords)>=50) break;
			else $keywords .= $k." ";
		}
		foreach($allindexs as $k){
			if(strlen($keywords)>=50) break;
			else if(!in_array($k,$titleindexs)) $keywords .= $k." ";
	  }
	}
	$sp->Clear();
	unset($sp);
	$keywords = preg_replace("/#p#|#e#/","",$keywords);
	$keywords = addslashes($keywords);
Пример #2
0
 $dsql->Execute();
 while ($row = $dsql->GetArray()) {
     //跳过已经有关键字的内容
     if (trim($row['keywords']) != '') {
         continue;
     }
     $aid = $row['id'];
     $keywords = '';
     $title = $row['title'];
     $description = $row['description'];
     $body = cn_substr($row['body'], 5000);
     if ($cfg_soft_lang == 'utf-8') {
         $title = utf82gb($title);
         $body = utf82gb($body);
     }
     $titleindexs = explode(' ', preg_replace("/#p#|#e#/", '', $sp->GetIndexText($title)));
     $allindexs = explode(' ', preg_replace("/#p#|#e#/", '', $sp->GetIndexText(Html2Text($body), 500)));
     if (is_array($allindexs) && is_array($titleindexs)) {
         foreach ($titleindexs as $k) {
             if (strlen($keywords . $k) >= 30) {
                 break;
             } else {
                 $keywords .= $k . ',';
             }
         }
         foreach ($allindexs as $k) {
             if (strlen($keywords . $k) >= 30) {
                 break;
             } else {
                 if (!in_array($k, $titleindexs)) {
                     $keywords .= $k . ',';
Пример #3
0
 $fquery = "Select arc.id,arc.title,arc.keywords,addon.body From `#@__archives` arc\r\n\t          left join `#@__addonarticle` addon on addon.aid=arc.id where arc.channel='1' {$limitSql} ";
 $dsql->SetQuery($fquery);
 $dsql->Execute();
 $sp = new SplitWord();
 while ($row = $dsql->GetObject()) {
     if ($row->keywords != '') {
         continue;
     }
     $tjnum++;
     $id = $row->id;
     $keywords = "";
     if ($cfg_soft_lang == 'utf-8') {
         $row->title = utf82gb($row->title);
         $row->body = utf82gb($row->body);
     }
     $titleindexs = explode(' ', trim($sp->GetIndexText($row->title)));
     $allindexs = explode(' ', trim($sp->GetIndexText(Html2Text($row->body), 500)));
     if (is_array($allindexs) && is_array($titleindexs)) {
         foreach ($titleindexs as $k) {
             if (strlen($keywords) >= 30) {
                 break;
             } else {
                 $keywords .= $k . ",";
             }
         }
         foreach ($allindexs as $k) {
             if (strlen($keywords) >= 30) {
                 break;
             } else {
                 if (!in_array($k, $titleindexs)) {
                     $keywords .= $k . ",";
Пример #4
0
/**
 * Post-process a submitted HTML body and fill in metadata the author left empty.
 *
 * Steps, in order (each one is switched by a global flag):
 *  1. strip slashes from the incoming body;
 *  2. $remote == 1     -> pass the body through GetCurContent() (remote image localization);
 *  3. $dellink == 1    -> unwrap anchors that point to other hosts, keeping their inner text;
 *  4. empty $description and $cfg_auot_description > 0 -> build a summary from the body text;
 *  5. $autolitpic == 1 and empty $litpic -> take a thumbnail via GetDDImgFromBody();
 *  6. $autokey == 1 and empty $keywords  -> build a comma-separated keyword list from
 *     the title and body using SplitWord;
 *  7. pass the body through GetFieldValueA() and addslashes() it.
 *
 * @param string $body        Slash-escaped HTML body as submitted.
 * @param string $description In/out: summary; generated (and slash-escaped) only when empty.
 * @param string $litpic      In/out: thumbnail; looked up in the body only when empty.
 * @param string $keywords    In/out: keyword list; generated only when empty. Every generated
 *                            keyword is followed by a comma (trailing comma included).
 * @param string $dtype       Field type forwarded unchanged to GetFieldValueA().
 * @return string The processed body, slash-escaped.
 */
function AnalyseHtmlBody($body, &$description, &$litpic, &$keywords, $dtype = '')
{
    global $autolitpic, $remote, $dellink, $autokey, $cfg_basehost, $cfg_auot_description, $id, $title, $cfg_soft_lang;
    $autolitpic = empty($autolitpic) ? '' : $autolitpic;
    $body = stripslashes($body);

    // Localize remote images
    if ($remote == 1) {
        $body = GetCurContent($body);
    }

    // Remove links that point outside this site
    if ($dellink == 1) {
        $basehost = "http://" . $_SERVER['HTTP_HOST'];
        // Mask both spellings of our own host so on-site anchors cannot match below.
        $body = str_replace($cfg_basehost, '#basehost#', $body);
        $body = str_replace($basehost, '#2basehost2#', $body);
        // Remove each external anchor as an open/close PAIR and keep its inner text.
        // Matching "</a>" on its own would also strip the closing tag of every
        // on-site link and leave those anchors unclosed.
        // NOTE(review): only http:// targets are treated as external, as before.
        $unlinked = preg_replace(
            '/<a[ \t\r\n]{1,}href=["\']{0,}http:\/\/[^\/][^>]*>(.*)<\/a>/isU',
            '$1',
            $body
        );
        // preg_replace() yields null on a PCRE error; keep the body rather than lose it.
        if (is_string($unlinked)) {
            $body = $unlinked;
        }
        $body = str_replace('#basehost#', $cfg_basehost, $body);
        $body = str_replace('#2basehost2#', $basehost, $body);
    }

    // Automatic summary
    if ($description == '' && $cfg_auot_description > 0) {
        $description = cn_substr(html2text($body), $cfg_auot_description);
        $description = trim(preg_replace('/#p#|#e#/', '', $description));
        $description = addslashes($description);
    }

    // Automatic thumbnail
    if ($autolitpic == 1 && $litpic == '') {
        $litpic = GetDDImgFromBody($body);
    }

    // Automatic keywords
    if ($autokey == 1 && $keywords == '') {
        $subject = $title;
        $message = $body;
        if ($cfg_soft_lang == 'utf-8') {
            // The splitter is fed GBK text when the site runs in UTF-8.
            $subject = utf82gb($title);
            $message = utf82gb($message);
        }
        include_once DEDEINC . '/splitword.class.php';
        $keywords = '';
        $sp = new SplitWord();
        $titleindexs = explode(' ', preg_replace("/#p#|#e#/", '', $sp->GetIndexText($subject)));
        $allindexs = explode(' ', preg_replace("/#p#|#e#/", '', $sp->GetIndexText(Html2Text($message), 500)));

        // Title words come first; stop once the list would reach 60 bytes.
        foreach ($titleindexs as $k) {
            if (trim($k) === '') {
                // explode() yields '' for leading/trailing/double spaces; skipping
                // them avoids stray commas such as ",foo,,bar,".
                continue;
            }
            if (strlen($keywords . $k) >= 60) {
                break;
            }
            $keywords .= $k . ',';
        }
        // Then body words that are not already title words, under the same limit.
        foreach ($allindexs as $k) {
            if (trim($k) === '') {
                continue;
            }
            if (strlen($keywords . $k) >= 60) {
                break;
            }
            if (!in_array($k, $titleindexs, true)) {
                $keywords .= $k . ',';
            }
        }
        $sp->Clear();
        $sp = null;
    }

    $body = GetFieldValueA($body, $dtype, $id);
    $body = addslashes($body);
    return $body;
}