/**
 * Segments a string into words using the SplitWord segmenter.
 *
 * @param string $str   Text to segment.
 * @param string $style 'array' (default) returns the segmenter output split on
 *                      commas; 'string' returns the raw segmenter output.
 * @return array|string|null Array or string depending on $style; null when
 *                           $style is neither 'array' nor 'string'.
 */
function splitWords($str, $style = 'array')
{
    // The dictionary file is large. If the server's configured memory limit is
    // too low to load it, the limit can be raised here:
    // @ini_set("memory_limit", "30M");
    require_once dirname(__FILE__) . "/utf8_splitword.php";

    $sp = new SplitWord();
    $result = $sp->SplitRMM($str);
    $sp->Clear();

    if ($style == 'string') {
        return $result;
    }

    if ($style == 'array') {
        // split() (POSIX regex) was removed in PHP 7. The delimiter is a literal
        // comma, so explode() yields exactly the same pieces.
        return explode(',', $result);
    }

    // Unknown style: the original fell off the end of the function, i.e. null.
    return null;
}
/**
 * Queries ve123_links and returns one page (10 rows) of formatted results.
 *
 * Mode is chosen from the arguments, in this order:
 *  - empty $wd: every titled link, newest first;
 *  - non-empty $domain: titled links whose URL matches the domain;
 *  - otherwise: keyword search on the segmented phrase, ranked by title hits.
 *
 * Side effects: sets $this->db, $this->wd, $this->total and $this->totalpage;
 * in keyword mode also calls $this->update_keywords() and sets
 * $this->wd_split, $this->wd_array and $this->wd_count. The page number is
 * taken from $_GET["p"] (defaults to 1).
 *
 * @param string $wd     Search phrase.
 * @param string $domain Optional domain filter (only used when $wd is not empty).
 * @return array Formatted rows of the current page. In keyword mode the entries
 *               returned by GetTg() are prepended on page 1, and are returned
 *               alone when the page has no rows.
 */
function q($wd, $domain = '')
{
    global $db;
    $this->db =& $db;
    $this->wd = $wd;

    // Only the keyword branch fills these; the other branches previously read
    // them while undefined.
    $tgarray = array();
    $tgarray_count = 0;

    // NOTE(review): $wd and $domain are concatenated into SQL below without
    // escaping. The escaping facilities of $db are not visible from here;
    // confirm that callers sanitise these values or switch to the driver's
    // escaping / prepared statements.
    if (empty($wd)) {
        $sql = "select * from ve123_links where title<>'' order by updatetime desc";
    } elseif (!empty($domain)) {
        if ($domain == getdomain($domain)) {
            // Parenthesised: without the brackets "a and b or c" also returned
            // rows with an empty title whenever the second LIKE matched.
            $sql = "select * from ve123_links where title<>'' and (url like '%." . $domain . "%' or url like '%//" . $domain . "%')";
        } else {
            $sql = "select * from ve123_links where title<>'' and url regexp 'http://" . $domain . "'";
        }
    } else {
        $this->update_keywords();

        // require_once: a plain require redeclares the file's symbols (fatal
        // error) if this method runs twice in one request.
        require_once PATH . "include/splitword.func.php";
        $sp = new SplitWord();
        $wd_split = $sp->SplitRMM($wd);
        $sp->Clear();

        // ereg_replace() was removed in PHP 7; collapse runs of spaces with PCRE.
        $wd_split = preg_replace('/ +/', ' ', trim($wd_split));
        $wd_array = explode(" ", $wd_split);

        $this->wd_split = $wd_split;
        $this->wd_array = $wd_array;
        // The original counted the string $wd_split (always 1 on PHP 5/7, a
        // TypeError on PHP 8); the number of words is what is meant.
        $this->wd_count = count($wd_array);

        $tgarray = $this->GetTg();
        $tgarray_count = count($tgarray);

        // Rank by title hits: each word scores its byte length (1 when it is
        // two bytes or shorter); a hit on the whole phrase scores 8.
        $ordersql = " order by (";
        foreach ($wd_array as $value) {
            $strlen = strlen($value);
            $points = ($strlen > 2) ? $strlen : 1;
            $ordersql .= "(case when title like '%" . $value . "%' then " . $points . " else 0 end)+";
        }
        $ordersql .= "(case when title like '%" . $wd . "%' then 8 else 0 end)";
        $ordersql .= ") desc,updatetime desc";

        $keywordsql = $this->GetKeywordSql("title,' ',url,' ',keywords,' ',fulltxt");
        if (empty($keywordsql)) {
            $keywordsql = "title like '%" . $wd . "%'";
        }
        $sql = "select * from ve123_links where title<>'' and " . $keywordsql . $ordersql;
    }

    // First pass without LIMIT, only to learn the total number of hits.
    $query = $db->query($sql);
    $total = $db->num_rows($query) + $tgarray_count;
    $this->total = $total;

    $pagesize = 10;
    $this->totalpage = ceil($total / $pagesize);

    $p = isset($_GET["p"]) ? intval($_GET["p"]) : 1;
    if ($p <= 0) {
        $p = 1;
    }
    $offset = ($p - 1) * $pagesize;

    $query = $db->query($sql . " limit {$offset},{$pagesize}");

    // Must start as an array: count(null) throws on PHP 8 when no row matches.
    $array = array();
    while ($row = $db->fetch_array($query)) {
        $array[] = array(
            "title"       => $this->GetRedKeyWord(replace_filter_word(str_cut($row["title"], 60))),
            "txt"         => $this->GetRedKeyWord(replace_filter_word(str_cut($row["fulltxt"], 250))),
            "description" => $this->GetRedKeyWord(replace_filter_word(str_cut($row["description"], 250))),
            "url"         => str_cut($row["url"], 50),
            "updatetime"  => date("Y-m-d", $row["updatetime"]),
            "pagesize"    => $row["pagesize"],
            "link_id"     => $row["link_id"],
            "tuiguang"    => $row["tuiguang"],
        );
    }

    if (empty($array)) {
        return $tgarray;
    }
    if ($p == 1 && $tgarray_count > 0) {
        return array_merge($tgarray, $array);
    }
    return $array;
}
// Fragment from the middle of a larger routine: the block closed by the first
// lone "}" below is opened outside this excerpt.
// Keyword generation: $title and the text of $body (via Html2Text) are segmented
// with SplitWord::SplitRMM() and passed through GetIndexText(); title tokens are
// appended to $keywords first, then body tokens that are not already among the
// title tokens, each loop stopping once $keywords is 50 bytes or longer. The
// markers "#p#" and "#e#" are then removed and the result is run through
// addslashes().
// After that block: when $autolitpic is 1 and $litpic is empty, $litpic is taken
// from GetDDImgFromBody($body); $message receives the addslashes()-escaped body;
// a DedeSql object is created and GetChannelTable() is called with $channelid.
require_once(DEDEADMIN."/../include/pub_splitword_www.php"); $keywords = ""; $sp = new SplitWord(); $titleindexs = explode(" ",trim($sp->GetIndexText($sp->SplitRMM($title)))); $allindexs = explode(" ",trim($sp->GetIndexText($sp->SplitRMM(Html2Text($body)),200))); if(is_array($allindexs) && is_array($titleindexs)){ foreach($titleindexs as $k){ if(strlen($keywords)>=50) break; else $keywords .= $k." "; } foreach($allindexs as $k){ if(strlen($keywords)>=50) break; else if(!in_array($k,$titleindexs)) $keywords .= $k." "; } } $sp->Clear(); unset($sp); $keywords = preg_replace("/#p#|#e#/","",$keywords); $keywords = addslashes($keywords); } //Automatically fetch the thumbnail if($autolitpic==1 && $litpic==''){ $litpic = GetDDImgFromBody($body); } $message = addslashes($body); $dsql = new DedeSql(false); $cts = GetChannelTable($dsql,$channelid);
/**
 * Returns the segmented form of a search phrase, caching it in the
 * `#@__search_keywords` table.
 *
 * The phrase is first cut with cn_substr($keyword, 50). If no row exists for it,
 * the phrase is segmented with SplitWord::GetSplitRMM() when it is longer than
 * 7 bytes (shorter phrases are used unchanged) and a new row is inserted with
 * count 1. If a row exists, its count and lasttime are updated and the stored
 * `spwords` value is returned.
 *
 * @param string $keyword Raw search phrase.
 * @return string Space-separated words for the phrase.
 */
function GetKeywords($keyword)
{
    $keyword = cn_substr($keyword, 50);
    $row = $this->dsql->GetOne("Select spwords From `#@__search_keywords` where keyword='" . addslashes($keyword) . "'; ");

    if (is_array($row)) {
        // Known phrase: bump the hit counter and reuse the stored segmentation.
        $this->dsql->ExecuteNoneQuery("Update `#@__search_keywords` set count=count+1,lasttime='" . time() . "' where keyword='" . addslashes($keyword) . "'; ");
        return $row['spwords'];
    }

    if (strlen($keyword) > 7) {
        $sp = new SplitWord();
        $keywords = $sp->GetSplitRMM($keyword);
        $sp->Clear();
        // ereg_replace() was removed in PHP 7; collapse runs of spaces with PCRE.
        $keywords = preg_replace('/ +/', ' ', trim($keywords));
    } else {
        $keywords = $keyword;
    }

    $inquery = "INSERT INTO `#@__search_keywords`(`keyword`,`spwords`,`count`,`result`,`lasttime`)\r\n VALUES ('" . addslashes($keyword) . "', '" . addslashes($keywords) . "', '1', '0', '" . time() . "'); ";
    $this->dsql->ExecuteNoneQuery($inquery);

    return $keywords;
}
/**
 * Prepares an HTML body for storage and fills in derived fields.
 *
 * Steps, each controlled by a global flag:
 *  - $remote == 1: the body is passed through GetCurContent();
 *  - $dellink == 1: anchors whose href starts with "http://" are removed, except
 *    those pointing at $cfg_basehost or the current HTTP host;
 *  - empty $description and $cfg_auot_description > 0: a description is cut from
 *    the text of the body and slash-escaped;
 *  - $autolitpic == 1 and empty $litpic: $litpic is taken from GetDDImgFromBody();
 *  - $autokey == 1 and empty $keywords: comma-terminated keywords are collected
 *    from the global $title and from the body.
 *
 * @param string $body        Slash-escaped HTML body.
 * @param string $description Filled in by reference when empty (see above).
 * @param string $litpic      Filled in by reference when empty (see above).
 * @param string $keywords    Filled in by reference when empty (see above).
 * @param string $dtype       Passed through to GetFieldValueA().
 * @return string The processed body, run through GetFieldValueA() and addslashes().
 */
function AnalyseHtmlBody($body, &$description, &$litpic, &$keywords, $dtype = '')
{
    global $autolitpic, $remote, $dellink, $autokey, $cfg_basehost, $cfg_auot_description, $id, $title, $cfg_soft_lang;

    if (empty($autolitpic)) {
        $autolitpic = '';
    }
    $body = stripslashes($body);

    // Localise remote images.
    if ($remote == 1) {
        $body = GetCurContent($body);
    }

    // Remove links that do not point at this site.
    if ($dellink == 1) {
        $requestHost = "http://" . $_SERVER['HTTP_HOST'];
        $ownHosts = array($cfg_basehost, $requestHost);
        $placeholders = array('#basehost#', '#2basehost2#');

        // Mask our own hosts so the pattern below cannot match their anchors,
        // then restore them afterwards.
        $body = str_replace($ownHosts, $placeholders, $body);
        // Note: the second alternative of the pattern matches every closing </a>.
        $body = preg_replace("/(<a[ \t\r\n]{1,}href=[\"']{0,}http:\\/\\/[^\\/]([^>]*)>)|(<\\/a>)/isU", "", $body);
        $body = str_replace($placeholders, $ownHosts, $body);
    }

    // Automatic summary.
    if ($description == '' && $cfg_auot_description > 0) {
        $summary = cn_substr(html2text($body), $cfg_auot_description);
        $summary = trim(preg_replace('/#p#|#e#/', '', $summary));
        $description = addslashes($summary);
    }

    // Automatic thumbnail.
    if ($autolitpic == 1 && $litpic == '') {
        $litpic = GetDDImgFromBody($body);
    }

    // Automatic keywords.
    if ($autokey == 1 && $keywords == '') {
        // The segmenter is fed the output of utf82gb() when the site language
        // is utf-8.
        $isUtf8 = ($cfg_soft_lang == 'utf-8');
        $subject = $isUtf8 ? utf82gb($title) : $title;
        $message = $isUtf8 ? utf82gb($body) : $body;

        include_once DEDEINC . '/splitword.class.php';
        $keywords = '';
        $segmenter = new SplitWord();
        $titleindexs = explode(' ', preg_replace("/#p#|#e#/", '', $segmenter->GetIndexText($subject)));
        $allindexs = explode(' ', preg_replace("/#p#|#e#/", '', $segmenter->GetIndexText(Html2Text($message), 500)));

        if (is_array($allindexs) && is_array($titleindexs)) {
            // Title words first; stop before the list would reach 60 bytes.
            foreach ($titleindexs as $word) {
                if (strlen($keywords . $word) >= 60) {
                    break;
                }
                $keywords .= $word . ',';
            }
            // Then body words that the title did not already provide.
            foreach ($allindexs as $word) {
                if (strlen($keywords . $word) >= 60) {
                    break;
                }
                if (in_array($word, $titleindexs)) {
                    continue;
                }
                $keywords .= $word . ',';
            }
        }

        $segmenter->Clear();
        $segmenter = null;
    }

    $body = GetFieldValueA($body, $dtype, $id);

    return addslashes($body);
}
/**
 * Word segmentation helper.
 *
 * Loads the SplitWord vendor library, segments $str with SplitRMM(), hands the
 * result to p(), releases the segmenter with Clear() and returns the result.
 *
 * @param  mixed $str Text to segment.
 * @return mixed Whatever SplitWord::SplitRMM() produced for $str.
 */
function SplitWord($str)
{
    vendor('SplitWord/SplitWord');

    $segmenter = new SplitWord();
    $segments = $segmenter->SplitRMM($str);

    // NOTE(review): p() is defined elsewhere; presumably a debug dump. Confirm
    // it is meant to stay in this code path.
    p($segments);

    $segmenter->Clear();

    return $segments;
}
/**
 * Queries kuaso_links and returns one page (10 rows) of formatted results.
 *
 * Without $domain the search is a keyword search on the segmented phrase
 * (joined with kuaso_sites), ordered by links.tuiguang and then by title hits.
 * With $domain the keyword conditions are not used and titled links whose URL
 * matches the domain are returned instead.
 *
 * Side effects: sets $this->db, $this->wd, $this->wd_split, $this->wd_array,
 * $this->wd_count, $this->total and $this->totalpage. The page number is taken
 * from $_GET["p"] (defaults to 1).
 *
 * @param string $wd     Search phrase.
 * @param string $domain Optional domain filter.
 * @return array Formatted rows of the current page. The entries returned by
 *               GetTg() are prepended on page 1, and are returned alone when
 *               the page has no rows.
 */
function q($wd, $domain = '')
{
    global $db;
    $this->db =& $db;
    $this->wd = $wd;

    // require_once: a plain require redeclares the file's symbols (fatal error)
    // if this method runs twice in one request.
    require_once "../include/splitword.func.php";
    $sp = new SplitWord();
    $wd_split = $sp->SplitRMM($wd);
    $sp->Clear();

    // ereg_replace() was removed in PHP 7; collapse runs of spaces with PCRE.
    $wd_split = preg_replace('/ +/', ' ', trim($wd_split));
    $wd_array = explode(" ", $wd_split);

    $this->wd_split = $wd_split;
    $this->wd_array = $wd_array;
    // The original counted the string $wd_split (always 1 on PHP 5/7, a
    // TypeError on PHP 8); the number of words is what is meant.
    $this->wd_count = count($wd_array);

    $tgarray = $this->GetTg();
    $tgarray_count = count($tgarray);

    // NOTE(review): $wd, its words and $domain are concatenated into SQL below
    // without escaping. The escaping facilities of $db are not visible from
    // here; confirm that callers sanitise these values or switch to the
    // driver's escaping / prepared statements.

    // Rank: promoted links first, then one point per word found in the title
    // plus eight points when the whole phrase is found.
    $ordersql = " order by links.tuiguang desc,(";
    foreach ($wd_array as $value) {
        $ordersql .= "(case when links.title like '%" . $value . "%' then 1 else 0 end)+";
    }
    $ordersql .= "(case when links.title like '%" . $wd . "%' then 8 else 0 end)";
    $ordersql .= ") desc";

    $keywordsql = $this->GetKeywordSql("links.title,' ',links.url,' ',links.keywords");
    if (empty($keywordsql)) {
        $keywordsql = "links.title like '%" . $wd . "%'";
    }

    if (empty($domain)) {
        $sql = "select links.*,sites.qp from kuaso_links links left join kuaso_sites sites on links.site_id=sites.site_id where links.title<>'' and " . $keywordsql . $ordersql;
    } elseif ($domain == getdomain($domain)) {
        // Parenthesised: without the brackets "a and b or c" also returned rows
        // with an empty title whenever the second LIKE matched.
        $sql = "select * from kuaso_links where title<>'' and (url like '%." . $domain . "%' or url like '%//" . $domain . "%')";
    } else {
        $sql = "select * from kuaso_links where title<>'' and url regexp 'http://" . $domain . "'";
    }

    // First pass without LIMIT, only to learn the total number of hits.
    $query = $db->query($sql);
    $total = $db->num_rows($query) + $tgarray_count;
    $this->total = $total;

    $pagesize = 10;
    $this->totalpage = ceil($total / $pagesize);

    $p = isset($_GET["p"]) ? intval($_GET["p"]) : 1;
    if ($p <= 0) {
        $p = 1;
    }
    $offset = ($p - 1) * $pagesize;

    $query = $db->query($sql . " limit {$offset},{$pagesize}");

    // Must start as an array: count(null) throws on PHP 8 when no row matches.
    $array = array();
    while ($row = $db->fetch_array($query)) {
        $array[] = array(
            "title"      => $this->GetRedKeyWord(str_cut($row["title"], 60)),
            "txt"        => $this->GetRedKeyWord(str_cut($row["fulltxt"], 250)),
            "url"        => str_cut($row["url"], 50),
            "updatetime" => date("Y-m-d", $row["updatetime"]),
            "pagesize"   => $row["pagesize"],
            "link_id"    => $row["link_id"],
            "tuiguang"   => $row["tuiguang"],
        );
    }

    if (empty($array)) {
        return $tgarray;
    }
    if ($p == 1 && $tgarray_count > 0) {
        return array_merge($tgarray, $array);
    }
    return $array;
}