if ($step == 2) {
    // Run a test pass against the complete list rule.
    if ($dopost == 'test') {
        include DEDEINC . "/dedecollection.class.php";

        // Collapse the flag to 0 (unset or null) or 1 (anything else).
        $usemore = !isset($usemore) ? 0 : 1;

        // Serialise the submitted form values into the {dede:noteinfo}/{dede:listrule}
        // tag text; the slash-stripped copy of this text is what LoadListConfig() receives below.
        $listconfig = "{dede:noteinfo notename=\\\"{$notename}\\\" channelid=\\\"{$channelid}\\\" macthtype=\\\"{$macthtype}\\\"\r\nrefurl=\\\"{$refurl}\\\" sourcelang=\\\"{$sourcelang}\\\" cosort=\\\"{$cosort}\\\" isref=\\\"{$isref}\\\" exptime=\\\"{$exptime}\\\" usemore=\\\"{$usemore}\\\" /}\r\n\r\n{dede:listrule sourcetype=\\\"{$sourcetype}\\\" rssurl=\\\"{$rssurl}\\\" regxurl=\\\"{$regxurl}\\\"\r\nstartid=\\\"{$startid}\\\" endid=\\\"{$endid}\\\" addv=\\\"{$addv}\\\" urlrule=\\\"{$urlrule}\\\"\r\n musthas=\\\"{$musthas}\\\" nothas=\\\"{$nothas}\\\" listpic=\\\"{$listpic}\\\" usemore=\\\"{$usemore}\\\"}\r\n\t{dede:addurls}{$addurls}{/dede:addurls}\r\n\t{dede:batchrule}{$batchrule}{/dede:batchrule}\r\n\t{dede:regxrule}{$regxrule}{/dede:regxrule}\r\n\t{dede:areastart}{$areastart}{/dede:areastart}\r\n\t{dede:areaend}{$areaend}{/dede:areaend}\r\n{/dede:listrule}\r\n";
        $tmplistconfig = stripslashes($listconfig);
        $notename = stripslashes($notename);

        // For an RSS source with no explicit referer, fall back to the feed URL.
        // This used to read `($refurl = '')`: an assignment that always evaluated
        // to false (so the fallback never ran) and blanked $refurl for RSS sources.
        if ($sourcetype == 'rss' && $refurl == '') {
            $refurl = $rssurl;
        }
        $refurl = stripslashes($refurl);
        $errmsg = '';

        // Test the rule.
        if ($sourcetype == 'rss') {
            $links = GetRssLinks(stripslashes($rssurl));
            $demopage = $rssurl;
        } else {
            $links = array();
            $lists = GetUrlFromListRule($regxurl, stripslashes($addurls), $startid, $endid, $addv, $usemore, stripslashes($batchrule));
            if (isset($lists[0][0])) {
                // Use the first generated list page as the demo page and let the
                // collector test-parse the list rule.
                $demopage = $lists[0][0];
                $dc = new DedeCollection();
                $dc->LoadListConfig($tmplistconfig);
                $listurl = '';
                $links = $dc->Testlists($listurl);
                $errmsg = $dc->errString;
            } else {
                // No list page could be derived from the rule; the message is shown in place of a URL.
                $demopage = '没有匹配到适合的列表页!';
            }
        }
        // NOTE(review): the `if ($dopost == 'test')` and `if ($step == 2)` blocks are
        // still open at this point; they are presumably closed further on.
// NOTE(review): this section starts mid-statement; the two braces below close
// blocks that were opened before the visible code.
} }
// Fall back to '0' when $usemore is empty.
$usemore = empty($usemore) ? '0' : $usemore;
// Insert a new row into `#@__co_note`: uptime is the current time, and
// cotime / pnum / isok are all written as '0'.
// NOTE(review): the values are interpolated straight into the SQL text;
// presumably they are escaped upstream — confirm.
$inQuery = " INSERT INTO `#@__co_note`(`channelid`,`notename`,`sourcelang`,`uptime`,`cotime`,`pnum`,`isok`,`listconfig`,`itemconfig`,`usemore`)\r\n VALUES ('{$channelid}','{$mynotename}','{$sourcelang}','" . time() . "','0','0','0','{$listconfig}','{$itemconfig}','{$usemore}'); ";
$dsql->ExecuteNoneQuery($inQuery);
// The message reads "Node copied successfully!"; $ENV_GOBACK_URL is passed as the second argument.
ShowMsg("成功复制一个节点!", $ENV_GOBACK_URL);
exit;
// The `else` below pairs with a condition opened before the visible code.
} else {
    if ($dopost == "testrss") {
        // Presumably a permission check for the 'co_AddNote' privilege — confirm against CheckPurview().
        CheckPurview('co_AddNote');
        $msg = '';
        if ($rssurl == '') {
            // Message: "You did not specify an RSS address!"
            $msg = '你没有指定RSS地址!';
        } else {
            include DEDEINC . "/dedecollection.func.php";
            $arr = GetRssLinks($rssurl);
            // Heading: "URLs discovered from {$rssurl}:"
            $msg = "从 {$rssurl} 发现的网址:<br />";
            // $i is incremented per entry but is not read anywhere in the visible code.
            $i = 1;
            // Only list entries when GetRssLinks() actually returned an array.
            if (is_array($arr)) {
                foreach ($arr as $ar) {
                    // One separator plus the link / title / image fields per entry.
                    // NOTE(review): $rssurl and the feed fields are placed into the HTML
                    // without escaping here — confirm they are sanitised elsewhere.
                    $msg .= "<hr size='1' />\r\n";
                    $msg .= "link: {$ar['link']}<br />title: {$ar['title']}<br />image: {$ar['image']}\r\n";
                    $i++;
                }
            }
        }
        // $wintitle and $wecome_info are set here but not read in the visible code;
        // presumably the page template consumes them — confirm.
        $wintitle = "采集管理-测试";
        $wecome_info = "<a href='co_main.php'>采集管理</a>::RSS地址测试";
        // Wrap the message in an OxWindow and fetch its "hand" window markup.
        $win = new OxWindow();
        $win->AddMsgItem($msg);
        $winform = $win->GetWindow("hand");
/**
 * Collect seed URLs for this note and queue them for download.
 *
 * RSS sources are read in a single pass. List-page sources are handled in
 * windows of $pagesize list pages after offset $glstart: each page is
 * downloaded, its size is recorded in `dede_co_listurls`, its links are
 * filtered with the `nothas` / `musthas` rules, and the surviving links are
 * written to `#@__co_htmls` and `#@__co_urls`.
 *
 * @access public
 * @param  int $islisten  -1 wipes this note's rows in `#@__co_urls` and `#@__co_htmls`
 *                        on the first batch; 1 skips links whose hash is already in
 *                        `#@__co_urls`; any other value only drops exported pages
 *                        on the first batch
 * @param  int $glstart   number of list pages already handled (0 for the first batch)
 * @param  int $pagesize  number of list pages to handle in this call
 * @return int 0 when nothing is left (always for RSS), -1 when the first batch
 *             yielded no links, otherwise the number of list pages still to process
 */
function GetSourceUrl($islisten = 0, $glstart = 0, $pagesize = 10)
{
    // Pre-processing happens on the first batch only.
    // (Original note: the "download not-yet-downloaded content of the seed URLs"
    // mode does not go through the seed-collection step.)
    if ($glstart == 0) {
        if ($islisten == -1) {
            // Re-collect everything: forget known hashes and all queued pages.
            $this->dsql->ExecuteNoneQuery("DELETE FROM `#@__co_urls` WHERE nid='" . $this->noteId . "'");
            $this->dsql->ExecuteNoneQuery("DELETE FROM `#@__co_htmls` WHERE nid='" . $this->noteId . "' ");
        } else {
            $this->dsql->ExecuteNoneQuery("DELETE FROM `#@__co_htmls` WHERE nid='" . $this->noteId . "' AND isexport=1 ");
        }
    }

    // Defaults keep every later comparison well-defined when the note row is
    // missing (previously both variables were simply undefined in that case).
    $nchannelid = 0;
    $musthas_b = "";
    $nrow = $this->dsql->GetOne("SELECT * FROM `#@__co_note` WHERE nid='{$this->noteId}'");
    if (is_array($nrow)) {
        $nchannelid = $nrow['channelid'];
    }

    // Seeds from an RSS feed.
    if ($this->lists['sourcetype'] == 'rss') {
        $links = GetRssLinks($this->lists['rssurl']);
        if (!is_array($links)) {
            // Nothing usable came back from the feed; nothing to queue.
            $links = array();
        }
        foreach ($links as $v) {
            $hash = md5($v['link']);
            if ($islisten == 1) {
                $lrow = $this->dsql->GetOne("SELECT * FROM `#@__co_urls` WHERE nid='{$this->noteId}' AND hash='" . $hash . "' ");
                if (is_array($lrow)) {
                    continue;
                }
            }
            // dtime used to be the literal string 'dtime'; store the current
            // timestamp like the list-page branch does. The former `$mytotal`
            // limit was dropped: that variable was never defined, so it never fired.
            $inquery = "INSERT INTO `#@__co_htmls` (`nid` ,`typeid`, `title` , `litpic` , `url` , `dtime` , `isdown` , `isexport` , `result`)\r\n VALUES ('{$this->noteId}' , '0', '" . addslashes($v['title']) . "' , '" . addslashes($v['image']) . "' , '" . addslashes($v['link']) . "' , '" . time() . "' , '0' , '0' , ''); ";
            $this->dsql->ExecuteNoneQuery($inquery);
            $inquery = "INSERT INTO `#@__co_urls`(hash,nid) VALUES ('" . $hash . "','{$this->noteId}');";
            $this->dsql->ExecuteNoneQuery($inquery);
        }
        return 0;
    }

    // Seeds from list pages.
    // Normalise the global re-collection interval once: negative values become 0.
    global $co_oldpertime;
    $co_oldpertime = intval($co_oldpertime) >= 0 ? intval($co_oldpertime) : 0;

    // NOTE(review): `dede_co_listurls` is addressed with a hard-coded prefix while the
    // other tables use `#@__`; left untouched so existing installs keep working — confirm.
    $tmplink = array();
    $moviePostion = 0;
    $endpos = $glstart + $pagesize;
    $totallen = count($this->lists['url']);
    foreach ($this->lists['url'] as $cururls) {
        $cururl = $cururls[0];
        $typeid = empty($cururls[1]) ? 0 : $cururls[1];
        $moviePostion++;
        if ($moviePostion > $endpos) {
            break;
        }
        if ($moviePostion <= $glstart) {
            // Handled by an earlier batch.
            continue;
        }
        $urlhash = md5($cururl);

        // Skip list pages whose stored last_cotime still lies in the future.
        if ($co_oldpertime > 0 && $nchannelid <= 10) {
            $nocorow = $this->dsql->GetOne("SELECT * FROM dede_co_listurls WHERE url='" . $urlhash . "' and last_cotime>UNIX_TIMESTAMP()");
            if (is_array($nocorow)) {
                continue;
            }
        }

        // Download the page and record its length; on an existing row the previous
        // length moves to lenth1 so the two can be compared right after.
        $html = $this->DownOnePage($cururl);
        $htmllen = strlen($html);
        $this->dsql->ExecuteNoneQuery("INSERT INTO dede_co_listurls (url,last_cotime,lenth2,nid) VALUES ('" . $urlhash . "',UNIX_TIMESTAMP(),{$htmllen},'{$this->noteId}') ON DUPLICATE KEY UPDATE last_cotime=UNIX_TIMESTAMP(),lenth1=lenth2, lenth2={$htmllen},nid='{$this->noteId}'");
        if ($nchannelid > 10) {
            $htmllenrow = $this->dsql->GetOne("SELECT * FROM dede_co_listurls WHERE nid='{$this->noteId}' and url='" . $urlhash . "' and lenth1=lenth2");
        } else {
            $htmllenrow = $this->dsql->GetOne("SELECT * FROM dede_co_listurls WHERE url='" . $urlhash . "' and lenth1=lenth2");
        }
        if (is_array($htmllenrow)) {
            // Same length as last time: treat the page as unchanged and skip it.
            continue;
        } elseif ($co_oldpertime > 0 && $nchannelid == '1') {
            // Push last_cotime into the future so the check above skips this page until then.
            $last_cotime = time() + $co_oldpertime;
            $this->dsql->ExecuteNoneQuery("update dede_co_listurls set last_cotime={$last_cotime} WHERE url='" . $urlhash . "'");
        }

        // Optionally narrow the HTML to the configured area before extracting links.
        if (trim($this->lists['areastart']) != '' && trim($this->lists['areaend']) != '') {
            $areabody = $this->lists['areastart'] . '[var:区域]' . $this->lists['areaend'];
            $html = $this->GetHtmlArea('[var:区域]', $areabody, $html);
        }
        $this->cDedeHtml->SetSource($html, $cururl, 'link');
        foreach ($this->cDedeHtml->Links as $v) {
            // Exclusion rule: "a|b" means either pattern rejects the link (case-insensitive);
            // a rule without "|" is matched as one case-sensitive pattern.
            if ($this->lists['nothas'] != '') {
                if (strpos($this->lists['nothas'], "|") !== false) {
                    $nothas = explode('|', $this->lists['nothas']);
                    if (preg_match("#" . $nothas[0] . "#i", $v['link']) || preg_match("#" . $nothas[1] . "#i", $v['link'])) {
                        continue;
                    }
                } elseif (preg_match("#" . $this->lists['nothas'] . "#", $v['link'])) {
                    continue;
                }
            }
            // Inclusion rule: with "a|b" only part a must match; part b is kept in
            // $musthas_b as a rewrite instruction.
            if ($this->lists['musthas'] != '') {
                if (strpos($this->lists['musthas'], "|") !== false) {
                    $musthas = explode('|', $this->lists['musthas']);
                    $musthas_b = $musthas[1];
                    if (!preg_match("#" . $musthas[0] . "#i", $v['link'])) {
                        continue;
                    }
                } elseif (!preg_match("#" . $this->lists['musthas'] . "#i", $v['link'])) {
                    continue;
                }
                // "pattern,replacement": rewrite the link with a '/'-delimited regex.
                // $musthas_b intentionally keeps its un-escaped value; the insert
                // stage below reads it again.
                if (strpos($musthas_b, ",") !== false) {
                    $musthas_b = str_replace('\\/', '/', $musthas_b);
                    $urlchange = explode(',', $musthas_b);
                    $urlchange_a = "/" . str_replace('/', '\\/', $urlchange[0]) . "/";
                    $v['link'] = preg_replace($urlchange_a, $urlchange[1], $v['link']);
                }
            }
            $tmplink[] = array($v, $typeid);
        }
        $this->cDedeHtml->Clear();
    }

    // Only a fruitless first batch is reported as an error; later batches carry on.
    if (count($tmplink) == 0 && $glstart == 0) {
        return -1;
    }

    foreach ($tmplink as $vs) {
        $v = $vs[0];
        $typeid = $vs[1];
        $hash = md5($v['link']);
        if ($islisten == 1) {
            // Channels above 10 de-duplicate per note, the rest across all notes.
            if ($nchannelid > 10) {
                $lrow = $this->dsql->GetOne("SELECT * FROM `#@__co_urls` WHERE nid='{$this->noteId}' AND hash='" . $hash . "' ");
            } else {
                $lrow = $this->dsql->GetOne("SELECT * FROM `#@__co_urls` WHERE hash='" . $hash . "' ");
            }
            if (is_array($lrow)) {
                continue;
            }
        }
        if ($nchannelid > 10 && $musthas_b != "") {
            // "search-replace" applied to the stored download URL. A rule without
            // '-' replaces with an empty string, as before, but without reading
            // an undefined array offset.
            $urlchange = explode('-', $musthas_b);
            $urlchange_b = isset($urlchange[1]) ? $urlchange[1] : '';
            $downurl = str_replace($urlchange[0], $urlchange_b, addslashes($v['link']));
        } else {
            $downurl = addslashes($v['link']);
        }
        $inquery = "INSERT INTO `#@__co_htmls` (`nid` ,`typeid`, `title` , `litpic` , `url` , `dtime` , `isdown` , `isexport` , `result`)\r\n VALUES ('{$this->noteId}' ,'{$typeid}', '" . addslashes($v['title']) . "' , '" . addslashes($v['image']) . "' , '" . $downurl . "' , '" . time() . "' , '0' , '0' , ''); ";
        $this->dsql->ExecuteNoneQuery($inquery);
        $inquery = "INSERT INTO `#@__co_urls`(hash,nid) VALUES ('" . $hash . "','{$this->noteId}');";
        $this->dsql->ExecuteNoneQuery($inquery);
    }

    // Number of list pages still waiting, or 0 when the window reached the end.
    return $endpos >= $totallen ? 0 : $totallen - $endpos;
}
/**
 * Collect seed URLs for this note and queue them for download.
 *
 * RSS sources are read in a single pass. List-page sources are handled in
 * windows of $pagesize list pages after offset $glstart: each page is
 * downloaded, its links are filtered with the `nothas` / `musthas` rules, and
 * the surviving links are written to `#@__co_htmls` and `#@__co_urls`.
 *
 * @access public
 * @param  int $islisten  -1 wipes this note's rows in `#@__co_urls` and `#@__co_htmls`
 *                        on the first batch; 1 skips links whose hash is already in
 *                        `#@__co_urls`; any other value only drops exported pages
 *                        on the first batch
 * @param  int $glstart   number of list pages already handled (0 for the first batch)
 * @param  int $pagesize  number of list pages to handle in this call
 * @return int 0 when nothing is left (always for RSS), -1 when the first batch
 *             yielded no links, otherwise the number of list pages still to process
 */
function GetSourceUrl($islisten = 0, $glstart = 0, $pagesize = 10)
{
    // Pre-processing happens on the first batch only.
    // (Original note: the "download not-yet-downloaded content of the seed URLs"
    // mode does not go through the seed-collection step.)
    if ($glstart == 0) {
        if ($islisten == -1) {
            // Re-collect everything: forget known hashes and all queued pages.
            $this->dsql->ExecuteNoneQuery("DELETE FROM `#@__co_urls` WHERE nid='" . $this->noteId . "'");
            $this->dsql->ExecuteNoneQuery("DELETE FROM `#@__co_htmls` WHERE nid='" . $this->noteId . "' ");
        } else {
            $this->dsql->ExecuteNoneQuery("DELETE FROM `#@__co_htmls` WHERE nid='" . $this->noteId . "' AND isexport=1 ");
        }
    }

    // Seeds from an RSS feed.
    if ($this->lists['sourcetype'] == 'rss') {
        $links = GetRssLinks($this->lists['rssurl']);
        if (!is_array($links)) {
            // Nothing usable came back from the feed; nothing to queue.
            $links = array();
        }
        foreach ($links as $v) {
            $hash = md5($v['link']);
            if ($islisten == 1) {
                $lrow = $this->dsql->GetOne("SELECT * FROM `#@__co_urls` WHERE nid='{$this->noteId}' AND hash='" . $hash . "' ");
                if (is_array($lrow)) {
                    continue;
                }
            }
            // dtime used to be the literal string 'dtime'; store the current
            // timestamp like the list-page branch does. The former `$mytotal`
            // limit was dropped: that variable was never defined, so it never fired.
            $inquery = "INSERT INTO `#@__co_htmls` (`nid` ,`typeid`, `title` , `litpic` , `url` , `dtime` , `isdown` , `isexport` , `result`)\r\n VALUES ('{$this->noteId}' , '0', '" . addslashes($v['title']) . "' , '" . addslashes($v['image']) . "' , '" . addslashes($v['link']) . "' , '" . time() . "' , '0' , '0' , ''); ";
            $this->dsql->ExecuteNoneQuery($inquery);
            $inquery = "INSERT INTO `#@__co_urls`(hash,nid) VALUES ('" . $hash . "','{$this->noteId}');";
            $this->dsql->ExecuteNoneQuery($inquery);
        }
        return 0;
    }

    // Seeds from list pages: walk the configured pages and keep only those
    // inside the window ($glstart, $glstart + $pagesize].
    $tmplink = array();
    $moviePostion = 0;
    $endpos = $glstart + $pagesize;
    $totallen = count($this->lists['url']);
    foreach ($this->lists['url'] as $cururls) {
        $cururl = $cururls[0];
        $typeid = empty($cururls[1]) ? 0 : $cururls[1];
        $moviePostion++;
        if ($moviePostion > $endpos) {
            break;
        }
        if ($moviePostion <= $glstart) {
            // Handled by an earlier batch.
            continue;
        }

        $html = $this->DownOnePage($cururl);
        // Optionally narrow the HTML to the configured area before extracting links.
        if (trim($this->lists['areastart']) != '' && trim($this->lists['areaend']) != '') {
            $areabody = $this->lists['areastart'] . '[var:区域]' . $this->lists['areaend'];
            $html = $this->GetHtmlArea('[var:区域]', $areabody, $html);
        }
        $this->cDedeHtml->SetSource($html, $cururl, 'link');
        foreach ($this->cDedeHtml->Links as $v) {
            // Exclusion pattern is case-sensitive, inclusion pattern is not (as before).
            if ($this->lists['nothas'] != '' && preg_match("#" . $this->lists['nothas'] . "#", $v['link'])) {
                continue;
            }
            if ($this->lists['musthas'] != '' && !preg_match("#" . $this->lists['musthas'] . "#i", $v['link'])) {
                continue;
            }
            $tmplink[] = array($v, $typeid);
        }
        $this->cDedeHtml->Clear();
    }

    // Only a fruitless first batch is reported as an error; later batches carry on.
    if (count($tmplink) == 0 && $glstart == 0) {
        return -1;
    }

    foreach ($tmplink as $vs) {
        $v = $vs[0];
        $typeid = $vs[1];
        $hash = md5($v['link']);
        if ($islisten == 1) {
            $lrow = $this->dsql->GetOne("SELECT * FROM `#@__co_urls` WHERE nid='{$this->noteId}' AND hash='" . $hash . "' ");
            if (is_array($lrow)) {
                continue;
            }
        }
        $inquery = "INSERT INTO `#@__co_htmls` (`nid` ,`typeid`, `title` , `litpic` , `url` , `dtime` , `isdown` , `isexport` , `result`)\r\n VALUES ('{$this->noteId}' ,'{$typeid}', '" . addslashes($v['title']) . "' , '" . addslashes($v['image']) . "' , '" . addslashes($v['link']) . "' , '" . time() . "' , '0' , '0' , ''); ";
        $this->dsql->ExecuteNoneQuery($inquery);
        $inquery = "INSERT INTO `#@__co_urls`(hash,nid) VALUES ('" . $hash . "','{$this->noteId}');";
        $this->dsql->ExecuteNoneQuery($inquery);
    }

    // Number of list pages still waiting, or 0 when the window reached the end.
    return $endpos >= $totallen ? 0 : $totallen - $endpos;
}