Ejemplo n.º 1
0
 if ($step == 2) {
     // Step 2: run a test of the complete list rule.
     if ($dopost == 'test') {
         include DEDEINC . "/dedecollection.class.php";
         // $usemore is treated as a flag: 1 when the variable is set, 0 otherwise.
         $usemore = !isset($usemore) ? 0 : 1;
         // Serialise the submitted settings into the {dede:noteinfo}/{dede:listrule} text that is later handed to LoadListConfig().
         $listconfig = "{dede:noteinfo notename=\\\"{$notename}\\\" channelid=\\\"{$channelid}\\\" macthtype=\\\"{$macthtype}\\\"\r\nrefurl=\\\"{$refurl}\\\" sourcelang=\\\"{$sourcelang}\\\" cosort=\\\"{$cosort}\\\" isref=\\\"{$isref}\\\" exptime=\\\"{$exptime}\\\" usemore=\\\"{$usemore}\\\" /}\r\n\r\n{dede:listrule sourcetype=\\\"{$sourcetype}\\\" rssurl=\\\"{$rssurl}\\\" regxurl=\\\"{$regxurl}\\\"\r\nstartid=\\\"{$startid}\\\" endid=\\\"{$endid}\\\" addv=\\\"{$addv}\\\" urlrule=\\\"{$urlrule}\\\"\r\n musthas=\\\"{$musthas}\\\" nothas=\\\"{$nothas}\\\" listpic=\\\"{$listpic}\\\" usemore=\\\"{$usemore}\\\"}\r\n\t{dede:addurls}{$addurls}{/dede:addurls}\r\n\t{dede:batchrule}{$batchrule}{/dede:batchrule}\r\n\t{dede:regxrule}{$regxrule}{/dede:regxrule}\r\n\t{dede:areastart}{$areastart}{/dede:areastart}\r\n\t{dede:areaend}{$areaend}{/dede:areaend}\r\n{/dede:listrule}\r\n";
         $tmplistconfig = stripslashes($listconfig);
         $notename = stripslashes($notename);
         // For RSS sources, use the RSS URL as $refurl when $refurl is empty.
         // This must be a comparison: an assignment here would blank $refurl
         // and make the condition permanently false.
         if ($sourcetype == 'rss' && $refurl == '') {
             $refurl = $rssurl;
         }
         $refurl = stripslashes($refurl);
         $errmsg = '';
         // Run the rule and gather the links it yields.
         if ($sourcetype == 'rss') {
             $links = GetRssLinks(stripslashes($rssurl));
             $demopage = $rssurl;
         } else {
             $links = array();
             $lists = GetUrlFromListRule($regxurl, stripslashes($addurls), $startid, $endid, $addv, $usemore, stripslashes($batchrule));
             if (isset($lists[0][0])) {
                 // The first generated list URL is kept in $demopage; the links come from Testlists().
                 $demopage = $lists[0][0];
                 $dc = new DedeCollection();
                 $dc->LoadListConfig($tmplistconfig);
                 $listurl = '';
                 $links = $dc->Testlists($listurl);
                 $errmsg = $dc->errString;
             } else {
                 // No list page matched the rule; $demopage carries the message instead of a URL.
                 $demopage = '没有匹配到适合的列表页!';
             }
         }
Ejemplo n.º 2
0
         }
     }
     $usemore = empty($usemore) ? '0' : $usemore;
     $inQuery = " INSERT INTO `#@__co_note`(`channelid`,`notename`,`sourcelang`,`uptime`,`cotime`,`pnum`,`isok`,`listconfig`,`itemconfig`,`usemore`)\r\n               VALUES ('{$channelid}','{$mynotename}','{$sourcelang}','" . time() . "','0','0','0','{$listconfig}','{$itemconfig}','{$usemore}'); ";
     $dsql->ExecuteNoneQuery($inQuery);
     ShowMsg("成功复制一个节点!", $ENV_GOBACK_URL);
     exit;
 } else {
     if ($dopost == "testrss") {
         CheckPurview('co_AddNote');
         $msg = '';
         if ($rssurl == '') {
             $msg = '你没有指定RSS地址!';
         } else {
             include DEDEINC . "/dedecollection.func.php";
             $arr = GetRssLinks($rssurl);
             $msg = "从 {$rssurl} 发现的网址:<br />";
             $i = 1;
             if (is_array($arr)) {
                 foreach ($arr as $ar) {
                     $msg .= "<hr size='1' />\r\n";
                     $msg .= "link: {$ar['link']}<br />title: {$ar['title']}<br />image: {$ar['image']}\r\n";
                     $i++;
                 }
             }
         }
         $wintitle = "采集管理-测试";
         $wecome_info = "<a href='co_main.php'>采集管理</a>::RSS地址测试";
         $win = new OxWindow();
         $win->AddMsgItem($msg);
         $winform = $win->GetWindow("hand");
Ejemplo n.º 3
0
 /**
  *  Collect seed URLs for the current note and queue them in `#@__co_htmls`.
  *
  *  RSS sources are read in one pass. List-page sources are processed in
  *  batches: list pages number ($glstart + 1) .. ($glstart + $pagesize) of
  *  $this->lists['url'] are downloaded and their links extracted.
  *
  * @access    public
  * @param     int  $islisten  -1: on the first batch, delete this note's recorded URL hashes and stored pages;
  *                            1: skip links whose hash is already recorded in `#@__co_urls`;
  *                            any other value: no duplicate check
  * @param     int  $glstart   number of list pages already handled by earlier batches
  * @param     int  $pagesize  number of list pages to handle in this call
  * @return    int  0 when no list pages remain (always 0 for RSS), the number of
  *                 remaining list pages otherwise, or -1 when the first batch yields no links
  */
 function GetSourceUrl($islisten = 0, $glstart = 0, $pagesize = 10)
 {
     // Clean-up runs only on the first batch. (The "download not-yet-downloaded
     // content of existing seed URLs" mode does not go through seed collection.)
     if ($glstart == 0) {
         if ($islisten == -1) {
             // Re-collect everything: drop this note's URL hashes and stored pages.
             $this->dsql->ExecuteNoneQuery("DELETE FROM `#@__co_urls` WHERE nid='" . $this->noteId . "'");
             $this->dsql->ExecuteNoneQuery("DELETE FROM `#@__co_htmls` WHERE nid='" . $this->noteId . "' ");
         } else {
             // Otherwise drop only the rows flagged isexport=1.
             $this->dsql->ExecuteNoneQuery("DELETE FROM `#@__co_htmls` WHERE nid='" . $this->noteId . "' AND isexport=1 ");
         }
     }

     // Defaults keep the comparisons below well-defined when the note row is missing.
     $nchannelid = 0;
     $musthas_b = "";
     $nrow = $this->dsql->GetOne("SELECT * FROM `#@__co_note` WHERE nid='{$this->noteId}'");
     if (is_array($nrow)) {
         $nchannelid = $nrow['channelid'];
     }

     // Seeds come from an RSS feed.
     if ($this->lists['sourcetype'] == 'rss') {
         $links = GetRssLinks($this->lists['rssurl']);
         // Guard against a non-array result so krsort()/foreach below stay valid.
         if (!is_array($links)) {
             $links = array();
         }
         // Insert in descending key order (krsort() sorts in place).
         krsort($links);
         foreach ($links as $v) {
             if ($islisten == 1) {
                 $lrow = $this->dsql->GetOne("SELECT * FROM `#@__co_urls` WHERE nid='{$this->noteId}' AND hash='" . md5($v['link']) . "' ");
                 if (is_array($lrow)) {
                     // Hash already recorded for this note: skip the link.
                     continue;
                 }
             }
             // dtime receives the current timestamp, exactly as in the list-page branch below.
             $inquery = "INSERT INTO `#@__co_htmls` (`nid` ,`typeid`, `title` , `litpic` , `url` , `dtime` , `isdown` , `isexport` , `result`)\r\n                    VALUES ('{$this->noteId}' , '0', '" . addslashes($v['title']) . "' , '" . addslashes($v['image']) . "' , '" . addslashes($v['link']) . "' , '" . time() . "' , '0' , '0' , ''); ";
             $this->dsql->ExecuteNoneQuery($inquery);
             $inquery = "INSERT INTO `#@__co_urls`(hash,nid) VALUES ('" . md5($v['link']) . "','{$this->noteId}');";
             $this->dsql->ExecuteNoneQuery($inquery);
         }
         return 0;
     } else {
         $tmplink = array();
         $arrStart = 0;
         $moviePostion = 0;
         $endpos = $glstart + $pagesize;
         $totallen = count($this->lists['url']);
         foreach ($this->lists['url'] as $cururls) {
             $cururl = $cururls[0];
             $typeid = empty($cururls[1]) ? 0 : $cururls[1];
             $moviePostion++;
             if ($moviePostion > $endpos) {
                 // Past the end of this batch.
                 break;
             }
             if ($moviePostion > $glstart) {
                 global $co_oldpertime;
                 $co_oldpertime = intval($co_oldpertime) >= 0 ? intval($co_oldpertime) : 0;
                 // NOTE(review): the list-URL table name is hard-coded as dede_co_listurls
                 // instead of using the `#@__` prefix placeholder used everywhere else - confirm this is intended.
                 if ($co_oldpertime > 0 && $nchannelid <= 10) {
                     $nocorow = $this->dsql->GetOne("SELECT * FROM dede_co_listurls WHERE url='" . md5($cururl) . "' and last_cotime>UNIX_TIMESTAMP()");
                     if (is_array($nocorow)) {
                         // last_cotime still lies in the future: skip this list page for now.
                         continue;
                     }
                 }
                 $html = $this->DownOnePage($cururl);
                 $htmllen = strlen($html);
                 // Record the page length; on an existing row the previous length moves to lenth1.
                 $this->dsql->ExecuteNoneQuery("INSERT INTO dede_co_listurls (url,last_cotime,lenth2,nid) VALUES ('" . md5($cururl) . "',UNIX_TIMESTAMP(),{$htmllen},'{$this->noteId}') ON DUPLICATE KEY UPDATE last_cotime=UNIX_TIMESTAMP(),lenth1=lenth2, lenth2={$htmllen},nid='{$this->noteId}'");
                 if ($nchannelid > 10) {
                     $htmllenrow = $this->dsql->GetOne("SELECT * FROM dede_co_listurls WHERE nid='{$this->noteId}' and url='" . md5($cururl) . "' and lenth1=lenth2");
                 } else {
                     $htmllenrow = $this->dsql->GetOne("SELECT * FROM dede_co_listurls WHERE url='" . md5($cururl) . "' and lenth1=lenth2");
                 }
                 if (is_array($htmllenrow)) {
                     // Page length equals the previously recorded one: treat as unchanged and skip.
                     continue;
                 } elseif ($co_oldpertime > 0 && $nchannelid == '1') {
                     // Push last_cotime $co_oldpertime seconds into the future.
                     $last_cotime = time() + $co_oldpertime;
                     $this->dsql->ExecuteNoneQuery("update dede_co_listurls set last_cotime={$last_cotime} WHERE url='" . md5($cururl) . "'");
                 }
                 // Restrict link extraction to the configured area when both markers are set.
                 if (trim($this->lists['areastart']) != '' && trim($this->lists['areaend']) != '') {
                     $areabody = $this->lists['areastart'] . '[var:区域]' . $this->lists['areaend'];
                     $html = $this->GetHtmlArea('[var:区域]', $areabody, $html);
                 }
                 $this->cDedeHtml->SetSource($html, $cururl, 'link');
                 foreach ($this->cDedeHtml->Links as $v) {
                     // "nothas": drop links matching the pattern; "a|b" means drop when either part matches.
                     if ($this->lists['nothas'] != '') {
                         if (strstr($this->lists['nothas'], "|")) {
                             $nothas = explode('|', $this->lists['nothas']);
                             $nothas_a = $nothas[0];
                             $nothas_b = $nothas[1];
                             if (preg_match("#" . $nothas_a . "#i", $v['link']) || preg_match("#" . $nothas_b . "#i", $v['link'])) {
                                 continue;
                             }
                         } elseif (preg_match("#" . $this->lists['nothas'] . "#", $v['link'])) {
                             continue;
                         }
                     }
                     // "musthas": keep only links matching the pattern. With "a|b", only "a" is the
                     // required pattern; "b" is kept in $musthas_b as a rewrite rule.
                     if ($this->lists['musthas'] != '') {
                         if (strstr($this->lists['musthas'], "|")) {
                             $musthas = explode('|', $this->lists['musthas']);
                             $musthas_a = $musthas[0];
                             $musthas_b = $musthas[1];
                             if (!preg_match("#" . $musthas_a . "#i", $v['link'])) {
                                 continue;
                             }
                         } elseif (!preg_match("#" . $this->lists['musthas'] . "#i", $v['link'])) {
                             continue;
                         }
                         // A rewrite rule of the form "pattern,replacement" is applied to the link as a regex replace.
                         if (strstr($musthas_b, ",")) {
                             $musthas_b = str_replace('\\/', '/', $musthas_b);
                             $urlchange = explode(',', $musthas_b);
                             $urlchange_a = str_replace('/', '\\/', $urlchange[0]);
                             $urlchange_a = "/" . $urlchange_a . "/";
                             $urlchange_b = $urlchange[1];
                             $v['link'] = preg_replace($urlchange_a, $urlchange_b, $v['link']);
                         }
                     }
                     $tmplink[$arrStart][0] = $v;
                     $tmplink[$arrStart][1] = $typeid;
                     $arrStart++;
                 }
                 $this->cDedeHtml->Clear();
             }
         }

         // Insert in descending key order.
         krsort($tmplink);
         $unum = count($tmplink);
         if ($unum > 0) {
             foreach ($tmplink as $vs) {
                 $v = $vs[0];
                 $typeid = $vs[1];
                 if ($islisten == 1) {
                     // Channels above 10 de-duplicate per note; the others de-duplicate across all notes.
                     if ($nchannelid > 10) {
                         $lrow = $this->dsql->GetOne("SELECT * FROM `#@__co_urls` WHERE nid='{$this->noteId}' AND hash='" . md5($v['link']) . "' ");
                     } else {
                         $lrow = $this->dsql->GetOne("SELECT * FROM `#@__co_urls` WHERE hash='" . md5($v['link']) . "' ");
                     }
                     if (is_array($lrow)) {
                         continue;
                     }
                 }
                 if ($nchannelid > 10 && $musthas_b != "") {
                     // A rewrite rule of the form "search-replace" is applied to the stored download URL.
                     $urlchange = explode('-', $musthas_b);
                     $urlchange_a = $urlchange[0];
                     // Without a '-' there is no replacement part; fall back to an empty string.
                     $urlchange_b = isset($urlchange[1]) ? $urlchange[1] : '';
                     $downurl = str_replace($urlchange_a, $urlchange_b, addslashes($v['link']));
                 } else {
                     $downurl = addslashes($v['link']);
                 }
                 $inquery = "INSERT INTO `#@__co_htmls` (`nid` ,`typeid`, `title` , `litpic` , `url` , `dtime` , `isdown` , `isexport` , `result`)\r\n                    VALUES ('{$this->noteId}' ,'{$typeid}', '" . addslashes($v['title']) . "' , '" . addslashes($v['image']) . "' , '" . $downurl . "' , '" . time() . "' , '0' , '0' , ''); ";
                 $this->dsql->ExecuteNoneQuery($inquery);
                 $inquery = "INSERT INTO `#@__co_urls`(hash,nid) VALUES ('" . md5($v['link']) . "','{$this->noteId}');";
                 $this->dsql->ExecuteNoneQuery($inquery);
             }
         } elseif ($glstart == 0) {
             // An empty result counts as an error only for the first batch.
             return -1;
         }

         // Later batches that find nothing still let collection continue with the next pages.
         return ($endpos >= $totallen) ? 0 : $totallen - $endpos;
     }
 }
Ejemplo n.º 4
0
 /**
  *  Collect seed URLs for the current note and queue them in `#@__co_htmls`.
  *
  *  RSS sources are read in one pass. List-page sources are processed in
  *  batches: list pages number ($glstart + 1) .. ($glstart + $pagesize) of
  *  $this->lists['url'] are downloaded and their links extracted.
  *
  * @access    public
  * @param     int  $islisten  -1: on the first batch, delete this note's recorded URL hashes and stored pages;
  *                            1: skip links whose hash is already recorded for this note in `#@__co_urls`;
  *                            any other value: no duplicate check
  * @param     int  $glstart   number of list pages already handled by earlier batches
  * @param     int  $pagesize  number of list pages to handle in this call
  * @return    int  0 when no list pages remain (always 0 for RSS), the number of
  *                 remaining list pages otherwise, or -1 when the first batch yields no links
  */
 function GetSourceUrl($islisten = 0, $glstart = 0, $pagesize = 10)
 {
     // Clean-up runs only on the first batch. (The "download not-yet-downloaded
     // content of existing seed URLs" mode does not go through seed collection.)
     if ($glstart == 0) {
         if ($islisten == -1) {
             // Re-collect everything: drop this note's URL hashes and stored pages.
             $this->dsql->ExecuteNoneQuery("DELETE FROM `#@__co_urls` WHERE nid='" . $this->noteId . "'");
             $this->dsql->ExecuteNoneQuery("DELETE FROM `#@__co_htmls` WHERE nid='" . $this->noteId . "' ");
         } else {
             // Otherwise drop only the rows flagged isexport=1.
             $this->dsql->ExecuteNoneQuery("DELETE FROM `#@__co_htmls` WHERE nid='" . $this->noteId . "' AND isexport=1 ");
         }
     }

     // Seeds come from an RSS feed.
     if ($this->lists['sourcetype'] == 'rss') {
         $links = GetRssLinks($this->lists['rssurl']);
         // Guard against a non-array result so krsort()/foreach below stay valid.
         if (!is_array($links)) {
             $links = array();
         }
         // Insert in descending key order (krsort() sorts in place).
         krsort($links);
         foreach ($links as $v) {
             if ($islisten == 1) {
                 $lrow = $this->dsql->GetOne("SELECT * FROM `#@__co_urls` WHERE nid='{$this->noteId}' AND hash='" . md5($v['link']) . "' ");
                 if (is_array($lrow)) {
                     // Hash already recorded for this note: skip the link.
                     continue;
                 }
             }
             // dtime receives the current timestamp, exactly as in the list-page branch below.
             $inquery = "INSERT INTO `#@__co_htmls` (`nid` ,`typeid`, `title` , `litpic` , `url` , `dtime` , `isdown` , `isexport` , `result`)\r\n                    VALUES ('{$this->noteId}' , '0', '" . addslashes($v['title']) . "' , '" . addslashes($v['image']) . "' , '" . addslashes($v['link']) . "' , '" . time() . "' , '0' , '0' , ''); ";
             $this->dsql->ExecuteNoneQuery($inquery);
             $inquery = "INSERT INTO `#@__co_urls`(hash,nid) VALUES ('" . md5($v['link']) . "','{$this->noteId}');";
             $this->dsql->ExecuteNoneQuery($inquery);
         }
         return 0;
     } else {
         $tmplink = array();
         $arrStart = 0;
         $moviePostion = 0;
         $endpos = $glstart + $pagesize;
         $totallen = count($this->lists['url']);
         foreach ($this->lists['url'] as $cururls) {
             $cururl = $cururls[0];
             $typeid = empty($cururls[1]) ? 0 : $cururls[1];
             $moviePostion++;
             if ($moviePostion > $endpos) {
                 // Past the end of this batch.
                 break;
             }
             if ($moviePostion > $glstart) {
                 $html = $this->DownOnePage($cururl);
                 // Restrict link extraction to the configured area when both markers are set.
                 if (trim($this->lists['areastart']) != '' && trim($this->lists['areaend']) != '') {
                     $areabody = $this->lists['areastart'] . '[var:区域]' . $this->lists['areaend'];
                     $html = $this->GetHtmlArea('[var:区域]', $areabody, $html);
                 }
                 $this->cDedeHtml->SetSource($html, $cururl, 'link');
                 foreach ($this->cDedeHtml->Links as $v) {
                     // "nothas": drop links that match the pattern (case-sensitive).
                     if ($this->lists['nothas'] != '') {
                         if (preg_match("#" . $this->lists['nothas'] . "#", $v['link'])) {
                             continue;
                         }
                     }
                     // "musthas": keep only links that match the pattern (case-insensitive).
                     if ($this->lists['musthas'] != '') {
                         if (!preg_match("#" . $this->lists['musthas'] . "#i", $v['link'])) {
                             continue;
                         }
                     }
                     $tmplink[$arrStart][0] = $v;
                     $tmplink[$arrStart][1] = $typeid;
                     $arrStart++;
                 }
                 $this->cDedeHtml->Clear();
             }
         }

         // Insert in descending key order.
         krsort($tmplink);
         $unum = count($tmplink);
         if ($unum > 0) {
             foreach ($tmplink as $vs) {
                 $v = $vs[0];
                 $typeid = $vs[1];
                 if ($islisten == 1) {
                     $lrow = $this->dsql->GetOne("SELECT * FROM `#@__co_urls` WHERE nid='{$this->noteId}' AND hash='" . md5($v['link']) . "' ");
                     if (is_array($lrow)) {
                         // Hash already recorded for this note: skip the link.
                         continue;
                     }
                 }
                 $inquery = "INSERT INTO `#@__co_htmls` (`nid` ,`typeid`, `title` , `litpic` , `url` , `dtime` , `isdown` , `isexport` , `result`)\r\n                    VALUES ('{$this->noteId}' ,'{$typeid}', '" . addslashes($v['title']) . "' , '" . addslashes($v['image']) . "' , '" . addslashes($v['link']) . "' , '" . time() . "' , '0' , '0' , ''); ";
                 $this->dsql->ExecuteNoneQuery($inquery);
                 $inquery = "INSERT INTO `#@__co_urls`(hash,nid) VALUES ('" . md5($v['link']) . "','{$this->noteId}');";
                 $this->dsql->ExecuteNoneQuery($inquery);
             }
         } elseif ($glstart == 0) {
             // An empty result counts as an error only for the first batch.
             return -1;
         }

         // Later batches that find nothing still let collection continue with the next pages.
         return ($endpos >= $totallen) ? 0 : $totallen - $endpos;
     }
 }