/**
 * Preview the links a list rule would collect from the first configured source.
 *
 * For an RSS source the links are taken from GetRssLinks(). Otherwise the first
 * list URL is downloaded, optionally narrowed to the configured area
 * (areastart/areaend), parsed with DedeHtml2, and each link is filtered with
 * the "nothas" and "musthas" rules.
 *
 * The "musthas" rule has two accepted forms, both matched case-insensitively:
 *   - "PATTERN"              keep links matching PATTERN
 *   - "PATTERN|FROM,TO"      keep links matching PATTERN, then rewrite each
 *                            kept link with preg_replace('/FROM/', TO, link)
 *
 * On failure $this->errString is set and an empty array is returned.
 *
 * @param string $dourl Output parameter: receives the URL that was used as the
 *                      source ('' when no list URL is configured).
 * @return array Link entries; for HTML sources each entry is an element of
 *               DedeHtml2::$Links and carries at least a 'link' key.
 */
function Testlists(&$dourl)
{
    $links = array();

    // RSS source: links come straight from the feed.
    if ($this->lists['sourcetype'] == 'rss') {
        $dourl = $this->lists['rssurl'];
        $links = GetRssLinks($dourl);
        return $links;
    }

    // Normal case: use the first configured list URL.
    if (isset($this->lists['url'][0][0])) {
        $dourl = $this->lists['url'][0][0];
    } else {
        $dourl = '';
        $this->errString = "配置中指定列表的网址错误!\r\n";
        return $links;
    }

    $dhtml = new DedeHtml2();
    $html = $this->DownOnePage($dourl);
    if ($html == '') {
        $this->errString = "读取网址: {$dourl} 时失败!\r\n";
        return $links;
    }

    // Restrict parsing to the configured region when both delimiters are set.
    if (trim($this->lists['areastart']) != '' && trim($this->lists['areaend']) != '') {
        $areabody = $this->lists['areastart'] . '[var:区域]' . $this->lists['areaend'];
        $html = $this->GetHtmlArea('[var:区域]', $areabody, $html);
    }

    // NOTE(review): the return value is unused here; the call is kept in case
    // ExecTime() has side effects - confirm and remove if it does not.
    $t1 = ExecTime();
    $dhtml->SetSource($html, $dourl, 'link');

    // Work on local copies. Writing the escaped rule back into $this->lists
    // (as the previous code did) escaped it again on every call and eventually
    // produced a pattern that no longer matched.
    $nothas = $this->lists['nothas'];
    $musthas = str_replace('/', '\\/', $this->lists['musthas']);

    // Parse the musthas rule once, outside the loop.
    $mustPattern = $musthas;
    $rewriteRegex = null;
    $rewriteTo = '';
    if ($musthas != '' && strpos($musthas, '|') !== false) {
        $parts = explode('|', $musthas);
        $mustPattern = $parts[0];
        if (strpos($parts[1], ',') !== false) {
            // Undo the slash escaping before splitting, then escape only the
            // search half because it is wrapped in "/" delimiters.
            $urlchange = explode(',', str_replace('\\/', '/', $parts[1]));
            $rewriteRegex = '/' . str_replace('/', '\\/', $urlchange[0]) . '/';
            $rewriteTo = $urlchange[1];
        }
    }

    foreach ($dhtml->Links as $s) {
        if ($nothas != '' && preg_match("#" . $nothas . "#i", $s['link'])) {
            continue;
        }
        if ($musthas != '') {
            if (!preg_match("#" . $mustPattern . "#i", $s['link'])) {
                continue;
            }
            if ($rewriteRegex !== null) {
                $s['link'] = preg_replace($rewriteRegex, $rewriteTo, $s['link']);
            }
        }
        $links[] = $s;
    }

    return $links;
}
$playtime = "$tm 分 $ts 秒"; $width = GetAlabNum($width); $height = GetAlabNum($height); //$flashurl = ""; //处理远程的Flash //------------------ $rmflash = ""; if(empty($downremote)) $downremote = 0; //直接从远程粘贴 if(eregi("embed",$remoteflash)){ $remoteflash = stripslashes($remoteflash); require_once(dirname(__FILE__)."/../../include/pub_dedehtml2.php"); $dml = new DedeHtml2(); $dml->GetLinkType = "media"; $dml->SetSource($remoteflash,"",false); $marr = $dml->Medias; $rmfalsh = ""; if(!is_array($marr)) $rmfalsh = ""; else{ if(count($marr)==1) { foreach($marr as $k=>$v){ $rmfalsh = $k; break; } } else {
/**
 * Print the links that the list rule finds on the first configured list page.
 *
 * Output is written with echo. When the source type is "archives" the
 * configured URLs themselves are echoed (at most ten) and the last echoed one
 * is returned. Otherwise the first list URL is downloaded, optionally narrowed
 * to the "linkarea" region, parsed with DedeHtml2, and every link that matches
 * the "need" pattern and does not match the "cannot" pattern is echoed.
 *
 * "need" and "cannot" are applied as case-insensitive PCRE patterns delimited
 * by "#". They were previously evaluated with eregi(), which no longer exists
 * in PHP 7+; a literal "#" inside a pattern must therefore be escaped.
 *
 * @return string|null The last matching URL ('' when nothing matched), or null
 *                     when the configuration or the download/parse step failed.
 */
function TestList()
{
    if (isset($this->List["url"][0])) {
        $dourl = $this->List["url"][0];
    } else {
        echo "配置中指定列表的网址错误!\r\n";
        return;
    }

    // "archives" sources list article URLs directly, so just show them.
    if ($this->List["sourcetype"] == "archives") {
        echo "配置中指定的源参数为文档的原始URL:\r\n";
        $i = 0;
        $v = "";
        foreach ($this->List["url"] as $v) {
            echo $v . "\r\n";
            $i++;
            if ($i > 9) {
                break;
            }
        }
        return $v;
    }

    $dhtml = new DedeHtml2();
    $html = $this->DownOnePage($dourl);
    if ($html == "") {
        echo "读取其中的一个网址: $dourl 时失败!\r\n";
        return;
    }

    // Narrow the HTML to the configured link area unless it is empty or is
    // just the bare placeholder.
    if (trim($this->List["linkarea"]) != "" && trim($this->List["linkarea"]) != "[var:区域]") {
        $html = $this->GetHtmlArea("[var:区域]", $this->List["linkarea"], $html);
    }

    $dhtml->GetLinkType = "link";
    $dhtml->SetSource($html, $dourl, false);

    $TestPage = "";
    if (!is_array($dhtml->Links)) {
        echo "分析网页的HTML时失败!\r\n";
        return;
    }

    echo "按指定规则在 $dourl 发现的网址:\r\n";
    // The "need" pattern is echoed as-is (no trailing newline), as before.
    echo $this->List["need"];

    $need = $this->List["need"];
    foreach ($dhtml->Links as $k => $v) {
        $k = $dhtml->FillUrl($k);
        if ($need != "") {
            if (!preg_match("#" . $need . "#i", $k)) {
                continue;
            }
            // "cannot" is only consulted for links that passed "need".
            if ($this->List["cannot"] != "" && preg_match("#" . $this->List["cannot"] . "#i", $k)) {
                continue;
            }
        }
        echo "$k - " . $v . "\r\n";
        $TestPage = $k;
    }

    return $TestPage;
}
/**
 * caijixia for dedecms
 * @version $Id: cjx.class.php 112 2013-05-28 01:22:57Z qinjinpeng $
 * @copyright Copyright (c) 2011,caijixia for dedecms,caijixia.com.
 * @license This is NOT a freeware, use is subject to license terms
 *
 * Test a targeted-collection rule and render the result in an OxWindow.
 *
 * The rule is read from the request value "data" and parsed with
 * DedeTagParse. The recognised tags are: list, page, charset, titlerule,
 * authorrule, sourcerule, bodyrule and fyrule; any other tag is ignored.
 * The method then:
 *   1. expands a "[min-max]" range in the list URL (at most ten pages),
 *   2. downloads the first list page and collects up to ten distinct links
 *      matching the "page" rule, where "(*)" is a wildcard,
 *   3. downloads the first matched article and shows what the title, author,
 *      source, body and pagination rules extract from it.
 *
 * Terminates with exit('规则错误') when the list or page rule is missing.
 *
 * NOTE(review): URLs and extracted fragments are written into the window
 * without HTML escaping - confirm whether OxWindow escapes its items.
 *
 * @return void
 */
function ac_testregx()
{
    global $cfg_soft_lang;

    $data = stripslashes($this->GV('data'));
    if ($cfg_soft_lang != 'utf-8') {
        $data = utf82gb($data);
    }
    require_once DEDEINC . '/dedetag.class.php';
    $this->dtp = new DedeTagParse();
    $this->dtp->LoadString($data);

    // Only known rule names are accepted. The previous "${$itemName} = ..."
    // let request-supplied tag names overwrite any local variable.
    $rule = array(
        'list' => '',
        'page' => '',
        'charset' => '',
        'titlerule' => '',
        'authorrule' => '',
        'sourcerule' => '',
        'bodyrule' => '',
        'fyrule' => '',
    );
    foreach ($this->dtp->CTags as $ctag) {
        $itemName = $ctag->TagName;
        if (is_string($itemName) && isset($rule[$itemName])) {
            $rule[$itemName] = trim($ctag->InnerText);
        }
    }
    $list = $rule['list'];
    $page = $rule['page'];
    $charset = $rule['charset'];

    if (empty($list) || empty($page) || $list == 'http://' || $page == 'http://') {
        exit('规则错误');
    }

    // Expand "[min-max]" in the list URL, capped at ten pages.
    $rangeRegex = "/\\[([0-9]*-[0-9]*)\\]/";
    $listarr = array();
    if (preg_match($rangeRegex, $list, $out)) {
        list($min, $max) = explode('-', $out[1]);
        // A missing bound would otherwise be used in arithmetic as ''.
        if ($min === '') {
            $min = 0;
        }
        if ($max === '') {
            $max = 0;
        }
        // "> 9": a span of exactly ten used to slip through and list 11 pages.
        if ($max - $min > 9) {
            $max = $min + 9;
        }
        for ($i = $min; $i <= $max; $i++) {
            $listarr[] = preg_replace($rangeRegex, $i, $list);
        }
        $list = preg_replace($rangeRegex, $min, $list);
    } else {
        $listarr[] = $list;
    }

    // Download the first list page and convert it to the site charset.
    $str = $this->downfile($list);
    if ($cfg_soft_lang != $charset) {
        if ($charset == 'utf-8') {
            $str = utf82gb($str);
        } else {
            $str = gb2utf8($str);
        }
    }

    // Turn the page rule into a regex: everything literal except "(*)".
    $page = str_replace('(*)', '###', $page);
    $page = preg_quote($page, '/');
    $page = str_replace('###', '([0-9a-zA-Z\\.\\-\\/_]*)', $page);

    $dhtml = new DedeHtml2();
    $dhtml->SetSource($str, $list, 'link');

    // Collect up to ten distinct matching article links.
    $lss = array();
    $first = '';
    $i = 0;
    foreach ($dhtml->Links as $s) {
        if (!preg_match('/' . $page . '/iU', $s['link'])) {
            continue;
        }
        if (isset($lss[$s['link']])) {
            continue;
        }
        if ($i == 0) {
            $first = $s['link'];
        }
        $lss[$s['link']] = $s['link'];
        $i++;
        if ($i == 10) {
            break;
        }
    }

    $GLOBALS['wintitle'] = "采集侠-测试定向采集规则";
    $GLOBALS['wecome_info'] = "采集侠定向采集::采集规则测试";
    $win = new OxWindow();

    $win->AddTitle('匹配到的列表地址(前10个)');
    $msg = '';
    foreach ($listarr as $v) {
        $msg .= $v . "<br>";
    }
    $win->AddMsgItem($msg);

    $win->AddTitle('第一个列表页匹配到的文章地址(前10个)');
    $msg = '';
    foreach ($lss as $v) {
        $msg .= $v . "<br>";
    }
    $win->AddMsgItem($msg);

    // Without a matched article there is nothing to download; previously an
    // undefined $first was passed to downfile().
    if ($first === '') {
        $win->AddMsgItem('没有匹配到文章地址,无法测试文章采集');
        $GLOBALS['winform'] = $win->GetWindow("hand");
        $win->Display();
        return;
    }

    // Download the first article and convert it to the site charset.
    $str = $this->downfile($first);
    if ($cfg_soft_lang != $charset) {
        if ($charset == 'utf-8') {
            $str = utf82gb($str);
        } else {
            $str = gb2utf8($str);
        }
    }
    $win->AddTitle("<font color=black>测试采集第一篇文章:{$first} </font>");

    // Each field is either extracted with its rule or reported as automatic.
    $fieldTests = array(
        array('文章标题', $rule['titlerule']),
        array('作者', $rule['authorrule']),
        array('来源', $rule['sourcerule']),
        array('文章内容(测试无法采集分页内容)', $rule['bodyrule']),
    );
    foreach ($fieldTests as $fieldTest) {
        $win->AddTitle($fieldTest[0]);
        if (empty($fieldTest[1])) {
            $win->AddMsgItem('自动规则不需要测试');
        } else {
            $win->AddMsgItem($this->UT($str, $fieldTest[1]));
        }
    }

    $win->AddTitle('分页链接');
    if (empty($rule['fyrule'])) {
        $win->AddMsgItem('自动采集分页不需要测试');
    } else {
        $fylink = $this->UT($str, $rule['fyrule']);
        $dhtml = new DedeHtml2();
        $dhtml->SetSource($fylink, $first, 'link');
        $relink = '';
        // NOTE(review): this loop prints the array keys of Links, whereas the
        // list-page loop above reads $s['link'] - confirm which is intended.
        foreach ($dhtml->Links as $k => $v) {
            $relink .= $k . "<br>";
        }
        $win->AddMsgItem($relink);
    }

    $GLOBALS['winform'] = $win->GetWindow("hand");
    $win->Display();
}
/**
 * Preview the links a list rule would collect from the first configured source.
 *
 * For an RSS source the links are taken from GetRssLinks(). Otherwise the first
 * list URL is downloaded, optionally narrowed to the configured area
 * (areastart/areaend), parsed with DedeHtml2, and each link is kept only if it
 * does not match "nothas" and does match "musthas" (each rule is skipped when
 * empty).
 *
 * Both rules are applied as case-insensitive PCRE patterns delimited by "#".
 * They were previously evaluated with eregi(), which no longer exists in
 * PHP 7+; a literal "#" inside a rule must therefore be escaped.
 *
 * On failure $this->errString is set and an empty array is returned.
 *
 * @param string $dourl Output parameter: receives the URL that was used as the
 *                      source ('' when no list URL is configured).
 * @return array Link entries; for HTML sources each entry is an element of
 *               DedeHtml2::$Links and carries at least a 'link' key.
 */
function Testlists(&$dourl)
{
    $links = array();

    // RSS source: links come straight from the feed.
    if ($this->lists['sourcetype'] == 'rss') {
        $dourl = $this->lists['rssurl'];
        $links = GetRssLinks($dourl);
        return $links;
    }

    // Normal case: use the first configured list URL.
    if (isset($this->lists['url'][0][0])) {
        $dourl = $this->lists['url'][0][0];
    } else {
        $dourl = '';
        $this->errString = "配置中指定列表的网址错误!\r\n";
        return $links;
    }

    $dhtml = new DedeHtml2();
    $html = $this->DownOnePage($dourl);
    if ($html == '') {
        $this->errString = "读取网址: {$dourl} 时失败!\r\n";
        return $links;
    }

    // Restrict parsing to the configured region when both delimiters are set.
    if (trim($this->lists['areastart']) != '' && trim($this->lists['areaend']) != '') {
        $areabody = $this->lists['areastart'] . '[var:区域]' . $this->lists['areaend'];
        $html = $this->GetHtmlArea('[var:区域]', $areabody, $html);
    }

    // NOTE(review): the return value is unused here; the call is kept in case
    // ExecTime() has side effects - confirm and remove if it does not.
    $t1 = ExecTime();
    $dhtml->SetSource($html, $dourl, 'link');

    $nothas = $this->lists['nothas'];
    $musthas = $this->lists['musthas'];
    foreach ($dhtml->Links as $s) {
        if ($nothas != '' && preg_match("#" . $nothas . "#i", $s['link'])) {
            continue;
        }
        if ($musthas != '' && !preg_match("#" . $musthas . "#i", $s['link'])) {
            continue;
        }
        $links[] = $s;
    }

    return $links;
}