コード例 #1
0
 /**
  * Test run of the list configuration: fetch the first list page and return
  * the links that pass the configured filters.
  *
  * For an RSS source the result of GetRssLinks() is returned directly.
  * Otherwise the first configured list URL is downloaded, optionally passed
  * through GetHtmlArea() when both area markers are set, parsed with
  * DedeHtml2, and every link is checked against the "nothas" and "musthas"
  * rules (case-insensitive PCRE patterns delimited with "#").
  *
  * "musthas" may have the form "pattern|search,replacement": the part before
  * "|" is the pattern a link must match; when the part after "|" contains a
  * comma, every accepted link is additionally rewritten with
  * preg_replace("/search/", "replacement", link).
  *
  * On failure $this->errString is set and an empty array is returned.
  *
  * @param string $dourl Output parameter: receives the URL that was used
  *                      ('' when no list URL is configured).
  * @return array Accepted link entries taken from DedeHtml2::$Links (each
  *               entry is accessed through its 'link' key), or whatever
  *               GetRssLinks() returns for RSS sources.
  */
 function Testlists(&$dourl)
 {
     $links = array();

     // RSS source: the feed URL is the list URL.
     if ($this->lists['sourcetype'] == 'rss') {
         $dourl = $this->lists['rssurl'];
         return GetRssLinks($dourl);
     }

     // Regular source: only the first configured list URL is tested.
     if (!isset($this->lists['url'][0][0])) {
         $dourl = '';
         $this->errString = "配置中指定列表的网址错误!\r\n";
         return $links;
     }
     $dourl = $this->lists['url'][0][0];

     $dhtml = new DedeHtml2();
     $html = $this->DownOnePage($dourl);
     if ($html == '') {
         $this->errString = "读取网址: {$dourl} 时失败!\r\n";
         return $links;
     }

     if (trim($this->lists['areastart']) != '' && trim($this->lists['areaend']) != '') {
         $areabody = $this->lists['areastart'] . '[var:区域]' . $this->lists['areaend'];
         $html = $this->GetHtmlArea('[var:区域]', $areabody, $html);
     }
     $dhtml->SetSource($html, $dourl, 'link');

     // Escape "/" in the stored rule. Any earlier escaping is undone first so
     // that a second call does not turn "\/" into "\\/" and break the pattern.
     $this->lists['musthas'] = str_replace(
         '/',
         '\\/',
         str_replace('\\/', '/', $this->lists['musthas'])
     );

     // The rules do not change while iterating, so parse them once up front.
     $nothas = $this->lists['nothas'];
     $mustPattern = $this->lists['musthas'];
     $hasRewrite = false;
     $rewriteSearch = '';
     $rewriteReplace = '';
     if ($mustPattern != '' && strpos($mustPattern, '|') !== false) {
         $parts = explode('|', $mustPattern);
         $mustPattern = $parts[0];
         // The rewrite part is re-escaped below for its own "/" delimiter.
         $rewriteRule = str_replace('\\/', '/', $parts[1]);
         if (strpos($rewriteRule, ',') !== false) {
             $urlchange = explode(',', $rewriteRule);
             $rewriteSearch = '/' . str_replace('/', '\\/', $urlchange[0]) . '/';
             $rewriteReplace = $urlchange[1];
             $hasRewrite = true;
         }
     }

     foreach ($dhtml->Links as $s) {
         // Drop links matching the exclusion pattern.
         if ($nothas != '' && preg_match('#' . $nothas . '#i', $s['link'])) {
             continue;
         }
         // Drop links that do not match the required pattern. An empty
         // pattern (e.g. a rule of the form "|a,b") accepts every link.
         if ($mustPattern != '' && !preg_match('#' . $mustPattern . '#i', $s['link'])) {
             continue;
         }
         if ($hasRewrite) {
             $rewritten = preg_replace($rewriteSearch, $rewriteReplace, $s['link']);
             // preg_replace() yields null on a PCRE error; keep the original
             // link in that case instead of storing null.
             if ($rewritten !== null) {
                 $s['link'] = $rewritten;
             }
         }
         $links[] = $s;
     }
     return $links;
 }
コード例 #2
0
// Play-time label built from the minute ($tm) and second ($ts) values.
$playtime = "{$tm} 分 {$ts} 秒";

// Pass both player dimensions through GetAlabNum().
$width = GetAlabNum($width);
$height = GetAlabNum($height);

// Remote Flash handling: start with no remote flash selected and treat a
// missing or empty "download remote" flag as 0.
$rmflash = '';
if (empty($downremote)) {
    $downremote = 0;
}

//直接从远程粘贴
if(eregi("embed",$remoteflash)){
	$remoteflash = stripslashes($remoteflash);
	require_once(dirname(__FILE__)."/../../include/pub_dedehtml2.php");
	$dml = new DedeHtml2();
	$dml->GetLinkType = "media";
	$dml->SetSource($remoteflash,"",false);
	$marr = $dml->Medias;
	$rmfalsh = "";
	if(!is_array($marr)) $rmfalsh = "";
	else{
		if(count($marr)==1)
		{
			foreach($marr as $k=>$v){
			  $rmfalsh = $k;
			  break;
		  }
		}
		else
		{
コード例 #3
0
	/**
	 * Test the list configuration and echo what it finds.
	 *
	 * For an "archives" source the configured URLs themselves are echoed (at
	 * most 10) and the last echoed URL is returned. Otherwise the first list
	 * URL is downloaded, optionally passed through GetHtmlArea() when a link
	 * area is configured, parsed with DedeHtml2, and every link that passes
	 * the "need" / "cannot" patterns is echoed as "url - title".
	 *
	 * The patterns are matched case-insensitively with preg_match(); the
	 * previously used eregi() no longer exists as of PHP 7.0.
	 *
	 * @return string|null The last matching URL ('' when nothing matched), or
	 *                     null when the configuration or the download failed.
	 */
	function TestList()
	{
		if(!isset($this->List["url"][0])){
			echo "配置中指定列表的网址错误!\r\n";
			return ;
		}
		$dourl = $this->List["url"][0];

		// "archives" source: the configured URLs already are document URLs.
		if($this->List["sourcetype"]=="archives")
		{
			echo "配置中指定的源参数为文档的原始URL:\r\n";
			$i = 0;
			$v = "";
			foreach($this->List["url"] as $v){
				echo $v."\r\n";
				$i++;
				if($i>9) break;
			}
			return $v;
		}

		$dhtml = new DedeHtml2();
		$html = $this->DownOnePage($dourl);
		if($html==""){
			echo "读取其中的一个网址: $dourl 时失败!\r\n";
			return ;
		}

		// Only restrict to the link area when a real area rule is configured.
		$linkarea = trim($this->List["linkarea"]);
		if($linkarea!="" && $linkarea!="[var:区域]"){
			$html = $this->GetHtmlArea("[var:区域]",$this->List["linkarea"],$html);
		}

		$dhtml->GetLinkType = "link";
		$dhtml->SetSource($html,$dourl,false);

		if(!is_array($dhtml->Links)){
			echo "分析网页的HTML时失败!\r\n";
			return ;
		}

		// Build the PCRE patterns once; "#" is the delimiter, so any "#"
		// inside a rule has to be escaped.
		$needRegex = "";
		if($this->List["need"]!=""){
			$needRegex = "#".str_replace("#","\\#",$this->List["need"])."#i";
		}
		$cannotRegex = "";
		if($this->List["cannot"]!=""){
			$cannotRegex = "#".str_replace("#","\\#",$this->List["cannot"])."#i";
		}

		$TestPage = "";
		echo "按指定规则在 $dourl 发现的网址:\r\n";
		echo $this->List["need"];
		foreach($dhtml->Links as $k=>$v)
		{
			$k = $dhtml->FillUrl($k);
			// NOTE(review): as in the original, "cannot" is only applied when
			// "need" is set; with an empty "need" every link is accepted.
			if($needRegex!=""){
				if(!preg_match($needRegex,$k)) continue;
				if($cannotRegex!="" && preg_match($cannotRegex,$k)) continue;
			}
			echo "$k - ".$v."\r\n";
			$TestPage = $k;
		}
		return $TestPage;
	}
コード例 #4
0
ファイル: cjx.class.php プロジェクト: soonfine/leread
 /**
  * caijixia for dedecms
  * @version        $Id: cjx.class.php 112 2013-05-28 01:22:57Z qinjinpeng $
  * @copyright Copyright (c) 2011,caijixia for dedecms,caijixia.com.
  * @license   This is NOT a freeware, use is subject to license terms
  *
  * Test a targeted-collection rule set and show the results in an OxWindow.
  *
  * The rule set is read from the "data" value ($this->GV('data')) and parsed
  * with DedeTagParse. Only the tags list, page, charset, titlerule,
  * authorrule, sourcerule, bodyrule and fyrule are used. The method then:
  *   1. expands a "[min-max]" range in the list URL (at most 10 URLs),
  *   2. downloads the first list URL and collects up to 10 distinct links
  *      matching the page rule ("(*)" is the wildcard),
  *   3. downloads the first matched article and applies each field rule
  *      through $this->UT().
  *
  * Terminates with exit() when the list or page rule is missing.
  *
  * @return void
  */
 function ac_testregx()
 {
     global $cfg_soft_lang;
     $data = stripslashes($this->GV('data'));
     if ($cfg_soft_lang != 'utf-8') {
         $data = utf82gb($data);
     }
     require_once DEDEINC . '/dedetag.class.php';
     $this->dtp = new DedeTagParse();
     $this->dtp->LoadString($data);

     // Copy only the known rule tags. Creating a local variable per tag name
     // would let the submitted data overwrite unrelated variables (including
     // the global-bound $cfg_soft_lang) and leaves missing rules undefined.
     $rules = array(
         'list'       => '',
         'page'       => '',
         'charset'    => '',
         'titlerule'  => '',
         'authorrule' => '',
         'sourcerule' => '',
         'bodyrule'   => '',
         'fyrule'     => '',
     );
     foreach ($this->dtp->CTags as $ctag) {
         $itemName = $ctag->TagName;
         if (isset($rules[$itemName])) {
             $rules[$itemName] = trim($ctag->InnerText);
         }
     }
     $list = $rules['list'];
     $page = $rules['page'];
     // NOTE(review): when no charset tag is supplied this stays '' and the
     // conversion below falls into the gb2utf8() branch, as it did before.
     $charset = $rules['charset'];

     if (empty($list) || empty($page) || $list == 'http://' || $page == 'http://') {
         exit('规则错误');
     }

     // Expand a "[min-max]" page range into at most 10 list URLs.
     $rangeRegex = "/\\[([0-9]*-[0-9]*)\\]/";
     $listarr = array();
     if (preg_match($rangeRegex, $list, $out)) {
         list($min, $max) = explode('-', $out[1]);
         // Either bound may be empty ("[-]"); cast so the arithmetic below
         // cannot fail on a non-numeric string.
         $min = (int) $min;
         $max = (int) $max;
         // Cap at 10 entries: min .. min + 9 inclusive.
         if ($max - $min > 9) {
             $max = $min + 9;
         }
         for ($i = $min; $i <= $max; $i++) {
             $listarr[] = preg_replace($rangeRegex, (string) $i, $list);
         }
         $list = preg_replace($rangeRegex, (string) $min, $list);
     } else {
         $listarr[] = $list;
     }

     // Download the first list page and convert it to the site charset.
     $str = $this->downfile($list);
     if ($cfg_soft_lang != $charset) {
         if ($charset == 'utf-8') {
             $str = utf82gb($str);
         } else {
             $str = gb2utf8($str);
         }
     }

     // Turn the page rule into a regex: everything is literal except "(*)".
     $page = str_replace('(*)', '###', $page);
     $page = preg_quote($page, '/');
     $page = str_replace('###', '([0-9a-zA-Z\\.\\-\\/_]*)', $page);

     $dhtml = new DedeHtml2();
     $dhtml->SetSource($str, $list, 'link');

     // Collect up to 10 distinct article links; remember the first one.
     $lss = array();
     $first = null;
     foreach ($dhtml->Links as $s) {
         if (!preg_match('/' . $page . '/iU', $s['link'])) {
             continue;
         }
         if (isset($lss[$s['link']])) {
             continue;
         }
         if ($first === null) {
             $first = $s['link'];
         }
         $lss[$s['link']] = $s['link'];
         if (count($lss) == 10) {
             break;
         }
     }

     $GLOBALS['wintitle'] = "采集侠-测试定向采集规则";
     $GLOBALS['wecome_info'] = "采集侠定向采集::采集规则测试";
     $win = new OxWindow();

     $win->AddTitle('匹配到的列表地址(前10个)');
     $msg = '';
     foreach ($listarr as $v) {
         $msg .= $v . "<br>";
     }
     $win->AddMsgItem($msg);

     $win->AddTitle('第一个列表页匹配到的文章地址(前10个)');
     $msg = '';
     foreach ($lss as $v) {
         $msg .= $v . "<br>";
     }
     $win->AddMsgItem($msg);

     // Without a matched article there is nothing to download or test.
     if ($first === null) {
         $win->AddMsgItem('未匹配到任何文章地址,无法继续测试文章规则');
         $GLOBALS['winform'] = $win->GetWindow("hand");
         $win->Display();
         return;
     }

     // Download the first article and convert it to the site charset.
     $str = $this->downfile($first);
     if ($cfg_soft_lang != $charset) {
         if ($charset == 'utf-8') {
             $str = utf82gb($str);
         } else {
             $str = gb2utf8($str);
         }
     }
     $win->AddTitle("<font color=black>测试采集第一篇文章:{$first} </font>");

     // Each field rule is tested the same way: an empty rule means the
     // automatic rule is used and there is nothing to test.
     $sections = array(
         array('文章标题', $rules['titlerule']),
         array('作者', $rules['authorrule']),
         array('来源', $rules['sourcerule']),
         array('文章内容(测试无法采集分页内容)', $rules['bodyrule']),
     );
     foreach ($sections as $section) {
         $win->AddTitle($section[0]);
         if (empty($section[1])) {
             $win->AddMsgItem('自动规则不需要测试');
         } else {
             $win->AddMsgItem($this->UT($str, $section[1]));
         }
     }

     // Pagination: extract the pagination fragment and list the links in it.
     $win->AddTitle('分页链接');
     if (empty($rules['fyrule'])) {
         $win->AddMsgItem('自动采集分页不需要测试');
     } else {
         $fylink = $this->UT($str, $rules['fyrule']);
         $dhtml = new DedeHtml2();
         $dhtml->SetSource($fylink, $first, 'link');
         $relink = '';
         foreach ($dhtml->Links as $k => $v) {
             $relink .= $k . "<br>";
         }
         $win->AddMsgItem($relink);
     }

     $GLOBALS['winform'] = $win->GetWindow("hand");
     $win->Display();
 }
コード例 #5
0
 /**
  * Test run of the list configuration: fetch the first list page and return
  * the links that pass the configured filters.
  *
  * For an RSS source the result of GetRssLinks() is returned directly.
  * Otherwise the first configured list URL is downloaded, optionally passed
  * through GetHtmlArea() when both area markers are set, parsed with
  * DedeHtml2, and every link is checked against the "nothas" (must not
  * match) and "musthas" (must match) patterns.
  *
  * The patterns are matched case-insensitively with preg_match(); the
  * previously used eregi() no longer exists as of PHP 7.0.
  *
  * On failure $this->errString is set and an empty array is returned.
  *
  * @param string $dourl Output parameter: receives the URL that was used
  *                      ('' when no list URL is configured).
  * @return array Accepted link entries taken from DedeHtml2::$Links (each
  *               entry is accessed through its 'link' key), or whatever
  *               GetRssLinks() returns for RSS sources.
  */
 function Testlists(&$dourl)
 {
     $links = array();

     // RSS source: the feed URL is the list URL.
     if ($this->lists['sourcetype'] == 'rss') {
         $dourl = $this->lists['rssurl'];
         return GetRssLinks($dourl);
     }

     // Regular source: only the first configured list URL is tested.
     if (!isset($this->lists['url'][0][0])) {
         $dourl = '';
         $this->errString = "配置中指定列表的网址错误!\r\n";
         return $links;
     }
     $dourl = $this->lists['url'][0][0];

     $dhtml = new DedeHtml2();
     $html = $this->DownOnePage($dourl);
     if ($html == '') {
         $this->errString = "读取网址: {$dourl} 时失败!\r\n";
         return $links;
     }

     if (trim($this->lists['areastart']) != '' && trim($this->lists['areaend']) != '') {
         $areabody = $this->lists['areastart'] . '[var:区域]' . $this->lists['areaend'];
         $html = $this->GetHtmlArea('[var:区域]', $areabody, $html);
     }
     $dhtml->SetSource($html, $dourl, 'link');

     // Build the PCRE patterns once; "#" is the delimiter, so any "#" inside
     // a rule has to be escaped.
     $nothasRegex = '';
     if ($this->lists['nothas'] != '') {
         $nothasRegex = '#' . str_replace('#', '\\#', $this->lists['nothas']) . '#i';
     }
     $musthasRegex = '';
     if ($this->lists['musthas'] != '') {
         $musthasRegex = '#' . str_replace('#', '\\#', $this->lists['musthas']) . '#i';
     }

     foreach ($dhtml->Links as $s) {
         // Drop links matching the exclusion pattern.
         if ($nothasRegex != '' && preg_match($nothasRegex, $s['link'])) {
             continue;
         }
         // Drop links that do not match the required pattern.
         if ($musthasRegex != '' && !preg_match($musthasRegex, $s['link'])) {
             continue;
         }
         $links[] = $s;
     }
     return $links;
 }