Exemplo n.º 1
0
 /**
  * Extracts one field from a page, using either a phpQuery selector rule or
  * a regular-expression rule.
  *
  * DOM mode ($data['dom'] truthy): $data['rule'] is either "selector@attr"
  * (the attribute "attr" is read through ->attr()) or up to three
  * newline-separated parts "selector", "method", "argument". The method
  * defaults to "html" when it is missing.
  *
  * Regex mode: the literal rule "<%content%>" returns $html unchanged. Any
  * other rule is passed through spiderTools::pregTag(); if the result still
  * contains a wildcard construct it is matched against $html and the named
  * group "content" is returned, otherwise the rule text itself is returned
  * as a constant value.
  *
  * When $data['multi'] is truthy every match is collected, stopping at the
  * first value that has already been seen, and the pieces are joined with
  * "#--iCMS.PageBreak--#".
  *
  * @param string $html page source to extract from
  * @param array  $data field definition; keys read here: dom, rule, multi
  * @param array  $rule not used by this method, kept for interface compatibility
  * @return mixed extracted content, or null when a single regex match fails
  */
 public static function match($html, $data, $rule)
 {
     $content = null;
     $is_multi = !empty($data['multi']);
     if (!empty($data['dom'])) {
         iPHP::import(iPHP_LIB . '/phpQuery.php');
         if (spider::$dataTest && !empty($_GET['pq_debug'])) {
             phpQuery::$debug = 1;
         }
         $doc = phpQuery::newDocumentHTML($html, 'UTF-8');
         if (strpos($data['rule'], '@') !== false) {
             list($content_dom, $content_attr) = explode("@", $data['rule']);
             $content_fun = 'attr';
         } else {
             // Pad so a rule with only a selector (or selector + method) does
             // not read undefined offsets.
             list($content_dom, $content_fun, $content_attr) = array_pad(explode("\n", $data['rule']), 3, '');
         }
         $content_dom = trim($content_dom);
         $content_fun = trim($content_fun);
         $content_attr = trim($content_attr);
         $content_fun or $content_fun = 'html';
         if ($is_multi) {
             $conArray = array();
             $match_hash = array();
             foreach ($doc[$content_dom] as $doc_key => $doc_value) {
                 $node = phpQuery::pq($doc_value);
                 if ($content_attr) {
                     $_content = $node->{$content_fun}($content_attr);
                 } else {
                     $_content = $node->{$content_fun}();
                 }
                 $cmd5 = md5((string) $_content);
                 // Stop at the first repeated value, exactly as before.
                 if (isset($match_hash[$cmd5])) {
                     break;
                 }
                 $conArray[$doc_key] = $_content;
                 $match_hash[$cmd5] = true;
             }
             if (spider::$dataTest) {
                 echo "<b>多条匹配结果:</b><pre>";
                 print_r($match_hash);
                 echo "</pre><hr />";
             }
             $content = implode('#--iCMS.PageBreak--#', $conArray);
         } else {
             if ($content_attr) {
                 $content = $doc[$content_dom]->{$content_fun}($content_attr);
             } else {
                 $content = $doc[$content_dom]->{$content_fun}();
             }
         }
         phpQuery::unloadDocuments($doc->getDocumentID());
         unset($doc);
         return $content;
     }

     if (trim($data['rule']) == '<%content%>') {
         return $html;
     }

     $data_rule = spiderTools::pregTag($data['rule']);
     if (!preg_match('/(<\\w+>|\\.\\*|\\.\\+|\\\\d|\\\\w)/i', $data_rule)) {
         // No wildcard in the rule: treat it as a fixed value.
         return $data_rule;
     }

     if ($is_multi) {
         $matches = array();
         preg_match_all('|' . $data_rule . '|is', $html, $matches, PREG_SET_ORDER);
         $conArray = array();
         $match_hash = array();
         foreach ((array) $matches as $mkey => $mat) {
             $piece = isset($mat['content']) ? $mat['content'] : null;
             $cmd5 = md5((string) $piece);
             if (isset($match_hash[$cmd5])) {
                 break;
             }
             $conArray[$mkey] = $piece;
             $match_hash[$cmd5] = true;
         }
         if (spider::$dataTest) {
             echo "<b>多条匹配结果:</b><pre>";
             print_r($match_hash);
             echo "</pre><hr />";
         }
         $content = implode('#--iCMS.PageBreak--#', $conArray);
     } else {
         // The original passed the undefined variable $PREG_SET_ORDER as the
         // flags argument; preg_match() needs no flags here.
         $matches = array();
         preg_match('|' . $data_rule . '|is', $html, $matches);
         $content = isset($matches['content']) ? $matches['content'] : null;
     }
     return $content;
 }
Exemplo n.º 2
0
 /**
  * Debug action: runs spiderTools::proxy_test() and dumps whatever it returns.
  */
 function do_proxy_test()
 {
     var_dump(spiderTools::proxy_test());
 }
Exemplo n.º 3
0
    echo md5($furl);
    ?>
" /></th>
            <th colspan="3"><?php 
    echo $furl;
    ?>
</th>
          </tr>
        </thead>
        <tbody class="spider-list" id="spider-list-<?php 
    echo md5($furl);
    ?>
">
    <?php 
    foreach ($lists as $lkey => $row) {
        list($_title, $_url) = spiderTools::title_url($row, $rule, $furl);
        if ($_url === false) {
            continue;
        }
        $hash = md5($_url);
        if (spider::checker($work, $pid, $_url, $_title) === true) {
            ?>
          <tr id="<?php 
            echo $hash;
            ?>
">
            <td><input type="checkbox" name="pub[]" value="<?php 
            echo $cid;
            ?>
|<?php 
            echo $pid;
Exemplo n.º 4
0
 /**
  * Picks a working proxy from the configured pool.
  *
  * The pool (spider::$proxy_array) is filled from the newline-separated
  * spider::$curl_proxy when it is empty. Entries are tried in random order
  * with a probe request to http://www.baidu.com; an entry that does not
  * answer with HTTP 200 is removed from the pool.
  *
  * The previous implementation recursed after each failure, and because an
  * emptied pool was refilled from spider::$curl_proxy on re-entry, a list
  * of dead proxies recursed without end. This version loops over the pool
  * once and returns false when it is exhausted.
  *
  * Entry format, per the original inline note:
  * socks5://127.0.0.1:1080@username:password
  *
  * @return string|false the trimmed proxy entry that answered 200, or false
  */
 public static function proxy_test()
 {
     $options = array(CURLOPT_URL => 'http://www.baidu.com', CURLOPT_REFERER => 'http://www.baidu.com', CURLOPT_USERAGENT => 'Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)', CURLOPT_TIMEOUT => 10, CURLOPT_CONNECTTIMEOUT => 8, CURLOPT_RETURNTRANSFER => 1, CURLOPT_HEADER => 0, CURLOPT_NOSIGNAL => true, CURLOPT_DNS_USE_GLOBAL_CACHE => true, CURLOPT_DNS_CACHE_TIMEOUT => 86400, CURLOPT_SSL_VERIFYPEER => false, CURLOPT_SSL_VERIFYHOST => false);
     if (empty(spider::$proxy_array)) {
         if (empty(spider::$curl_proxy)) {
             return false;
         }
         spider::$proxy_array = explode("\n", spider::$curl_proxy);
     }
     while (!empty(spider::$proxy_array)) {
         $rand_key = array_rand(spider::$proxy_array, 1);
         $proxy = trim(spider::$proxy_array[$rand_key]);
         if ($proxy === '') {
             // Blank line (e.g. trailing newline in the setting): not a proxy.
             unset(spider::$proxy_array[$rand_key]);
             continue;
         }
         $ch = curl_init();
         if ($ch === false) {
             return false;
         }
         // Build the per-attempt options from the pristine base set so that
         // settings from a failed proxy never leak into the next attempt.
         curl_setopt_array($ch, spiderTools::proxy($options, $proxy));
         curl_exec($ch);
         $info = curl_getinfo($ch);
         curl_close($ch);
         if ($info['http_code'] == 200) {
             return $proxy;
         }
         unset(spider::$proxy_array[$rand_key]);
     }
     return false;
 }
Exemplo n.º 5
0
 /**
  * Fetches one detail page and runs every data item of the rule over it.
  *
  * Source of the job parameters:
  *  - if spider::$sid is set, title/cid/pid/url/rid are loaded from the
  *    `#iCMS@__spider_url` row with that id;
  *  - otherwise spider::$rid/$pid/$title/$url are used, each overridden by
  *    the matching argument when that argument is not NULL.
  *
  * Each entry of $rule['data'] is passed to spiderContent::crawl() and the
  * result is stored in $responses under the entry's name. Name prefixes
  * (UNSET:, DATA:, PRE:, EMPTY:, ARRAY:) and a "." in the name change how
  * the input is chosen and how the result is stored; see the comments in
  * the loop.
  *
  * After the loop the iFS curl/watermark settings are reset from the site
  * config and then overridden by the rule, and spider::$callback['data'],
  * if callable, is given the chance to replace $responses.
  *
  * @param mixed $_pid   project id override (ignored when spider::$sid is set)
  * @param mixed $_rid   rule id override (ignored when spider::$sid is set)
  * @param mixed $_url   page URL override (ignored when spider::$sid is set)
  * @param mixed $_title title override (ignored when spider::$sid is set)
  * @return array|false|mixed collected data; false in shell mode when the
  *         page is empty; or whatever the data callback returns
  */
 public static function crawl($_pid = NULL, $_rid = NULL, $_url = NULL, $_title = NULL)
 {
     ini_get('safe_mode') or set_time_limit(0);
     $sid = spider::$sid;
     if ($sid) {
         // NOTE(review): $sid is interpolated into the SQL text as-is; confirm
         // it is validated as an integer before it reaches this point.
         $sRs = iDB::row("SELECT * FROM `#iCMS@__spider_url` WHERE `id`='{$sid}' LIMIT 1;");
         $title = $sRs->title;
         $cid = $sRs->cid;
         $pid = $sRs->pid;
         $url = $sRs->url;
         $rid = $sRs->rid;
     } else {
         $rid = spider::$rid;
         $pid = spider::$pid;
         $title = spider::$title;
         $url = spider::$url;
         // A non-NULL argument wins over the static default.
         $_rid === NULL or $rid = $_rid;
         $_pid === NULL or $pid = $_pid;
         $_title === NULL or $title = $_title;
         $_url === NULL or $url = $_url;
     }
     if ($pid) {
         $project = spider::project($pid);
         $prule_list_url = $project['list_url'];
     }
     $ruleA = spider::rule($rid);
     $rule = $ruleA['rule'];
     $dataArray = $rule['data'];
     // The project's list_url, when present, replaces the rule's own.
     // NOTE(review): $prule_list_url and $project are undefined when $pid is falsy.
     if ($prule_list_url) {
         $rule['list_url'] = $prule_list_url;
     }
     if (spider::$dataTest) {
         echo "<b>抓取规则信息</b><pre>";
         print_r(iS::escapeStr($ruleA));
         print_r(iS::escapeStr($project));
         echo "</pre><hr />";
     }
     spider::$curl_proxy = $rule['proxy'];
     $responses = array();
     $html = spiderTools::remote($url);
     if (empty($html)) {
         $msg = '错误:001..采集 ' . $url . '文件内容为空!请检查采集规则';
         if (spider::$work == 'shell') {
             echo "{$msg}\n";
             return false;
         } else {
             // NOTE(review): whether iPHP::alert() stops execution is not
             // visible here; if it returns, the loop below runs on empty HTML.
             iPHP::alert($msg);
         }
     }
     //      $http   = spider::check_content_code($html);
     //
     //      if($http['match']==false){
     //          return false;
     //      }
     //      $content        = $http['content'];
     spider::$allHtml = "";
     // NOTE(review): these two use spider::$url, not the local $url resolved
     // above (which may come from the DB row or from $_url) — confirm intent.
     $rule['__url__'] = spider::$url;
     $responses['reurl'] = spider::$url;
     $responses['__title__'] = $title;
     foreach ((array) $dataArray as $key => $data) {
         $content_html = $html;
         $dname = $data['name'];
         /**
          * [UNSET:name]
          * Removes the value previously collected under [name] and moves on
          * to the next data item.
          */
         if (strpos($dname, 'UNSET:') !== false) {
             $_dname = str_replace('UNSET:', '', $dname);
             unset($responses[$_dname]);
             continue;
         }
         /**
          * [DATA:name]
          * Uses the already-processed data of [name] as the raw input.
          * If earlier data exists the results accumulate.
          * Intended for processing the same data in several passes.
          */
         if (strpos($dname, 'DATA:') !== false) {
             $_dname = str_replace('DATA:', '', $dname);
             $content_html = $responses[$_dname];
             // NOTE(review): this unsets the prefixed key ("DATA:name"), and
             // $dname keeps its prefix for the store step below — confirm.
             unset($responses[$dname]);
         }
         /**
          * [PRE:name]
          * Uses the data collected under "PRE:name" as the raw input for
          * [name], then discards the PRE: entry.
          * Typically used for downloaded content.
          */
         $pre_dname = 'PRE:' . $dname;
         if (isset($responses[$pre_dname])) {
             $content_html = $responses[$pre_dname];
             unset($responses[$pre_dname]);
         }
         /**
          * [EMPTY:name]
          * Fallback item: runs only when the earlier result for [name] is
          * empty, and then stores its result under [name].
          */
         if (strpos($dname, 'EMPTY:') !== false) {
             $_dname = str_replace('EMPTY:', '', $dname);
             if (empty($responses[$_dname])) {
                 $dname = $_dname;
             } else {
                 // A value already exists: skip this item.
                 continue;
             }
         }
         $content = spiderContent::crawl($content_html, $data, $rule, $responses);
         unset($content_html);
         /**
          * [ARRAY:name]
          * Transposes a two-level result ($content[$k][$key]) into
          * $content[$key][$k] and stores it under [name].
          */
         if (strpos($dname, 'ARRAY:') !== false) {
             // if(strpos($data['rule'], 'RULE@')!==false){
             $dname = str_replace('ARRAY:', '', $dname);
             // $contentArray = $responses[$dname];
             // // $contentArray = $responses[$dname];
             $cArray = array();
             foreach ((array) $content as $k => $value) {
                 // Reuses the name $key of the outer loop; the outer value is
                 // not read again in this iteration.
                 foreach ((array) $value as $key => $val) {
                     $cArray[$key][$k] = $val;
                 }
             }
             if ($cArray) {
                 $content = $cArray;
                 unset($cArray);
             }
         }
         /**
          * [name.xxx]
          * Stores the content as an array element: the part before the first
          * "." is the outer key, the part after the last "." the inner key.
          */
         if (strpos($dname, '.') !== false) {
             $f_key = substr($dname, 0, stripos($dname, "."));
             $s_key = substr(strrchr($dname, "."), 1);
             if (isset($responses[$f_key][$s_key])) {
                 if (is_array($responses[$f_key][$s_key])) {
                     $responses[$f_key][$s_key] = array_merge($responses[$f_key][$s_key], $content);
                 } else {
                     $responses[$f_key][$s_key] .= $content;
                 }
             } else {
                 $responses[$f_key][$s_key] = $content;
             }
         } else {
             /**
              * Several items with the same name: merge their content
              * (array_merge for arrays, concatenation otherwise).
              */
             if (isset($responses[$dname])) {
                 if (is_array($responses[$dname])) {
                     $responses[$dname] = array_merge($responses[$dname], $content);
                 } else {
                     $responses[$dname] .= $content;
                 }
             } else {
                 $responses[$dname] = $content;
             }
         }
         /**
          * De-duplicate comma-separated values produced by multi-match items.
          * NOTE(review): for a dotted name, $responses[$dname] is not the key
          * written above, so this reads a key that may not exist.
          */
         if (!is_array($responses[$dname]) && $data['multi']) {
             if (strpos($responses[$dname], ',') !== false) {
                 $_dnameArray = explode(',', $responses[$dname]);
                 $dnameArray = array();
                 foreach ((array) $_dnameArray as $key => $value) {
                     $value = trim($value);
                     $value && ($dnameArray[] = $value);
                 }
                 $dnameArray = array_filter($dnameArray);
                 $dnameArray = array_unique($dnameArray);
                 $responses[$dname] = implode(',', $dnameArray);
                 unset($dnameArray, $_dnameArray);
             }
         }
         gc_collect_cycles();
     }
     // A "title" item that produced nothing falls back to the list title.
     if (isset($responses['title']) && empty($responses['title'])) {
         $responses['title'] = $responses['__title__'];
     }
     spider::$allHtml = null;
     unset($html);
     gc_collect_cycles();
     if (spider::$dataTest) {
         echo "<pre style='width:99%;word-wrap: break-word;'>";
         print_r(iS::escapeStr($responses));
         echo '<hr />';
         echo '使用内存:' . iFS::sizeUnit(memory_get_usage()) . ' 执行时间:' . iPHP::timer_stop() . 's';
         echo "</pre>";
     }
     // Reset the iFS settings to the site defaults, then apply the
     // rule-specific overrides.
     iFS::$CURLOPT_ENCODING = '';
     iFS::$CURLOPT_REFERER = '';
     iFS::$watermark_config['pos'] = iCMS::$config['watermark']['pos'];
     iFS::$watermark_config['x'] = iCMS::$config['watermark']['x'];
     iFS::$watermark_config['y'] = iCMS::$config['watermark']['y'];
     iFS::$watermark_config['img'] = iCMS::$config['watermark']['img'];
     $rule['fs']['encoding'] && (iFS::$CURLOPT_ENCODING = $rule['fs']['encoding']);
     $rule['fs']['referer'] && (iFS::$CURLOPT_REFERER = $rule['fs']['referer']);
     if ($rule['watermark_mode']) {
         iFS::$watermark_config['pos'] = $rule['watermark']['pos'];
         iFS::$watermark_config['x'] = $rule['watermark']['x'];
         iFS::$watermark_config['y'] = $rule['watermark']['y'];
         $rule['watermark']['img'] && (iFS::$watermark_config['img'] = $rule['watermark']['img']);
     }
     // Optional hook: the callback receives $responses and its return value
     // replaces it.
     if (spider::$callback['data'] && is_callable(spider::$callback['data'])) {
         $responses = call_user_func_array(spider::$callback['data'], array($responses));
     }
     return $responses;
 }
Exemplo n.º 6
0
 /**
  * Crawls the list pages of a project/rule and processes the links found.
  *
  * The list URLs come from, in increasing priority: $rule['list_urls'],
  * $project['urls'], spiderUrls::$urls, $_urls (newline-separated). Two
  * special line forms are expanded first:
  *  - "RULE@rid@url": the URL is crawled with rule [rid] and the resulting
  *    links are used as list URLs;
  *  - "...<format,begin,num,step,zeroize,reverse>...": expanded through
  *    spiderTools::mkurls().
  *
  * What happens with each list depends on $work / $callback:
  *  - $callback 'CALLBACK@URL': return the item URLs of the first list page
  *    that could be fetched;
  *  - $work 'shell': publish every unchecked item, print counters, update
  *    the project's lastupdate, return nothing;
  *  - $work 'WEB@MANUAL': return the raw lists plus ids for the template;
  *  - $work 'WEB@AUTO': return the queue of items to publish;
  *  - $work 'DATA@RULE': crawl every item with spiderData::crawl() and
  *    return the results.
  *
  * Fixes compared with the previous version: the undefined variable
  * $PREG_SET_ORDER is no longer passed to preg_match(); "RULE@rid@url" keeps
  * "@" characters inside the url; a non-array result from the nested crawl
  * no longer reaches array_merge(); the $callback argument is no longer
  * overwritten by the publish result; locals and counters that were read
  * before being set are initialised.
  *
  * @param string|null $work     'shell', 'WEB@MANUAL', 'WEB@AUTO' or 'DATA@RULE'
  * @param mixed       $pid      project id; NULL means spider::$pid
  * @param mixed       $_rid     rule id used only when no other rule id is set
  * @param string|null $_urls    newline-separated list URLs overriding all others
  * @param string|null $callback 'CALLBACK@URL' to get the item URLs back
  * @return array|false|null see the mode list above; false when the URL list
  *         is empty in shell mode
  */
 public static function crawl($work = NULL, $pid = NULL, $_rid = NULL, $_urls = null, $callback = null)
 {
     $project = array();
     $prule_list_url = null;
     // NOTE(review): 'sid' in the WEB@MANUAL result was always an undefined
     // variable (null); kept as null. Confirm whether spider::$sid was meant.
     $sid = null;
     $listsArray = array();
     $contentArray = array();
     $pubArray = array();
     $pubCount = array();
     $pubAllCount = array('count' => 0, 'success' => 0, 'error' => 0, 'published' => 0);

     $pid === NULL && ($pid = spider::$pid);
     if ($pid) {
         $project = spider::project($pid);
         $cid = $project['cid'];
         $rid = $project['rid'];
         $prule_list_url = $project['list_url'];
     } else {
         $cid = spider::$cid;
         $rid = spider::$rid;
     }
     if (empty($rid) && $_rid !== NULL) {
         $rid = $_rid;
     }
     if ($work == 'shell') {
         if (!empty($project['psleep'])) {
             $lastupdate = $project['lastupdate'];
             // Respect the project's minimum interval between two runs.
             if (time() - $lastupdate < $project['psleep']) {
                 echo '采集方案[' . $pid . "]:" . format_date($lastupdate) . "刚采集过了,请" . $project['psleep'] / 3600 . "小时后在继续采集\n";
                 return;
             }
         }
         echo "开始采集方案[" . $pid . "] 采集规则[" . $rid . "]\n";
     }
     $ruleA = spider::rule($rid);
     $rule = $ruleA['rule'];
     $urls = $rule['list_urls'];
     !empty($project['urls']) && ($urls = $project['urls']);
     spiderUrls::$urls && ($urls = spiderUrls::$urls);
     $_urls && ($urls = $_urls);
     $urlsArray = array_filter(explode("\n", (string) $urls));
     if ($work == 'shell') {
         // echo "$urls\n";
         print_r($urlsArray);
     }

     // Expand RULE@ lines and <...> templates; expanded lines are removed
     // from $urlsArray and their results appended afterwards.
     $urlsList = array();
     $pendingUrls = $urlsArray;
     foreach ($pendingUrls as $_key => $_url) {
         $_url = htmlspecialchars_decode($_url);
         /**
          * RULE@rid@url
          * Crawl url with rule [rid] and use the returned links as list URLs.
          */
         if (strpos($_url, 'RULE@') !== false) {
             // Limit to 3 parts so an "@" inside the url is preserved.
             list($___s, $sub_rid, $sub_urls) = array_pad(explode('@', $_url, 3), 3, null);
             if (spider::$ruleTest) {
                 print_r('<b>使用[rid:' . $sub_rid . ']规则抓取列表</b>:' . $sub_urls);
                 echo "<hr />";
             }
             $subList = spiderUrls::crawl($work, false, $sub_rid, $sub_urls, 'CALLBACK@URL');
             // The nested call returns null/false when nothing was fetched.
             if (is_array($subList)) {
                 $urlsList = array_merge($urlsList, $subList);
             }
             unset($urlsArray[$_key]);
         } else {
             $_matches = array();
             preg_match('|.*<(.*)>.*|is', $_url, $_matches);
             if ($_matches) {
                 list($format, $begin, $num, $step, $zeroize, $reverse) = array_pad(explode(',', $_matches[1]), 6, null);
                 $url = str_replace($_matches[1], '*', trim($_matches[0]));
                 $expanded = spiderTools::mkurls($url, $format, $begin, $num, $step, $zeroize, $reverse);
                 unset($urlsArray[$_key]);
                 $urlsList = array_merge($urlsList, $expanded);
             }
         }
     }
     $urlsList && ($urlsArray = array_merge($urlsArray, $urlsList));
     unset($pendingUrls, $urlsList);
     $urlsArray = array_unique($urlsArray);
     // spider::$useragent = $rule['user_agent'];
     // spider::$encoding  = $rule['curl']['encoding'];
     // spider::$referer   = $rule['curl']['referer'];
     // spider::$charset   = $rule['charset'];
     if (empty($urlsArray)) {
         if ($work == 'shell') {
             echo "采集列表为空!请填写!\n";
             return false;
         }
         iPHP::alert('采集列表为空!请填写!', 'js:parent.window.iCMS_MODAL.destroy();');
     }
     if ($rule['mode'] == "2") {
         iPHP::import(iPHP_LIB . '/phpQuery.php');
         if (spider::$ruleTest && !empty($_GET['pq_debug'])) {
             phpQuery::$debug = 1;
         }
     }
     spider::$curl_proxy = $rule['proxy'];
     spider::$urlslast = null;
     foreach ($urlsArray as $key => $url) {
         $url = trim($url);
         spider::$urlslast = $url;
         if ($work == 'shell') {
             echo '开始采集列表:' . $url . "\n";
         }
         if (spider::$ruleTest) {
             echo '<b>抓取列表:</b>' . $url . "<br />";
         }
         $html = spiderTools::remote($url);
         if (empty($html)) {
             continue;
         }
         if ($rule['mode'] == "2") {
             // DOM mode: select the list area, then the items inside it.
             $doc = phpQuery::newDocumentHTML($html, 'UTF-8');
             $list_area = $doc[trim($rule['list_area_rule'])];
             if ($rule['list_area_format']) {
                 $list_area_format = trim($rule['list_area_format']);
                 if (strpos($list_area_format, 'ARRAY::') !== false) {
                     $list_area_format = str_replace('ARRAY::', '', $list_area_format);
                     $lists = array();
                     foreach ($list_area as $la_key => $la) {
                         $lists[] = phpQuery::pq($list_area_format, $la);
                     }
                 } else {
                     $lists = phpQuery::pq($list_area_format, $list_area);
                 }
             } else {
                 $lists = $list_area;
             }
         } else {
             // Regex mode: cut out the list area, then match the item rule.
             $list_area_rule = spiderTools::pregTag($rule['list_area_rule']);
             if ($list_area_rule) {
                 // preg_match() takes no PREG_SET_ORDER flag; the original
                 // passed an undefined variable here.
                 $matches = array();
                 preg_match('|' . $list_area_rule . '|is', $html, $matches);
                 $list_area = isset($matches['content']) ? $matches['content'] : '';
             } else {
                 $list_area = $html;
             }
             $html = null;
             if (spider::$ruleTest) {
                 echo iS::escapeStr($rule['list_area_rule']);
                 echo "<hr />";
             }
             if ($rule['list_area_format']) {
                 $list_area = spiderTools::dataClean($rule['list_area_format'], $list_area);
             }
             $lists = array();
             preg_match_all('|' . spiderTools::pregTag($rule['list_url_rule']) . '|is', $list_area, $lists, PREG_SET_ORDER);
             // Release the text but keep the variable defined (as null) for
             // the rule-test output below, which is blank in this mode as before.
             $list_area = null;
             if ($rule['sort'] == "1") {
                 //arsort($lists);
             } elseif ($rule['sort'] == "2") {
                 asort($lists);
             } elseif ($rule['sort'] == "3") {
                 shuffle($lists);
             }
         }
         if (spider::$ruleTest) {
             echo '<b>列表区域规则:</b>' . iS::escapeStr($rule['list_area_rule']);
             echo "<hr />";
             echo '<b>列表区域抓取结果:</b>' . iS::escapeStr($list_area);
             echo "<hr />";
             echo '<b>列表链接规则:</b>' . iS::escapeStr($rule['list_url_rule']);
             echo "<hr />";
             echo '<b>网址合成规则:</b>' . iS::escapeStr($rule['list_url']);
             echo "<hr />";
         }
         if ($prule_list_url) {
             $rule['list_url'] = $prule_list_url;
         }
         // CALLBACK@URL: hand back the item URLs of this list page.
         if ($callback == 'CALLBACK@URL') {
             $cbListUrl = array();
             foreach ($lists as $lkey => $row) {
                 list(spider::$title, spider::$url) = spiderTools::title_url($row, $rule, $url);
                 if (spider::$url === false) {
                     continue;
                 }
                 // if(spider::checker($work)===true){
                 $cbListUrl[] = spider::$url;
                 // }
             }
             return $cbListUrl;
         }
         if ($work == "shell") {
             $pubCount[$url] = array('count' => count($lists), 'success' => 0, 'error' => 0, 'published' => 0);
             $pubAllCount['count'] += $pubCount[$url]['count'];
             echo "开始采集:" . $url . " 列表 " . $pubCount[$url]['count'] . "条记录\n";
             foreach ($lists as $lkey => $row) {
                 list(spider::$title, spider::$url) = spiderTools::title_url($row, $rule, $url);
                 if (spider::$url === false) {
                     continue;
                 }
                 echo "title:" . spider::$title . "\n";
                 echo "url:" . spider::$url . "\n";
                 spider::$rid = $rid;
                 $checker = spider::checker($work);
                 if ($checker === true) {
                     echo "开始采集....";
                     // Kept in its own variable so $callback is not clobbered.
                     $published = spider::publish("shell");
                     if ($published['code'] == "1001") {
                         $pubCount[$url]['success']++;
                         $pubAllCount['success']++;
                         echo "....√\n";
                         if (!empty($project['sleep'])) {
                             echo "sleep:" . $project['sleep'] . "s\n";
                             if ($rule['mode'] != "2") {
                                 unset($lists[$lkey]);
                             }
                             gc_collect_cycles();
                             sleep($project['sleep']);
                         }
                     } else {
                         $pubCount[$url]['error']++;
                         $pubAllCount['error']++;
                         echo "error\n\n";
                         continue;
                     }
                 }
                 $pubCount[$url]['published']++;
                 $pubAllCount['published']++;
             }
             if ($rule['mode'] == "2") {
                 phpQuery::unloadDocuments($doc->getDocumentID());
             } else {
                 unset($lists);
             }
         }
         if ($work == "WEB@MANUAL") {
             $listsArray[$url] = $lists;
         }
         if ($work == "WEB@AUTO" || $work == 'DATA@RULE') {
             foreach ($lists as $lkey => $row) {
                 list(spider::$title, spider::$url) = spiderTools::title_url($row, $rule, $url);
                 if (spider::$url === false) {
                     continue;
                 }
                 $hash = md5(spider::$url);
                 if (spider::$ruleTest) {
                     echo '<b>列表抓取结果:</b>' . $lkey . '<br />';
                     echo spider::$title . ' (<a href="' . APP_URI . '&do=testdata' . '&url=' . urlencode(spider::$url) . '&rid=' . $rid . '&pid=' . $pid . '&title=' . urlencode(spider::$title) . '" target="_blank">测试内容规则</a>) <br />';
                     echo spider::$url . "<br />";
                     echo $hash . "<br /><hr />";
                     continue;
                 }
                 if (!(spider::checker($work) === true || spider::$dataTest)) {
                     continue;
                 }
                 $suData = array('sid' => 0, 'url' => spider::$url, 'title' => spider::$title, 'cid' => $cid, 'rid' => $rid, 'pid' => $pid, 'hash' => $hash);
                 switch ($work) {
                     case 'DATA@RULE':
                         // NOTE(review): keyed by $lkey, so items from a later
                         // list page overwrite earlier ones with the same index.
                         $contentArray[$lkey] = spiderData::crawl();
                         unset($suData['sid']);
                         $suData['title'] = addslashes($suData['title']);
                         $suData += array('addtime' => time(), 'status' => '2', 'publish' => '2', 'indexid' => '0', 'pubdate' => '0');
                         // Nothing is inserted in data-test mode.
                         $suid = null;
                         spider::$dataTest or $suid = iDB::insert('spider_url', $suData);
                         $contentArray[$lkey]['spider_url'] = $suid;
                         break;
                     case 'WEB@AUTO':
                         $pubArray[] = $suData;
                         break;
                 }
             }
         }
     }
     unset($lists);
     gc_collect_cycles();
     switch ($work) {
         case 'WEB@AUTO':
             return $pubArray;
         case 'DATA@RULE':
             return $contentArray;
         case 'WEB@MANUAL':
             return array('cid' => $cid, 'rid' => $rid, 'pid' => $pid, 'sid' => $sid, 'work' => $work, 'rule' => $rule, 'listsArray' => $listsArray);
         case "shell":
             echo "采集数据统结果:\n";
             print_r($pubCount);
             print_r($pubAllCount);
             echo "全部采集完成....\n";
             iDB::update('spider_project', array('lastupdate' => time()), array('id' => $pid));
             break;
     }
 }
Exemplo n.º 7
0
    echo md5($furl);
    ?>
" /></th>
            <th colspan="3"><?php 
    echo $furl;
    ?>
</th>
          </tr>
        </thead>
        <tbody class="spider-list" id="spider-list-<?php 
    echo md5($furl);
    ?>
">
    <?php 
    foreach ($lists as $lkey => $row) {
        list(spider::$title, spider::$url) = spiderTools::title_url($row, $rule, $furl);
        if (spider::$url === false) {
            continue;
        }
        $hash = md5(spider::$url);
        if (spider::checker($work) === true) {
            ?>
          <tr id="<?php 
            echo $hash;
            ?>
">
            <td><input type="checkbox" name="pub[]" value="<?php 
            echo $cid;
            ?>
|<?php 
            echo $pid;