Esempio n. 1
0
 public static function crawl($_pid = NULL, $_rid = NULL, $_url = NULL, $_title = NULL)
 {
     ini_get('safe_mode') or set_time_limit(0);
     $sid = spider::$sid;
     if ($sid) {
         $sRs = iDB::row("SELECT * FROM `#iCMS@__spider_url` WHERE `id`='{$sid}' LIMIT 1;");
         $title = $sRs->title;
         $cid = $sRs->cid;
         $pid = $sRs->pid;
         $url = $sRs->url;
         $rid = $sRs->rid;
     } else {
         $rid = spider::$rid;
         $pid = spider::$pid;
         $title = spider::$title;
         $url = spider::$url;
         $_rid === NULL or $rid = $_rid;
         $_pid === NULL or $pid = $_pid;
         $_title === NULL or $title = $_title;
         $_url === NULL or $url = $_url;
     }
     if ($pid) {
         $project = spider::project($pid);
         $prule_list_url = $project['list_url'];
     }
     $ruleA = spider::rule($rid);
     $rule = $ruleA['rule'];
     $dataArray = $rule['data'];
     if ($prule_list_url) {
         $rule['list_url'] = $prule_list_url;
     }
     if (spider::$dataTest) {
         echo "<b>抓取规则信息</b><pre>";
         print_r(iS::escapeStr($ruleA));
         print_r(iS::escapeStr($project));
         echo "</pre><hr />";
     }
     spider::$curl_proxy = $rule['proxy'];
     $responses = array();
     $html = spiderTools::remote($url);
     if (empty($html)) {
         $msg = '错误:001..采集 ' . $url . '文件内容为空!请检查采集规则';
         if (spider::$work == 'shell') {
             echo "{$msg}\n";
             return false;
         } else {
             iPHP::alert($msg);
         }
     }
     //      $http   = spider::check_content_code($html);
     //
     //      if($http['match']==false){
     //          return false;
     //      }
     //      $content        = $http['content'];
     spider::$allHtml = "";
     $rule['__url__'] = spider::$url;
     $responses['reurl'] = spider::$url;
     $responses['__title__'] = $title;
     foreach ((array) $dataArray as $key => $data) {
         $content_html = $html;
         $dname = $data['name'];
         /**
          * [UNSET:name]
          * 注销[name]
          * @var string
          */
         if (strpos($dname, 'UNSET:') !== false) {
             $_dname = str_replace('UNSET:', '', $dname);
             unset($responses[$_dname]);
             continue;
         }
         /**
          * [DATA:name]
          * 把之前[name]处理完的数据当作原始数据
          * 如果之前有数据会叠加
          * 用于数据多次处理
          * @var string
          */
         if (strpos($dname, 'DATA:') !== false) {
             $_dname = str_replace('DATA:', '', $dname);
             $content_html = $responses[$_dname];
             unset($responses[$dname]);
         }
         /**
          * [PRE:name]
          * 把PRE:name采集到的数据 当做原始数据
          * 一般用于下载内容
          * @var string
          */
         $pre_dname = 'PRE:' . $dname;
         if (isset($responses[$pre_dname])) {
             $content_html = $responses[$pre_dname];
             unset($responses[$pre_dname]);
         }
         /**
          * [EMPTY:name]
          * 如果[name]之前抓取结果数据为空使用这个数据项替换
          * @var string
          */
         if (strpos($dname, 'EMPTY:') !== false) {
             $_dname = str_replace('EMPTY:', '', $dname);
             if (empty($responses[$_dname])) {
                 $dname = $_dname;
             } else {
                 //有值不执行抓取
                 continue;
             }
         }
         $content = spiderContent::crawl($content_html, $data, $rule, $responses);
         unset($content_html);
         if (strpos($dname, 'ARRAY:') !== false) {
             // if(strpos($data['rule'], 'RULE@')!==false){
             $dname = str_replace('ARRAY:', '', $dname);
             // $contentArray = $responses[$dname];
             // // $contentArray = $responses[$dname];
             $cArray = array();
             foreach ((array) $content as $k => $value) {
                 foreach ((array) $value as $key => $val) {
                     $cArray[$key][$k] = $val;
                 }
             }
             if ($cArray) {
                 $content = $cArray;
                 unset($cArray);
             }
         }
         /**
          * [name.xxx]
          * 采集内容做为数组
          */
         if (strpos($dname, '.') !== false) {
             $f_key = substr($dname, 0, stripos($dname, "."));
             $s_key = substr(strrchr($dname, "."), 1);
             if (isset($responses[$f_key][$s_key])) {
                 if (is_array($responses[$f_key][$s_key])) {
                     $responses[$f_key][$s_key] = array_merge($responses[$f_key][$s_key], $content);
                 } else {
                     $responses[$f_key][$s_key] .= $content;
                 }
             } else {
                 $responses[$f_key][$s_key] = $content;
             }
         } else {
             /**
              * 多个name 内容合并
              */
             if (isset($responses[$dname])) {
                 if (is_array($responses[$dname])) {
                     $responses[$dname] = array_merge($responses[$dname], $content);
                 } else {
                     $responses[$dname] .= $content;
                 }
             } else {
                 $responses[$dname] = $content;
             }
         }
         /**
          * 对匹配多条的数据去重过滤
          */
         if (!is_array($responses[$dname]) && $data['multi']) {
             if (strpos($responses[$dname], ',') !== false) {
                 $_dnameArray = explode(',', $responses[$dname]);
                 $dnameArray = array();
                 foreach ((array) $_dnameArray as $key => $value) {
                     $value = trim($value);
                     $value && ($dnameArray[] = $value);
                 }
                 $dnameArray = array_filter($dnameArray);
                 $dnameArray = array_unique($dnameArray);
                 $responses[$dname] = implode(',', $dnameArray);
                 unset($dnameArray, $_dnameArray);
             }
         }
         gc_collect_cycles();
     }
     if (isset($responses['title']) && empty($responses['title'])) {
         $responses['title'] = $responses['__title__'];
     }
     spider::$allHtml = null;
     unset($html);
     gc_collect_cycles();
     if (spider::$dataTest) {
         echo "<pre style='width:99%;word-wrap: break-word;'>";
         print_r(iS::escapeStr($responses));
         echo '<hr />';
         echo '使用内存:' . iFS::sizeUnit(memory_get_usage()) . ' 执行时间:' . iPHP::timer_stop() . 's';
         echo "</pre>";
     }
     iFS::$CURLOPT_ENCODING = '';
     iFS::$CURLOPT_REFERER = '';
     iFS::$watermark_config['pos'] = iCMS::$config['watermark']['pos'];
     iFS::$watermark_config['x'] = iCMS::$config['watermark']['x'];
     iFS::$watermark_config['y'] = iCMS::$config['watermark']['y'];
     iFS::$watermark_config['img'] = iCMS::$config['watermark']['img'];
     $rule['fs']['encoding'] && (iFS::$CURLOPT_ENCODING = $rule['fs']['encoding']);
     $rule['fs']['referer'] && (iFS::$CURLOPT_REFERER = $rule['fs']['referer']);
     if ($rule['watermark_mode']) {
         iFS::$watermark_config['pos'] = $rule['watermark']['pos'];
         iFS::$watermark_config['x'] = $rule['watermark']['x'];
         iFS::$watermark_config['y'] = $rule['watermark']['y'];
         $rule['watermark']['img'] && (iFS::$watermark_config['img'] = $rule['watermark']['img']);
     }
     if (spider::$callback['data'] && is_callable(spider::$callback['data'])) {
         $responses = call_user_func_array(spider::$callback['data'], array($responses));
     }
     return $responses;
 }
Esempio n. 2
0
 function do_createArticle($aid = null)
 {
     $category = $this->PG['cid'];
     $startime = $this->PG['startime'];
     $endtime = $this->PG['endtime'];
     $startid = $this->PG['startid'];
     $endid = $this->PG['endid'];
     $perpage = (int) $this->PG['perpage'];
     $offset = (int) $this->PG['offset'];
     $orderby = $this->PG['orderby'];
     $whereSQL = "WHERE `status` ='1'";
     $aid === null && ($aid = $this->PG['aid']);
     if ($aid) {
         $title = self::Article($aid);
         iPHP::success($title . '<hr />生成静态完成!');
     }
     $category[0] == 'all' && ($category = $this->get_category(iCMS_APP_ARTICLE));
     if ($category) {
         $cids = implode(',', (array) $category);
         $whereSQL .= " AND `cid` IN({$cids})";
     }
     $startime && ($whereSQL .= " AND `pubdate`>=UNIX_TIMESTAMP('{$startime} 00:00:00')");
     $endtime && ($whereSQL .= " AND `pubdate`<=UNIX_TIMESTAMP('{$endtime} 23:59:59')");
     $startid && ($whereSQL .= " AND `id`>='{$startid}'");
     $endid && ($whereSQL .= " AND `id`<='{$endid}'");
     $perpage or $perpage = $this->CP;
     $orderby or $orderby = "id DESC";
     $total = iPHP::total(false, "SELECT count(*) FROM `#iCMS@__article` {$whereSQL}", "G");
     $looptimes = ceil($total / $perpage);
     $offset = $this->page * $perpage;
     $rs = iDB::all("SELECT `id` FROM `#iCMS@__article` {$whereSQL} order by {$orderby} LIMIT {$offset},{$perpage}");
     $_count = count($rs);
     $msg = "共<span class='label label-info'>{$total}</span>篇文章,将分成<span class='label label-info'>{$looptimes}</span>次完成<hr />开始执行第<span class='label label-info'>" . ($this->page + 1) . "</span>次生成,共<span class='label label-info'>{$_count}</span>篇<hr />";
     for ($i = 0; $i < $_count; $i++) {
         self::Article($rs[$i]['id']);
         $msg .= '<span class="label label-success">' . $rs[$i]['id'] . ' <i class="fa fa-check"></i></span> ';
     }
     $GLOBALS['page']++;
     $use_time = iPHP::timer_stop();
     $msg .= "<hr />用时<span class='label label-info'>{$use_time}</span>秒";
     $query["total_num"] = $total;
     $query["alltime"] = $this->alltime + $use_time;
     $loopurl = $this->loopurl($looptimes, $query);
     if ($loopurl) {
         $moreBtn = array(array("id" => "btn_stop", "text" => "停止", "url" => APP_URI . "&do=article"), array("id" => "btn_next", "text" => "继续", "src" => $loopurl, "next" => true));
         $dtime = 1;
         $all_time = $looptimes * $use_time + $looptimes + 1;
         $msg .= "<hr />预计全部生成还需要<span class='label label-info'>{$all_time}</span>秒";
     } else {
         $moreBtn = array(array("id" => "btn_next", "text" => "完成", "url" => APP_URI . "&do=article"));
         $dtime = 5;
         $msg .= "<hr />已全部生成完成<hr />总共用时<span class='label label-info'>" . $query["alltime"] . "</span>秒";
     }
     $updateMsg = $this->page ? true : false;
     iPHP::dialog($msg, $loopurl ? "src:" . $loopurl : '', $dtime, $moreBtn, $updateMsg);
 }
Esempio n. 3
0
 * @license http://www.idreamsoft.com iDreamSoft
 * @author coolmoo <*****@*****.**>
 * @$Id: footer.php 2381 2014-03-21 04:03:07Z coolmoo $
 */
defined('iPHP') or exit('What are you doing?');
//var_dump(iMember::$cpower);
$memory = memory_get_usage();
?>
  <div class="clearfloat"></div>
  <div class="iCMS-container hide">
    <span class="label label-success">
      使用内存:<?php 
echo iFS::sizeUnit($memory);
?>
 执行时间:<?php 
echo iPHP::timer_stop();
?>
 s
    </span>
  </div>
</div>
<a id="scrollUp" href="#top"></a>
<div class="iCMS-batch">
  <div id="weightBatch">
    <div class="input-prepend"><span class="add-on">权重</span>
      <input type="text" class="span2" name="mweight"/>
    </div>
  </div>
  <div id="keywordBatch">
    <div class="input-prepend input-append"><span class="add-on">关键字</span>
      <input type="text" class="span2" name="mkeyword"/>