/**
 * Fetch one page and extract every data item described by a spider rule.
 *
 * The page to fetch is resolved in one of two ways:
 *  - when spider::$sid is set, title/pid/url/rid are loaded from the matching
 *    `spider_url` row and the arguments of this method are ignored;
 *  - otherwise the values come from spider::$rid/$pid/$title/$url, each of
 *    which can be overridden by the corresponding non-NULL argument.
 *
 * Each entry of the rule's `data` list is then run through
 * spiderContent::crawl() and stored in the result under the entry's name.
 * Names may carry these markers:
 *  - `UNSET:name`  removes a previously collected `name`;
 *  - `DATA:name`   uses the value already collected for `name` as the source
 *                  text instead of the downloaded page;
 *  - `PRE:name`    a value collected earlier under `PRE:name` becomes the
 *                  source text for `name` and is then discarded;
 *  - `EMPTY:name`  only runs when `name` is still empty, and then fills it;
 *  - `ARRAY:name`  transposes the extracted rows/columns;
 *  - `a.b`         stores the value at $responses['a']['b'].
 * A name collected more than once is merged (arrays) or concatenated
 * (strings) with the earlier value.
 *
 * Side effects: sets spider::$curl_proxy and spider::$allHtml, reconfigures
 * iFS curl/watermark options from the rule, echoes debug output when
 * spider::$dataTest is on, and finally passes the result through
 * spider::$callback['data'] when that is callable.
 *
 * @param mixed $_pid   Project id override (ignored when spider::$sid is set).
 * @param mixed $_rid   Rule id override (ignored when spider::$sid is set).
 * @param mixed $_url   URL override (ignored when spider::$sid is set).
 * @param mixed $_title Title override (ignored when spider::$sid is set).
 * @return array|false  Collected data keyed by item name (always contains
 *                      `reurl` and `__title__`), or false in shell mode when
 *                      the downloaded page is empty.
 */
public static function crawl($_pid = NULL, $_rid = NULL, $_url = NULL, $_title = NULL) {
    ini_get('safe_mode') or set_time_limit(0);

    $sid = spider::$sid;
    if ($sid) {
        // NOTE(review): $sid is interpolated into the SQL text as-is; this
        // assumes spider::$sid was validated where it was assigned -- confirm.
        $sRs = iDB::row("SELECT * FROM `#iCMS@__spider_url` WHERE `id`='{$sid}' LIMIT 1;");
        // A missing row leaves everything NULL; the empty-page check below
        // then reports the failure instead of reading properties of a non-object.
        $title = $pid = $url = $rid = null;
        if ($sRs) {
            $title = $sRs->title;
            $pid = $sRs->pid;
            $url = $sRs->url;
            $rid = $sRs->rid;
        }
    } else {
        $rid = spider::$rid;
        $pid = spider::$pid;
        $title = spider::$title;
        $url = spider::$url;
        $_rid === NULL or $rid = $_rid;
        $_pid === NULL or $pid = $_pid;
        $_title === NULL or $title = $_title;
        $_url === NULL or $url = $_url;
    }

    // Both are read further down even when there is no project, so they must
    // always be defined.
    $project = null;
    $prule_list_url = null;
    if ($pid) {
        $project = spider::project($pid);
        $prule_list_url = $project['list_url'];
    }

    $ruleA = spider::rule($rid);
    $rule = $ruleA['rule'];
    $dataArray = $rule['data'];
    // A list URL defined on the project takes precedence over the rule's own.
    if ($prule_list_url) {
        $rule['list_url'] = $prule_list_url;
    }

    if (spider::$dataTest) {
        echo "<b>抓取规则信息</b><pre>";
        print_r(iS::escapeStr($ruleA));
        print_r(iS::escapeStr($project));
        echo "</pre><hr />";
    }

    spider::$curl_proxy = $rule['proxy'];
    $responses = array();
    $html = spiderTools::remote($url);
    if (empty($html)) {
        $msg = '错误:001..采集 ' . $url . '文件内容为空!请检查采集规则';
        if (spider::$work == 'shell') {
            echo "{$msg}\n";
            return false;
        } else {
            // NOTE(review): presumably iPHP::alert() ends the request; if it
            // returns, extraction below runs against an empty page -- confirm.
            iPHP::alert($msg);
        }
    }

    spider::$allHtml = "";
    // NOTE(review): these use spider::$url rather than the $url resolved
    // above (which may come from $_url or the spider_url row) -- confirm
    // that this is intended.
    $rule['__url__'] = spider::$url;
    $responses['reurl'] = spider::$url;
    $responses['__title__'] = $title;

    foreach ((array) $dataArray as $data) {
        $content_html = $html;
        $dname = $data['name'];

        // [UNSET:name] -- drop a previously collected item.
        if (strpos($dname, 'UNSET:') !== false) {
            $_dname = str_replace('UNSET:', '', $dname);
            unset($responses[$_dname]);
            continue;
        }

        // [DATA:name] -- use the already processed value of [name] as the
        // source text, allowing the same data to be processed several times.
        if (strpos($dname, 'DATA:') !== false) {
            $_dname = str_replace('DATA:', '', $dname);
            $content_html = isset($responses[$_dname]) ? $responses[$_dname] : null;
            unset($responses[$dname]);
        }

        // [PRE:name] -- data collected earlier under PRE:name becomes the
        // source text for this item (typically used for downloaded content).
        $pre_dname = 'PRE:' . $dname;
        if (isset($responses[$pre_dname])) {
            $content_html = $responses[$pre_dname];
            unset($responses[$pre_dname]);
        }

        // [EMPTY:name] -- fallback item: only runs when [name] is still empty.
        if (strpos($dname, 'EMPTY:') !== false) {
            $_dname = str_replace('EMPTY:', '', $dname);
            if (empty($responses[$_dname])) {
                $dname = $_dname;
            } else {
                // A value already exists, so skip this extraction.
                continue;
            }
        }

        $content = spiderContent::crawl($content_html, $data, $rule, $responses);
        unset($content_html);

        // [ARRAY:name] -- transpose the result so that
        // $content[row][field] becomes $content[field][row].
        if (strpos($dname, 'ARRAY:') !== false) {
            $dname = str_replace('ARRAY:', '', $dname);
            $transposed = array();
            foreach ((array) $content as $rowKey => $row) {
                foreach ((array) $row as $field => $cell) {
                    $transposed[$field][$rowKey] = $cell;
                }
            }
            if ($transposed) {
                $content = $transposed;
            }
            unset($transposed);
        }

        if (strpos($dname, '.') !== false) {
            // [name.xxx] -- store the value as $responses[name][xxx]. The
            // first key ends at the first dot, the second starts after the
            // last dot.
            $f_key = substr($dname, 0, strpos($dname, '.'));
            $s_key = substr(strrchr($dname, '.'), 1);
            if (isset($responses[$f_key][$s_key])) {
                if (is_array($responses[$f_key][$s_key])) {
                    // Cast: array_merge() rejects a scalar second argument
                    // (NULL result on PHP 5/7, TypeError on PHP 8).
                    $responses[$f_key][$s_key] = array_merge($responses[$f_key][$s_key], (array) $content);
                } else {
                    $responses[$f_key][$s_key] .= $content;
                }
            } else {
                $responses[$f_key][$s_key] = $content;
            }
        } else {
            // The same name collected more than once: merge the contents.
            if (isset($responses[$dname])) {
                if (is_array($responses[$dname])) {
                    // Same cast as above, for the same reason.
                    $responses[$dname] = array_merge($responses[$dname], (array) $content);
                } else {
                    $responses[$dname] .= $content;
                }
            } else {
                $responses[$dname] = $content;
            }
        }

        // For multi-match items held as a comma separated string: trim the
        // parts, drop empty ones and remove duplicates. The isset() guard
        // covers dotted names, which are never stored under $dname itself.
        if (isset($responses[$dname]) && !is_array($responses[$dname]) && !empty($data['multi'])) {
            if (strpos($responses[$dname], ',') !== false) {
                $parts = array_map('trim', explode(',', $responses[$dname]));
                $parts = array_unique(array_filter($parts));
                $responses[$dname] = implode(',', $parts);
                unset($parts);
            }
        }
        gc_collect_cycles();
    }

    // An extracted but empty title falls back to the title supplied up front.
    if (isset($responses['title']) && empty($responses['title'])) {
        $responses['title'] = $responses['__title__'];
    }

    spider::$allHtml = null;
    unset($html);
    gc_collect_cycles();

    if (spider::$dataTest) {
        echo "<pre style='width:99%;word-wrap: break-word;'>";
        print_r(iS::escapeStr($responses));
        echo '<hr />';
        echo '使用内存:' . iFS::sizeUnit(memory_get_usage()) . ' 执行时间:' . iPHP::timer_stop() . 's';
        echo "</pre>";
    }

    // Reset the file-system helper to the site defaults, then apply the
    // rule's own overrides.
    iFS::$CURLOPT_ENCODING = '';
    iFS::$CURLOPT_REFERER = '';
    iFS::$watermark_config['pos'] = iCMS::$config['watermark']['pos'];
    iFS::$watermark_config['x'] = iCMS::$config['watermark']['x'];
    iFS::$watermark_config['y'] = iCMS::$config['watermark']['y'];
    iFS::$watermark_config['img'] = iCMS::$config['watermark']['img'];

    if (!empty($rule['fs']['encoding'])) {
        iFS::$CURLOPT_ENCODING = $rule['fs']['encoding'];
    }
    if (!empty($rule['fs']['referer'])) {
        iFS::$CURLOPT_REFERER = $rule['fs']['referer'];
    }
    if (!empty($rule['watermark_mode'])) {
        iFS::$watermark_config['pos'] = $rule['watermark']['pos'];
        iFS::$watermark_config['x'] = $rule['watermark']['x'];
        iFS::$watermark_config['y'] = $rule['watermark']['y'];
        // The rule's image only replaces the site image when one is given.
        if (!empty($rule['watermark']['img'])) {
            iFS::$watermark_config['img'] = $rule['watermark']['img'];
        }
    }

    // Optional post-processing hook: receives the collected data and returns
    // the (possibly modified) result.
    if (!empty(spider::$callback['data']) && is_callable(spider::$callback['data'])) {
        $responses = call_user_func_array(spider::$callback['data'], array($responses));
    }
    return $responses;
}
/**
 * Generate static pages for one article, or for one batch of the articles
 * selected by the request filters.
 *
 * With an article id (argument, or `aid` in $this->PG) the single article is
 * generated through self::Article() and reported via iPHP::success().
 *
 * Otherwise the filters in $this->PG (`cid`, `startime`, `endtime`,
 * `startid`, `endid`, `perpage`, `orderby`) select published articles
 * (`status` = 1). The batch for the current page ($this->page, zero-based)
 * is generated, $GLOBALS['page'] is incremented, and a progress dialog is
 * shown through iPHP::dialog(); while $this->loopurl() returns a URL the
 * dialog continues with the next batch.
 *
 * $this->PG presumably holds raw request input (confirm against the class
 * setup), so every value placed into SQL is cast or validated first.
 *
 * @param mixed $aid Article id to generate; null reads `aid` from $this->PG.
 * @return void
 */
function do_createArticle($aid = null) {
    $category = $this->PG['cid'];
    // Dates are placed inside quoted SQL literals: keep only digits and the
    // usual date separators so the value cannot close the quote.
    $startime = preg_replace('#[^0-9./-]#', '', (string) $this->PG['startime']);
    $endtime = preg_replace('#[^0-9./-]#', '', (string) $this->PG['endtime']);
    $startid = (int) $this->PG['startid'];
    $endid = (int) $this->PG['endid'];
    $perpage = (int) $this->PG['perpage'];
    $orderby = trim((string) $this->PG['orderby']);
    // Index of the batch handled by this request; also feeds LIMIT.
    $page = (int) $this->page;
    $whereSQL = "WHERE `status` ='1'";

    $aid === null && ($aid = $this->PG['aid']);
    if ($aid) {
        $title = self::Article($aid);
        // NOTE(review): presumably iPHP::success() ends the request; if it
        // returns, the batch generation below also runs -- confirm.
        iPHP::success($title . '<hr />生成静态完成!');
    }

    // 'all' selects every category of the article app. Strict comparison, so
    // a numeric 0 is never mistaken for 'all' on PHP < 8.
    if (isset($category[0]) && $category[0] === 'all') {
        $category = $this->get_category(iCMS_APP_ARTICLE);
    }
    if ($category) {
        // The id list is not quoted in the IN() clause, so force every id to
        // an integer. Comma separated strings are split first, which keeps
        // both array and "1,2,3" style input working.
        $cidList = array();
        foreach ((array) $category as $cidItem) {
            foreach (explode(',', (string) $cidItem) as $cid) {
                $cidList[] = (int) $cid;
            }
        }
        $cids = implode(',', $cidList);
        $whereSQL .= " AND `cid` IN({$cids})";
    }
    $startime && ($whereSQL .= " AND `pubdate`>=UNIX_TIMESTAMP('{$startime} 00:00:00')");
    $endtime && ($whereSQL .= " AND `pubdate`<=UNIX_TIMESTAMP('{$endtime} 23:59:59')");
    $startid && ($whereSQL .= " AND `id`>='{$startid}'");
    $endid && ($whereSQL .= " AND `id`<='{$endid}'");

    // The batch size is used as a divisor and as a LIMIT count, so it must be
    // a positive integer.
    if ($perpage <= 0) {
        $perpage = (int) $this->CP;
    }
    if ($perpage <= 0) {
        $perpage = 1;
    }

    // ORDER BY cannot be quoted: accept only a comma separated list of column
    // names with an optional ASC/DESC, anything else uses the default order.
    $orderTerm = '`?[A-Za-z_]\w*`?(?:\s+(?:ASC|DESC))?';
    if (!preg_match('/^' . $orderTerm . '(?:\s*,\s*' . $orderTerm . ')*$/i', $orderby)) {
        $orderby = "id DESC";
    }

    $total = iPHP::total(false, "SELECT count(*) FROM `#iCMS@__article` {$whereSQL}", "G");
    $looptimes = ceil($total / $perpage);
    $offset = $page * $perpage;
    // Cast so that a failed or empty query is treated as "no rows".
    $rs = (array) iDB::all("SELECT `id` FROM `#iCMS@__article` {$whereSQL} order by {$orderby} LIMIT {$offset},{$perpage}");
    $_count = count($rs);

    $msg = "共<span class='label label-info'>{$total}</span>篇文章,将分成<span class='label label-info'>{$looptimes}</span>次完成<hr />开始执行第<span class='label label-info'>" . ($page + 1) . "</span>次生成,共<span class='label label-info'>{$_count}</span>篇<hr />";
    foreach ($rs as $row) {
        self::Article($row['id']);
        $msg .= '<span class="label label-success">' . $row['id'] . ' <i class="fa fa-check"></i></span> ';
    }

    $GLOBALS['page']++;
    $use_time = iPHP::timer_stop();
    $msg .= "<hr />用时<span class='label label-info'>{$use_time}</span>秒";

    $query = array(
        "total_num" => $total,
        "alltime" => $this->alltime + $use_time,
    );
    $loopurl = $this->loopurl($looptimes, $query);
    if ($loopurl) {
        $moreBtn = array(
            array("id" => "btn_stop", "text" => "停止", "url" => APP_URI . "&do=article"),
            array("id" => "btn_next", "text" => "继续", "src" => $loopurl, "next" => true),
        );
        $dtime = 1;
        // Estimate what is still to come: batches not yet run, each taking
        // about as long as this one plus the one second pause between
        // batches, plus one second of slack.
        $remaining = max(0, $looptimes - $page - 1);
        $all_time = $remaining * $use_time + $remaining + 1;
        $msg .= "<hr />预计全部生成还需要<span class='label label-info'>{$all_time}</span>秒";
    } else {
        $moreBtn = array(
            array("id" => "btn_next", "text" => "完成", "url" => APP_URI . "&do=article"),
        );
        $dtime = 5;
        $msg .= "<hr />已全部生成完成<hr />总共用时<span class='label label-info'>" . $query["alltime"] . "</span>秒";
    }

    // From the second batch on, the dialog is told to update rather than
    // being opened anew (fifth argument of iPHP::dialog()).
    $updateMsg = $this->page ? true : false;
    iPHP::dialog($msg, $loopurl ? "src:" . $loopurl : '', $dtime, $moreBtn, $updateMsg);
}
* @license http://www.idreamsoft.com iDreamSoft * @author coolmoo <*****@*****.**> * @$Id: footer.php 2381 2014-03-21 04:03:07Z coolmoo $ */ defined('iPHP') or exit('What are you doing?'); //var_dump(iMember::$cpower); $memory = memory_get_usage(); ?> <div class="clearfloat"></div> <div class="iCMS-container hide"> <span class="label label-success"> 使用内存:<?php echo iFS::sizeUnit($memory); ?> 执行时间:<?php echo iPHP::timer_stop(); ?> s </span> </div> </div> <a id="scrollUp" href="#top"></a> <div class="iCMS-batch"> <div id="weightBatch"> <div class="input-prepend"><span class="add-on">权重</span> <input type="text" class="span2" name="mweight"/> </div> </div> <div id="keywordBatch"> <div class="input-prepend input-append"><span class="add-on">关键字</span> <input type="text" class="span2" name="mkeyword"/>