// Remove the execution time limit (0 means no limit).
set_time_limit(0);
// Define the application root: the parent of this script's directory, with a trailing separator.
define('APP', dirname(dirname(__FILE__)) . DIRECTORY_SEPARATOR);
// Load the framework bootstrap file.
require APP . 'system/_shell.php';
// Load the helper function files, the content model and the phpQuery library.
require APP . 'funcs/spider.fn.php';
require APP . 'funcs/ecshop.fn.php';
require APP . 'models/ContentModel.php';
require APP . 'et/phpQuery/phpQuery.php';
// Announce that the queue is being fetched.
echo "开始获取队列\n";
// Table name passed as the third argument to getUrlList().
$table_name = 'spider_ecshop_url';
$contentModel = new ContentModel();
// Filter handed to getUrlList(); presumably selects rows still flagged for pushing — confirm in ContentModel.
$configs = array('need_push' => 'yes');
// Request the URL list with a cap of 15000 rows.
$url_list = $contentModel->getUrlList($configs, 'LIMIT 15000 ', $table_name);
$url_count = count($url_list);
// Report how many URLs were retrieved.
echo "获取到{$url_count}条要采集的内容... \n";
if (!empty($url_list)) {
    // $url_info = get_line(prepare('select * from task_list where id=?i limit 1', array($ko)));
    foreach ($url_list as $v) {
        if ($v['url']) {
            /**
             * 获取单个产品内容
             */
            phpQuery::newDocumentFile($v['url']);
            $goods_id = intval(pq('input[name="id"]')->attr('value'));
            // 说明源id大于47900是无水印的 http://www.tomdurrie.com/search.php?page=380 前判读吧..
            // 63767 后面开始进行第二次采集
            if ($goods_id > 63767) {
                // 删除旧产品数据和相册数据
示例#2
0
<?php

// Remove the execution time limit (0 means no limit).
set_time_limit(0);
// Define the application root: the parent of this script's directory, with a trailing separator.
define('APP', dirname(dirname(__FILE__)) . DIRECTORY_SEPARATOR);
// Load the framework bootstrap file, then the spider helpers and the content model.
require APP . 'system/_shell.php';
require APP . 'funcs/spider.fn.php';
require APP . 'models/ContentModel.php';
// Print the status line in green using ANSI escapes emitted directly by PHP.
// Spawning a shell via system('echo -e ...') costs a process per message and is
// not portable: `echo -e` is a bash-ism, and shells such as dash print "-e" literally.
echo "\033[32m开始获取队列... \033[0m\n";
$contentModel = new ContentModel();
// Filter handed to getUrlList(); presumably selects rows still flagged for pushing — confirm in ContentModel.
$configs = array('need_push' => 'yes');
// Request the URL list with a cap of 50 rows.
$url_list = $contentModel->getUrlList($configs, 'LIMIT 50 ');
$url_count = count($url_list);
// Report how many URLs were retrieved (same bytes the shell echo produced under bash).
echo "\033[32m 获取到{$url_count}条要采集的内容... \033[0m\n";
if (!empty($url_list)) {
    foreach ($url_list as $v) {
        $tmp_url_data[$v['task_list_id']][] = $v;
    }
    foreach ($tmp_url_data as $ko => $vo) {
        $url_info = get_line(prepare('select * from task_list where id=?i limit 1', array($ko)));
        $content_rules = $url_info['content_rules'];
        $content_rules = json_decode($content_rules, true);
        $chatset = $content_rules['charset'];
        if (!empty($content_rules['type'])) {
            foreach ($vo as $va) {
                system("echo -e '获取内容链接: \\033[32m" . $va['url'] . "\\033[0m'");
                $html = '';
                $html = http_client_request($va['url']);
                if ($chatset != 'utf-8') {