/*
 * ECShop spider worker (CLI script).
 *
 * Asks ContentModel::getUrlList() for rows matching need_push = 'yes'
 * (clause 'LIMIT 15000 ', table name 'spider_ecshop_url'), then walks the
 * result and loads each row's URL into phpQuery to read the product id.
 *
 * Variable names are part of this script's global scope and are relied on by
 * the code that follows, so they are intentionally left as they are.
 */

// Remove the execution time limit (0 = unlimited).
set_time_limit(0);

// Application root: parent of this script's directory, with a trailing separator.
define('APP', dirname(dirname(__FILE__)) . DIRECTORY_SEPARATOR);

// Framework bootstrap first, then helper functions, the model and phpQuery.
require APP . 'system/_shell.php';
require APP . 'funcs/spider.fn.php';
require APP . 'funcs/ecshop.fn.php';
require APP . 'models/ContentModel.php';
require APP . 'et/phpQuery/phpQuery.php';

echo '开始获取队列', "\n";

// Query parameters handed to the model.
$table_name = 'spider_ecshop_url';
$configs = array('need_push' => 'yes');

$contentModel = new ContentModel();
$url_list = $contentModel->getUrlList($configs, 'LIMIT 15000 ', $table_name);
$url_count = count($url_list);

echo '获取到' . $url_count . '条要采集的内容... ' . "\n";

if (!empty($url_list)) {
    // Disabled lookup kept from the original script:
    // $url_info = get_line(prepare('select * from task_list where id=?i limit 1', array($ko)));
    foreach ($url_list as $v) {
        if ($v['url']) {
            // Fetch a single product page and make it phpQuery's current document.
            phpQuery::newDocumentFile($v['url']);

            // The product id is the value of the page's <input name="id"> element.
            $goods_id = (int) pq('input[name="id"]')->attr('value');

            // Original author's note: source ids above 47900 are watermark-free
            // (reference: http://www.tomdurrie.com/search.php?page=380);
            // ids after 63767 belong to the second crawl pass.
            if ($goods_id > 63767) {
                // Delete the old product data and gallery (album) data.
<?php
/*
 * Content spider worker (CLI script).
 *
 * Asks ContentModel::getUrlList() for rows matching need_push = 'yes'
 * (clause 'LIMIT 50 '), groups the rows by their task_list_id, loads the
 * matching task_list record for each group, decodes its JSON content_rules
 * and, when the rules define a non-empty 'type', downloads every URL of the
 * group with http_client_request().
 *
 * Status messages are written with PHP's own echo plus ANSI colour codes
 * (ESC[32m = green, ESC[0m = reset). They were previously produced by running
 * `echo -e` through system(), which depends on the shell's echo supporting -e
 * and placed the crawled URL, unescaped, inside a shell command line.
 *
 * Variable names (including the historical spelling $chatset) are part of the
 * script's global scope and are left unchanged for the code that follows.
 */

// Remove the execution time limit (0 = unlimited).
set_time_limit(0);

// Application root: parent of this script's directory, with a trailing separator.
define('APP', dirname(dirname(__FILE__)) . DIRECTORY_SEPARATOR);

// Framework bootstrap first, then helper functions and the model.
require APP . 'system/_shell.php';
require APP . 'funcs/spider.fn.php';
require APP . 'models/ContentModel.php';

echo "\033[32m开始获取队列... \033[0m\n";

$contentModel = new ContentModel();
$configs = array('need_push' => 'yes');
$url_list = $contentModel->getUrlList($configs, 'LIMIT 50 ');

// getUrlList()'s return type is not visible here; only count what is countable
// so a false/null result is reported as 0 instead of raising an error.
$url_count = (is_array($url_list) || $url_list instanceof Countable) ? count($url_list) : 0;
echo "\033[32m 获取到" . $url_count . "条要采集的内容... \033[0m\n";

if (!empty($url_list)) {
    // Group the queued rows by the task they belong to.
    $tmp_url_data = array();
    foreach ($url_list as $v) {
        $tmp_url_data[$v['task_list_id']][] = $v;
    }

    foreach ($tmp_url_data as $ko => $vo) {
        // $ko is the task_list id, $vo the rows queued for that task.
        $url_info = get_line(prepare('select * from task_list where id=?i limit 1', array($ko)));

        // A missing task row or invalid JSON leaves $content_rules as null,
        // exactly as before, but without reading offsets of a non-array.
        $content_rules = isset($url_info['content_rules'])
            ? json_decode($url_info['content_rules'], true)
            : null;
        $chatset = isset($content_rules['charset']) ? $content_rules['charset'] : null;

        if (!empty($content_rules['type'])) {
            foreach ($vo as $va) {
                // The URL is crawled data: print it directly, never via a shell.
                echo "获取内容链接: \033[32m" . $va['url'] . "\033[0m\n";

                $html = http_client_request($va['url']);

                // Pages not declared as 'utf-8' take the branch below.
                if ($chatset != 'utf-8') {