<?php
/**
 * Created by IntelliJ IDEA.
 * User: lihuanpeng
 * Date: 15/8/25
 * Time: 22:48
 *
 * Downloads each list page in the range [$start_page, $end_page], parses the
 * HTML and walks the thread rows to pick up every thread's title and link.
 * $start_page, $end_page and $list_url are not assigned here; presumably they
 * come from config.php (to be confirmed against that file).
 */
include './config.php';

for ($i = $start_page; $i <= $end_page; $i++) {
    // Initialise curl
    $list_handle = curl_init();

    // curl options: the page number is appended to the list URL and the body
    // is returned as a string instead of being echoed.
    $options = [
        CURLOPT_URL => $list_url . $i,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_USERAGENT => rand_user_agent(),
    ];
    curl_setopt_array($list_handle, $options);

    // Run the request
    $pageContent = curl_exec($list_handle);
    if (curl_errno($list_handle)) {
        beep();
        alert('curl出错:' . curl_error($list_handle));
        alert('出错的URL是:' . $list_url . $i);
    } else {
        alert("第 {$i} 列表页html内容获取成功!");
    }

    // Release the handle on both the success and the error path; the error
    // text has already been read above, so closing here is safe.
    curl_close($list_handle);

    $html = str_get_html($pageContent);

    // A failed download (or unparsable content) does not yield a DOM object.
    // Calling ->find() on it would abort the whole run, so skip this page and
    // carry on with the next one.
    if (!is_object($html)) {
        alert("第 {$i} 列表页html内容解析失败,已跳过!");
        continue;
    }

    $thread_lists = $html->find('tr[class=tr3 t_one]');
    foreach ($thread_lists as $thread_tr) {
        // Title and link address
        foreach ($thread_tr->find('td h3 a') as $url) {
            // The link text is converted from GB2312 to UTF-8; bytes that
            // cannot be converted are dropped (//IGNORE).
            $title = iconv('gb2312', 'utf-8//IGNORE', $url->innertext);
// Fork $workers child processes; each child downloads its own contiguous
// slice of $threads and scans the fetched HTML for image sources.
$pids = [];
for ($i = 0; $i < $workers; $i++) {
    $pids[$i] = pcntl_fork(); // Create a child process
    switch ($pids[$i]) {
        case -1:
            alert('创建子进程失败:' . $i);
            exit;
        case 0:
            // Integer slice bounds. A plain "$thread_quantity / $workers * $i"
            // is fractional whenever the quantity is not divisible by the
            // worker count; float array keys are truncated, so neighbouring
            // workers would fetch the same thread twice. Flooring the scaled
            // bounds splits 0..$thread_quantity-1 exactly once across workers.
            // NOTE(review): assumes $thread_quantity is the number of entries
            // in $threads - confirm where it is assigned.
            $key_start = (int) floor($thread_quantity * $i / $workers);
            $key_end = (int) floor($thread_quantity * ($i + 1) / $workers);

            for ($j = $key_start; $j < $key_end; $j++) {
                // Initialise curl
                $thread_handle = curl_init();

                // curl options: return the body as a string
                $options = [
                    CURLOPT_URL => $threads[$j]['url'],
                    CURLOPT_RETURNTRANSFER => true,
                    CURLOPT_USERAGENT => rand_user_agent(),
                ];
                curl_setopt_array($thread_handle, $options);

                // Run the request
                $thread_content = curl_exec($thread_handle);
                if (curl_errno($thread_handle)) {
                    beep();
                    alert('curl出错:' . curl_error($thread_handle));
                    alert('出错的URL是:' . $threads[$j]['url']);
                }

                // Close the handle whether or not the request succeeded; the
                // error details were already read above.
                curl_close($thread_handle);

                $html = str_get_html($thread_content);

                // Find every image URL in the post body and write it to the database
                if (is_object($html)) {
                    foreach ($html->find('input') as $element) {
                        if ($element->src != '') {