Ejemplo n.º 1
0
<?php

/**
 * Created by IntelliJ IDEA.
 * User: lihuanpeng
 * Date: 15/8/25
 * Time: 22:48
 */
include './config.php';
for ($i = $start_page; $i <= $end_page; $i++) {
    // Fetch list page number $i ($list_url with the page number appended).
    $list_handle = curl_init();
    // CURLOPT_RETURNTRANSFER makes curl_exec() return the body as a string
    // instead of printing it; the User-Agent is supplied by rand_user_agent().
    $options = [CURLOPT_URL => $list_url . $i, CURLOPT_RETURNTRANSFER => true, CURLOPT_USERAGENT => rand_user_agent()];
    curl_setopt_array($list_handle, $options);
    $pageContent = curl_exec($list_handle);
    if (curl_errno($list_handle)) {
        beep();
        alert('curl出错:' . curl_error($list_handle));
        alert('出错的URL是:' . $list_url . $i);
    } else {
        alert("第 {$i} 列表页html内容获取成功!");
    }
    // Release the handle on both paths; previously it was only closed on
    // success, so every failed request leaked a handle.
    curl_close($list_handle);
    $html = str_get_html($pageContent);
    // After a failed fetch $pageContent is false, and str_get_html() (defined
    // elsewhere) may then not return an object. Calling find() on a non-object
    // is fatal, so fall back to an empty result set and let the loop over
    // $thread_lists simply do nothing for this page.
    $thread_lists = is_object($html) ? $html->find('tr[class=tr3 t_one]') : [];
    foreach ($thread_lists as $thread_tr) {
        // 标题和链接地址
        foreach ($thread_tr->find('td h3 a') as $url) {
            $title = iconv('gb2312', 'utf-8//IGNORE', $url->innertext);
Ejemplo n.º 2
0
$pids = array();
for ($i = 0; $i < $workers; $i++) {
    $pids[$i] = pcntl_fork();
    // 创建子进程
    switch ($pids[$i]) {
        case -1:
            alert('创建子进程失败:' . $i);
            exit;
        case 0:
            // Half-open index range [$key_start, $key_end) handled by worker $i.
            // Bounds are floored to integers so that consecutive workers get
            // contiguous, non-overlapping slices even when $thread_quantity is
            // not a multiple of $workers. Fractional bounds would be truncated
            // when used as array keys and make neighbouring workers process the
            // same index twice. When the division is exact the values are
            // unchanged.
            $key_start = (int) floor($thread_quantity * $i / $workers);
            $key_end = (int) floor($thread_quantity * ($i + 1) / $workers);
            for ($j = $key_start; $j < $key_end; $j++) {
                // Fetch the thread page at index $j of $threads.
                $thread_handle = curl_init();
                // CURLOPT_RETURNTRANSFER makes curl_exec() return the body as a string
                // instead of printing it; the User-Agent is supplied by rand_user_agent().
                $options = [CURLOPT_URL => $threads[$j]['url'], CURLOPT_RETURNTRANSFER => true, CURLOPT_USERAGENT => rand_user_agent()];
                curl_setopt_array($thread_handle, $options);
                $thread_content = curl_exec($thread_handle);
                if (curl_errno($thread_handle)) {
                    beep();
                    alert('curl出错:' . curl_error($thread_handle));
                    alert('出错的URL是:' . $threads[$j]['url']);
                }
                // Release the handle on both paths; previously it was only closed
                // on success, so every failed request leaked a handle.
                curl_close($thread_handle);
                $html = str_get_html($thread_content);
                // 查找正文中所有的图片url并写入数据库
                if (is_object($html)) {
                    foreach ($html->find('input') as $element) {
                        if ($element->src != '') {