Пример #1
0
// Number of loop iterations each worker process performs
$w->on_worker_start = function ($worker) use($count) {
    //echo $worker->worker_pid . " --- " . $worker->worker_id."\n";
    $cookie = trim(file_get_contents("cookie.txt"));
    $curl = new rolling_curl();
    $curl->set_cookie($cookie);
    $curl->set_gzip(true);
    // Callback assigned to the rolling_curl instance (presumably invoked once per
    // completed request — confirm against rolling_curl).
    //
    // $response  response body; an empty value is handled as a failed fetch
    // $info      transfer info; dumped and logged when the response is empty
    // $request   request descriptor; only $request['url'] is read here
    // $error     error detail; logged when the response is empty
    //
    // Empty response: $info and $error are appended as JSON lines to
    //   ./timeout/<username>_info.json and ./timeout/<username>_error.json.
    // Non-empty response: parsed with get_user_about(); an empty parse result
    //   appends the URL to ./timeout_data.txt, otherwise the parsed data is
    //   written as JSON to ./html/<username>.json.
    $curl->callback = function ($response, $info, $request, $error) {
        // Extract the username from the request URL exactly once. The match is
        // guarded so a non-matching URL yields an empty username instead of
        // reading an undefined $out[1]; resulting file names are unchanged.
        $username = '';
        if (preg_match("@http://www.zhihu.com/people/(.*?)/about@i", $request['url'], $out) === 1) {
            $username = $out[1];
        }

        if (empty($response)) {
            var_dump($info);
            file_put_contents("./timeout/" . $username . "_info.json", json_encode($info) . "\n", FILE_APPEND);
            file_put_contents("./timeout/" . $username . "_error.json", json_encode($error) . "\n", FILE_APPEND);
            return;
        }

        $data = get_user_about($response);
        if (empty($data)) {
            // Page was fetched but nothing could be parsed; remember the URL.
            file_put_contents("./timeout_data.txt", $request['url'] . "\n", FILE_APPEND);
            return;
        }

        // Reuse the username extracted above rather than re-running the regex.
        file_put_contents("./html/" . $username . ".json", json_encode($data));
    };
    // Run $count iterations; each takes one username from get_user_queue() and
    // requests that user's "about" page through $curl.
    for ($i = 0; $i < $count; $i++) {
        $username = get_user_queue();
        // NOTE(review): addslashes() escapes quotes/backslashes but is not URL
        // encoding; rawurlencode() may be what a path segment needs — confirm.
        $username = addslashes($username);
        $url = "http://www.zhihu.com/people/{$username}/about";
        // Presumably get() registers the request and execute() performs it,
        // handing the result to $curl->callback — confirm against rolling_curl.
        $curl->get($url);
        // $data is not read anywhere in the visible part of this loop.
        $data = $curl->execute();
    }
Пример #2
0
        // Parse the fetched page ($content, obtained before this excerpt) into
        // a user-info array.
        $data = get_user($content);
        if (empty($data)) {
            // Nothing could be parsed: log the username with a timestamp, still
            // write $server_data to the user's row, and stop.
            // NOTE(review): $username is interpolated directly into the WHERE
            // clause here and below; confirm it is escaped upstream.
            file_put_contents("./data/error_emptydata.log", date("Y-m-d H:i:s") . ' ' . $username . " info data not exists --- \n", FILE_APPEND);
            db::update('user', $server_data, "`username`='{$username}'");
            return;
        }
        //$worker->log("Collected user info --- " . $username . " --- success\n");
        // Derive weekday and hour from last_message_time. date("w") yields 0-6
        // and date("H") yields 00-23, so 7 and 24 are out-of-range markers used
        // when last_message_time is empty.
        $data['last_message_week'] = empty($data['last_message_time']) ? 7 : intval(date("w", $data['last_message_time']));
        $data['last_message_hour'] = empty($data['last_message_time']) ? 24 : intval(date("H", $data['last_message_time']));
        // Values from $server_data win over parsed values on key collisions.
        $data = array_merge($data, $server_data);
        db::update('user', $data, "`username`='{$username}'");
        // Collect detailed user information =====================================================
        // Reset $data; it is reassigned from get_user_about() below.
        $data = array();
        $url = "http://www.zhihu.com/people/{$username}/about";
        $curl->get($url);
        $content = $curl->execute();
        if (empty($content)) {
            // Empty response: log the username with a timestamp, still write
            // $server_data to the user's row, and stop.
            file_put_contents("./data/error_timeout.log", date("Y-m-d H:i:s") . ' ' . $username . "\n", FILE_APPEND);
            db::update('user', $server_data, "`username`='{$username}'");
            return;
        }
        $data = get_user_about($content);
        if (empty($data)) {
            // Page fetched but nothing could be parsed from it: log and write
            // only $server_data.
            file_put_contents("./data/error_emptydata.log", date("Y-m-d H:i:s") . ' ' . $username . " about data not exists --- \n", FILE_APPEND);
            db::update('user', $server_data, "`username`='{$username}'");
            return;
        }
        // Values from $server_data win over parsed values on key collisions.
        // NOTE(review): $username is interpolated directly into the WHERE
        // clause; confirm it is escaped upstream.
        $data = array_merge($data, $server_data);
        db::update('user', $data, "`username`='{$username}'");
    }
};