//echo $worker->worker_pid . " --- " . $worker->worker_id."\n";

// Read the cookie string from cookie.txt and attach it to the crawler.
$cookie = trim(file_get_contents("cookie.txt"));

$curl = new rolling_curl();
$curl->set_cookie($cookie);
$curl->set_gzip(true);

/**
 * Handles one finished request for a user's "about" page.
 *
 * - Empty response: dumps $info and appends $info / $error as JSON lines to
 *   ./timeout/<username>_info.json and ./timeout/<username>_error.json.
 * - Response from which get_user_about() extracts nothing: appends the
 *   request URL to ./timeout_data.txt.
 * - Otherwise: writes the extracted data as JSON to ./html/<username>.json.
 *
 * @param mixed $response Response body; treated as a failure when empty().
 * @param mixed $info     Passed to var_dump() and logged as JSON on failure.
 * @param array $request  Only $request['url'] is read.
 * @param mixed $error    Logged as JSON on failure.
 */
$curl->callback = function ($response, $info, $request, $error) {
    // Extract the username from the request URL exactly once. When the URL
    // does not match, fall back to an empty name instead of reading an
    // undefined $out[1]; the resulting file names are the same as before,
    // just without the undefined-index notice.
    $username = '';
    if (preg_match("@http://www.zhihu.com/people/(.*?)/about@i", $request['url'], $out)) {
        $username = $out[1];
    }

    if (empty($response)) {
        var_dump($info);
        file_put_contents("./timeout/" . $username . "_info.json", json_encode($info) . "\n", FILE_APPEND);
        file_put_contents("./timeout/" . $username . "_error.json", json_encode($error) . "\n", FILE_APPEND);
        return;
    }

    $data = get_user_about($response);
    if (empty($data)) {
        // The page was fetched but nothing could be extracted from it.
        file_put_contents("./timeout_data.txt", $request['url'] . "\n", FILE_APPEND);
        return;
    }

    file_put_contents("./html/" . $username . ".json", json_encode($data));
};

// Fetch up to $count "about" pages, one queued username per iteration.
for ($i = 0; $i < $count; $i++) {
    $username = get_user_queue();
    if (empty($username)) {
        // Presumably the queue is exhausted; stop rather than request
        // ".../people//about" for the remaining iterations.
        break;
    }

    // NOTE(review): addslashes() is not URL escaping; confirm whether
    // rawurlencode() was intended before changing it, because the callback
    // derives output file names from this URL.
    $username = addslashes($username);
    $url = "http://www.zhihu.com/people/{$username}/about";
    $curl->get($url);
    // The return value is not needed here; results are handled in the callback.
    $curl->execute();
}
};

$w->run();
// Refresh the crawl timestamp so the queue hands out a different user on
// every fetch, which keeps the crawl cycling endlessly.
$server_data['info_uptime'] = time();
// ID of the current process, as returned by posix_getpid().
$server_data['info_progress_id'] = posix_getpid();
// Hard-coded server identifier. NOTE(review): the meaning of 2 is not visible here.
$server_data['info_server_id'] = 2;

// Process up to $count users taken from the 'info' queue.
for ($i = 0; $i < $count; $i++) {
    $username = get_user_queue('info');
    // Stop as soon as the queue yields no username.
    if (empty($username)) {
        return;
    }
    // NOTE(review): addslashes() is the only escaping applied before the name
    // is interpolated into both the URL and the SQL WHERE clause below;
    // confirm whether db::update() supports bound parameters.
    $username = addslashes($username);
    $worker->log("采集用户信息 --- " . $username . " --- 开始\n");

    // Crawl the time and content of the user's last message ===================
    $data = array();
    $url = "http://www.zhihu.com/people/{$username}/";
    $curl->get($url);
    $content = $curl->execute();
    // Empty response: log the username with a timestamp, still store the
    // refreshed $server_data on the user row, and stop.
    if (empty($content)) {
        file_put_contents("./data/error_timeout.log", date("Y-m-d H:i:s") . ' ' . $username . "\n", FILE_APPEND);
        db::update('user', $server_data, "`username`='{$username}'");
        return;
    }
    $data = get_user($content);
    // get_user() returned nothing: log it, store $server_data on the user row,
    // and stop.
    if (empty($data)) {
        file_put_contents("./data/error_emptydata.log", date("Y-m-d H:i:s") . ' ' . $username . " info data not exists --- \n", FILE_APPEND);
        db::update('user', $server_data, "`username`='{$username}'");
        return;
    }
    // Disabled success log:
    //$worker->log("采集用户信息 --- " . $username . " --- 成功\n");
    // Day of week of the last message via date("w") (0 = Sunday .. 6);
    // 7 lies outside that range and is used when no last message time exists.
    $data['last_message_week'] = empty($data['last_message_time']) ? 7 : intval(date("w", $data['last_message_time']));
    // Hour of the last message via date("H") (00 .. 23); 24 lies outside that
    // range and is used when no last message time exists.
    $data['last_message_hour'] = empty($data['last_message_time']) ? 24 : intval(date("H", $data['last_message_time']));
    // Merge in the bookkeeping fields; for identical string keys the values
    // from $server_data take precedence.
    $data = array_merge($data, $server_data);