/**
 * Fetch the users listed on a Zhihu profile's follow page and return them.
 *
 * The profile page `http://www.zhihu.com/people/{u_id}/{user_type}` is requested first.
 * When at most one page (20 entries) is wanted, the work is delegated to
 * getOnePageUserList(). Otherwise the list is paged through the `/node/{nodename}`
 * endpoint (20 entries per request) using the `_xsrf` token and the `data-init`
 * parameters scraped from the profile page. While paging, parsed rows are written out
 * in batches of 10 pages: new users via User::addMulti() and, only when $op_type == 2,
 * follow relations via User::addFollowList(). Progress is echoed to stdout.
 *
 * NOTE(review): dealUserInfo() is assumed to return a two-element array
 * [0 => rows for User::addMulti, 1 => rows for User::addFollowList] - confirm against
 * its definition. Results that do not have that shape are treated as an empty page.
 *
 * NOTE(review): `$user_type` has a default but is followed by required parameters, so
 * the default can never take effect (and PHP 8 reports a deprecation). The signature
 * is left untouched here to stay compatible with existing callers.
 *
 * @param string $u_id      Profile identifier used in the /people/{u_id} URL.
 * @param string $user_type Profile sub-page to read, e.g. 'followees'.
 * @param int    $count     Number of list entries wanted; values <= 20 use the single-page path.
 * @param int    $op_type   Operation mode; follow relations are only persisted when it equals 2.
 *
 * @return array For $count <= 20, whatever getOnePageUserList() returns; otherwise the merged
 *               second elements of every dealUserInfo() result, or an empty array when
 *               the paging token/parameters could not be extracted from the profile page.
 */
function getUserList($u_id, $user_type = 'followees', $count, $op_type)
{
    // Entries returned per list request, and how many pages to buffer before persisting.
    $page_size = 20;
    $flush_every_pages = 10;

    $following_users = array();
    $more_user_list = array();
    $tmp_following_users = array();

    $profile_html = Curl::request('GET', 'http://www.zhihu.com/people/' . $u_id . '/' . $user_type);

    // Display name of the profile owner, taken from the page header link.
    preg_match('#<a class="name" href="/people/(.*?)">(.*?)</a>#', $profile_html, $u_out);
    $u_name = empty($u_out[2]) ? '' : $u_out[2];

    // Everything requested fits on the first page: no paging needed.
    if ($count <= $page_size) {
        return getOnePageUserList($profile_html, $u_id, $user_type, $count, $u_name, $op_type);
    }

    // Paging needs the CSRF token and the list widget's init parameters from the page.
    preg_match('#<input type="hidden" name="_xsrf" value="(.*?)"/>#', $profile_html, $out);
    $_xsrf = empty($out[1]) ? '' : trim($out[1]);

    preg_match('#<div class="zh-general-list clearfix" data-init="(.*?)">#', $profile_html, $out);
    $url_params = empty($out[1]) ? '' : json_decode(html_entity_decode($out[1]), true);

    echo "--------start requesting {$u_id} more {$count} user--------\n";

    // Without a token, or with init data missing the keys used below, paging cannot work.
    $can_page = !empty($_xsrf)
        && !empty($url_params)
        && is_array($url_params)
        && isset($url_params['params'], $url_params['nodename'])
        && is_array($url_params['params'])
        && is_string($url_params['nodename']);
    if (!$can_page) {
        return array();
    }

    $params = $url_params['params'];
    $node_url = 'http://www.zhihu.com/node/' . $url_params['nodename'];
    $total_page = (int) ceil($count / $page_size);

    for ($page = 1; $page <= $total_page; ++$page) {
        $params['offset'] = ($page - 1) * $page_size;
        $post_fields = array('method' => 'next', 'params' => json_encode($params), '_xsrf' => $_xsrf);

        $more_user = Curl::request('POST', $node_url, $post_fields);
        $more_user_result = json_decode($more_user, true);
        if (empty($more_user_result['msg']) || !is_array($more_user_result['msg'])) {
            echo "--------get {$u_id} {$user_type} page {$page} failed--------\n";
            continue;
        }

        $more_user_tmp_list = $more_user_result['msg'];
        $parsed = dealUserInfo($more_user_tmp_list, $u_id, $user_type, $u_name);

        // Skip pages whose parse result is empty or not the expected pair of arrays;
        // feeding a non-array to array_merge would discard or abort the accumulated data.
        $parsed_ok = !empty($parsed)
            && isset($parsed[0], $parsed[1])
            && is_array($parsed[0])
            && is_array($parsed[1]);
        if (!$parsed_ok) {
            echo "--------empty more user {$url_params['nodename']} with u_id {$u_id}--------\n";
            continue;
        }

        $more_user_list = array_merge($more_user_list, $parsed[0]);
        $tmp_following_users = array_merge($tmp_following_users, $parsed[1]);

        // Persist once every 200 fetched entries (10 pages), then reset the buffers.
        if ($page % $flush_every_pages == 0) {
            if (!empty($more_user_list)) {
                $tmp_count = count($more_user_list);
                echo "--------start adding more new {$tmp_count} user with u_id {$u_id}--------\n";
                User::addMulti($more_user_list);
                echo "--------add more new {$tmp_count} user done with u_id {$u_id}--------\n";
            }

            if (!empty($tmp_following_users) && $op_type == 2) {
                echo "--------start adding " . count($tmp_following_users) . " {$user_type} user with u_id {$u_id}--------\n";
                User::addFollowList($tmp_following_users);
                echo "--------add " . count($tmp_following_users) . " {$user_type} user done with u_id {$u_id}--------\n";
            }

            $more_user_list = array();
            $tmp_following_users = array();
        }

        $following_users = array_merge($following_users, $parsed[1]);
    }

    // Persist whatever is left over from the last, incomplete batch.
    if (!empty($more_user_list)) {
        echo "--------start adding rest " . count($more_user_list) . " user with u_id {$u_id}--------\n";
        $last_id = User::addMulti($more_user_list);
        echo "--------add rest" . count($more_user_list) . " user done with u_id {$u_id} and last_id {$last_id}--------\n";
    }

    if (!empty($tmp_following_users) && $op_type == 2) {
        echo "--------start adding rest " . count($tmp_following_users) . " {$u_id}'s {$user_type} user--------\n";
        User::addFollowList($tmp_following_users);
        echo "--------add " . count($tmp_following_users) . " {$u_id}'s {$user_type} user done--------\n";
    }

    echo "--------request more {$count} user done with u_id {$u_id}--------\n";

    return $following_users;
}