Example #1
0
/**
 * 获取知乎关联用户 和 用户信息
 *
 * Licensed under The MIT License
 * For full copyright and license information, please see the MIT-LICENSE.txt
 * Redistributions of files must retain the above copyright notice.
 *
 * @author seatle<*****@*****.**>
 * @copyright seatle<*****@*****.**>
 * @link http://www.epooll.com/
 * @license http://www.opensource.org/licenses/mit-license.php MIT License
 */
/**
 * Hand a Zhihu cookie string to cls_curl::set_cookie().
 *
 * @param string|null $cookie Raw cookie string to install. When omitted (null),
 *                            the session string embedded below is used, which is
 *                            what this function always did before.
 * @return void
 */
function set_cookie($cookie = null)
{
    // Cookie-jar alternative left disabled by the original author:
    //$cookie_jar = dirname(__FILE__)."/zhihu.cookie";
    //cls_curl::set_cookie_file($cookie_jar);
    if ($cookie === null) {
        // NOTE(review): this looks like a captured login session committed to
        // source. Prefer passing $cookie explicitly and keeping credentials out
        // of the repository; the literal is kept only so existing callers of
        // set_cookie() behave exactly as before.
        $cookie = '_za=36643642-e546-4d60-a771-8af8dcfbd001; q_c1=a57a2b9f10964f909b8d8969febf3ab2|1437705596000|1437705596000; _xsrf=f0304fba4e44e1d008ec308d59bab029; cap_id="YWY1YmRmODlmZGVmNDc3MWJlZGFkZDg3M2E0M2Q5YjM=|1437705596|963518c454bb6f10d96775021c098c84e1e46f5a"; z_c0="QUFCQVgtRWZBQUFYQUFBQVlRSlZUVjR6NEZVUTgtRkdjTVc5UDMwZXRJZFdWZ2JaOWctNVhnPT0=|1438164574|aed6ef3707f246a7b64da4f1e8c089395d77ff2b"; __utma=51854390.1105113342.1437990174.1438160686.1438164116.10; __utmc=51854390; __utmz=51854390.1438134939.8.5.utmcsr=zhihu.com|utmccn=(referral)|utmcmd=referral|utmcct=/people/yangzetao; __utmv=51854390.100-1|2=registration_date=20131030=1^3=entry_date=20131030=1';
    }
    cls_curl::set_cookie($cookie);
}
Example #2
0
/**
 * 获取知乎关联用户 和 用户信息
 *
 * Licensed under The MIT License
 * For full copyright and license information, please see the MIT-LICENSE.txt
 * Redistributions of files must retain the above copyright notice.
 *
 * @author seatle<*****@*****.**>
 * @copyright seatle<*****@*****.**>
 * @link http://www.epooll.com/
 * @license http://www.opensource.org/licenses/mit-license.php MIT License
 */
/**
 * Hand a Zhihu cookie string to cls_curl::set_cookie().
 *
 * @param string $cookie Raw cookie string, copied from a logged-in Zhihu
 *                       session by whoever runs the script. Defaults to an
 *                       empty string, which is what this function always
 *                       installed before.
 * @return void
 */
function set_cookie($cookie = '')
{
    // Cookie-jar alternative left disabled by the original author:
    //$cookie_jar = dirname(__FILE__)."/zhihu.cookie";
    //cls_curl::set_cookie_file($cookie_jar);

    // Fetch the value from Zhihu yourself and pass it in, rather than editing
    // this file.
    cls_curl::set_cookie($cookie);
}
Example #3
0
/**
 * Collect the accounts a Zhihu user follows, or the accounts following them.
 *
 * Loads http://www.zhihu.com/people/{username}/{user_type}, scrapes the user
 * cards rendered into that page, then pages through the list's AJAX endpoint
 * 20 entries at a time to pick up the rest. The request cookie is read from
 * "cookie.txt" (a relative path, so it depends on the working directory).
 *
 * @param string      $username  Account slug as it appears in the profile URL.
 * @param string      $user_type 'followees' or 'followers'.
 * @param object|null $worker    Anything exposing log($message); pass null
 *                               (or omit) to run without progress logging.
 * @return array Map of username => array('username' => ..., 'nickname' => ...).
 *               Empty when the first page cannot be fetched.
 * @author seatle <*****@*****.**>
 * @created time :2015-07-28 09:46
 */
function get_user_index($username, $user_type = 'followees', $worker = null)
{
    $url = "http://www.zhihu.com/people/{$username}/{$user_type}";

    // by suhy
    // A missing or unreadable cookie file degrades to an empty cookie instead
    // of raising a warning from file_get_contents().
    $cookie = '';
    if (is_readable("cookie.txt")) {
        $cookie = trim((string) file_get_contents("cookie.txt"));
    }
    cls_curl::set_cookie($cookie);
    cls_curl::set_gzip(true);

    $content = cls_curl::get($url);
    if (empty($content)) {
        return array();
    }

    // One user card: capture 1 is the account slug, capture 2 the display name.
    $card_pattern = '#<h2 class="zm-list-content-title"><a data-tip=".*?" href="http://www.zhihu.com/people/(.*?)" class="zg-link" title=".*?">(.*?)</a></h2>#';
    $users = array();

    // When a list has fewer than 20 users the AJAX endpoint returns none of
    // them, so the cards on the first page have to be scraped as well.
    if (preg_match_all($card_pattern, $content, $cards) > 0) {
        foreach ($cards[1] as $idx => $d_username) {
            $d_nickname = $cards[2][$idx];
            if (!empty($d_username) && !empty($d_nickname)) {
                $users[$d_username] = array('username' => $d_username, 'nickname' => $d_nickname);
            }
        }
    }

    $keyword = $user_type === 'followees' ? '关注了' : '关注者';
    $log_prefix = "采集用户 --- " . $username . " --- {$keyword} --- ";
    _get_user_index_log($worker, $log_prefix . "主页 --- 成功\n");

    // Total list size as displayed on the page; drives how many pages to request.
    preg_match('#<span class="zg-gray-normal">' . $keyword . '</span><br />\\s<strong>(.*?)</strong><label> 人</label>#', $content, $m);
    $user_count = empty($m[1]) ? 0 : intval($m[1]);

    // Anti-CSRF token that has to be echoed back in every AJAX request.
    preg_match('#<input type="hidden" name="_xsrf" value="(.*?)"/>#', $content, $m);
    $_xsrf = empty($m[1]) ? '' : trim($m[1]);

    // The list container carries the AJAX node name and base params as
    // HTML-escaped JSON in its data-init attribute.
    preg_match('#<div class="zh-general-list clearfix" data-init="(.*?)">#', $content, $m);
    $url_params = empty($m[1]) ? '' : json_decode(html_entity_decode($m[1]), true);

    if (!empty($_xsrf) && !empty($url_params) && is_array($url_params)) {
        $url = "http://www.zhihu.com/node/" . $url_params['nodename'];
        $params = $url_params['params'];

        for ($offset = 0; $offset < $user_count; $offset += 20) {
            // Derive the page number from the offset so that a failed page
            // still advances the number reported for the pages after it.
            $page = (int) ($offset / 20) + 1;

            $params['offset'] = $offset;
            $post_data = array('method' => 'next', 'params' => json_encode($params), '_xsrf' => $_xsrf);
            $content = cls_curl::post($url, $post_data);

            // An empty body and a body without a usable "msg" list are both
            // treated as a failed page.
            $rows = empty($content) ? null : json_decode($content, true);
            if (empty($rows['msg']) || !is_array($rows['msg'])) {
                _get_user_index_log($worker, $log_prefix . "第{$page}页 --- 失败\n");
                continue;
            }
            _get_user_index_log($worker, $log_prefix . "第{$page}页 --- 成功\n");

            // Each entry of "msg" is an HTML fragment; only its first card is used.
            foreach ($rows['msg'] as $row) {
                if (!is_string($row) || !preg_match($card_pattern, $row, $card)) {
                    continue;
                }
                if (!empty($card[1]) && !empty($card[2])) {
                    $users[$card[1]] = array('username' => $card[1], 'nickname' => $card[2]);
                }
            }
        }
    }

    return $users;
}

/**
 * Forward a progress message to the worker's log() method, if a worker was supplied.
 *
 * @param object|null $worker  Object exposing log($message), or null.
 * @param string      $message Line to log.
 * @return void
 */
function _get_user_index_log($worker, $message)
{
    if ($worker !== null) {
        $worker->log($message);
    }
}