<?php
require_once '/home/liubin/Downloads/myspider2/lib/CG_Fetch_Html.php';

/**
 * Callback for fetched pages: stores the HTML in a uniquely named ".html"
 * file (relative path, so it lands in the current working directory).
 *
 * @param string $response The fetched HTML content.
 *
 * @return bool TRUE when at least one byte was written, FALSE when the write
 *              failed or nothing was written.
 */
function request_callback($response)
{
    $filename = uniqid() . '.html';
    $rst = file_put_contents($filename, $response);

    // file_put_contents() yields FALSE on failure, otherwise the number of
    // bytes written; an empty write (0 bytes) is also reported as FALSE.
    return $rst !== FALSE && $rst > 0;
}

$callback = 'request_callback';

$cookie_path = '/home/liubin/Downloads/myspider2/config/cookie.txt';
$cookie = file_get_contents($cookie_path);
if ($cookie === FALSE) {
    // Stop here rather than handing a boolean FALSE to the fetcher as the
    // cookie value.
    throw new RuntimeException("Unable to read cookie file: {$cookie_path}");
}

$useragent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36';

$urls = array(
    'http://www.zhihu.com/people/lin-taro/followees',
    'http://www.zhihu.com/people/lin-taro/followers',
);

// The callback is passed by name; presumably CG_Fetch_Html invokes it with
// each fetched response - confirm against the class.
$fetch = new CG_Fetch_Html($urls, $callback);
$fetch->cookie = $cookie;
$fetch->useragent = $useragent;
$fetch->fetch_html();
/**
 * Requests the followees and followers pages for a batch of users.
 *
 * The original note on this method says the usernames of followers and
 * followees are saved to redis so their details can be crawled in a later
 * step. That storing is not done in this method itself; presumably it
 * happens in the "followees_callback" / "followers_callback" callbacks
 * (TODO confirm).
 *
 * @return mixed FALSE when no usernames are available or the cookie file
 *               cannot be read; otherwise no value is returned.
 */
private function _fetch_follow()
{
    $count = 20;
    // Presumably returns up to $count usernames from the database - verify
    // against _get_sql_users().
    $usernames = $this->_get_sql_users($count);
    if (empty($usernames)) {
        // Message: "no usable users in the database". PHP_EOL keeps appended
        // entries from running together inside the same log file.
        file_put_contents(
            '/home/liubin/Downloads/myspider2/log/' . date('Y-m-d H:i:s') . '.log',
            '数据库中没有可用用户' . PHP_EOL,
            FILE_APPEND
        );
        return FALSE;
    }

    $followees_urls = array();
    $followers_urls = array();
    foreach ($usernames as $username) {
        $followees_urls[] = "http://www.zhihu.com/people/{$username}/followees";
        $followers_urls[] = "http://www.zhihu.com/people/{$username}/followers";
    }

    $cookie_path = '/home/liubin/Downloads/myspider2/config/cookie.txt';
    $cookie = file_get_contents($cookie_path);
    if ($cookie === FALSE) {
        // Message: "unable to read cookie file". Bail out the same way as the
        // empty-users case instead of fetching with a boolean FALSE cookie.
        file_put_contents(
            '/home/liubin/Downloads/myspider2/log/' . date('Y-m-d H:i:s') . '.log',
            '无法读取cookie文件: ' . $cookie_path . PHP_EOL,
            FILE_APPEND
        );
        return FALSE;
    }

    $useragent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36';

    // Callback name => URL batch. Array order is preserved by foreach, so the
    // followees are fetched first and the followers second.
    $batches = array(
        'followees_callback' => $followees_urls,
        'followers_callback' => $followers_urls,
    );
    foreach ($batches as $callback => $urls) {
        $fetch = new CG_Fetch_Html($urls, $callback);
        $fetch->cookie = $cookie;
        $fetch->useragent = $useragent;
        $fetch->fetch_html();
    }
}