Example #1
0
<?php

require_once '/home/liubin/Downloads/myspider2/lib/CG_Fetch_Html.php';
/**
 * Fetch callback: stores one downloaded page on disk.
 *
 * The content is written to a new file named "<uniqid()>.html"; the name is
 * relative, so it is resolved against the current working directory.
 *
 * @param string $response HTML content that was fetched
 *
 * @return bool TRUE when at least one byte was written; FALSE when the write
 *              failed or nothing was written (e.g. an empty response)
 */
function request_callback($response)
{
    $bytes_written = file_put_contents(uniqid() . '.html', $response);

    // file_put_contents() yields FALSE on failure and 0 for empty content;
    // neither compares greater than zero, so both are reported as FALSE.
    if ($bytes_written > 0) {
        return TRUE;
    }

    return FALSE;
}
// Name of the callback function handed to CG_Fetch_Html.
$callback = 'request_callback';

// Pages to download: the followee and follower lists of a single profile.
$urls = array(
    'http://www.zhihu.com/people/lin-taro/followees',
    'http://www.zhihu.com/people/lin-taro/followers',
);

// Request settings: a browser user-agent string and the cookie text loaded
// from the config file.
$useragent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36';
$cookie_path = '/home/liubin/Downloads/myspider2/config/cookie.txt';
$cookie = file_get_contents($cookie_path);

// Build the fetcher, apply the settings, then start the download.
$fetch = new CG_Fetch_Html($urls, $callback);
$fetch->cookie = $cookie;
$fetch->useragent = $useragent;
$fetch->fetch_html();
Example #2
0
 /**
  * Fetches the followee and follower pages for a batch of users.
  *
  * Asks _get_sql_users() for usernames (passing a count of 20), builds the
  * followees and followers URL for each one, then runs one CG_Fetch_Html pass
  * per list type using the same cookie and user agent. Followee pages go to
  * "followees_callback" and follower pages to "followers_callback".
  *
  * NOTE(review): the original comment says the usernames found are saved to
  * redis for the next crawl step; that presumably happens inside the
  * callbacks, which are not visible here -- confirm.
  *
  * @return mixed FALSE when no usernames are available or the cookie file
  *               cannot be read (both cases are logged); otherwise nothing
  *               is returned (NULL)
  */
 private function _fetch_follow()
 {
     $log_dir = '/home/liubin/Downloads/myspider2/log/';

     $count = 20;
     $usernames = $this->_get_sql_users($count);
     if (empty($usernames)) {
         file_put_contents($log_dir . date('Y-m-d H:i:s') . '.log', '数据库中没有可用用户', FILE_APPEND);
         return FALSE;
     }

     // Read the cookie before doing any network work: file_get_contents()
     // returns FALSE on failure, and crawling with a FALSE cookie would send
     // unauthenticated requests. Fail the same way the empty-user case does.
     $cookie_path = '/home/liubin/Downloads/myspider2/config/cookie.txt';
     $cookie = file_get_contents($cookie_path);
     if ($cookie === FALSE) {
         file_put_contents($log_dir . date('Y-m-d H:i:s') . '.log', '无法读取cookie文件: ' . $cookie_path, FILE_APPEND);
         return FALSE;
     }

     $followees_urls = array();
     $followers_urls = array();
     foreach ($usernames as $username) {
         $followees_urls[] = "http://www.zhihu.com/people/{$username}/followees";
         $followers_urls[] = "http://www.zhihu.com/people/{$username}/followers";
     }

     $useragent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36';

     // One fetch pass per list type, keyed by callback name. Array order is
     // preserved, so followees are fetched first and followers second.
     $passes = array(
         'followees_callback' => $followees_urls,
         'followers_callback' => $followers_urls,
     );
     foreach ($passes as $callback => $urls) {
         $fetch = new CG_Fetch_Html($urls, $callback);
         $fetch->cookie = $cookie;
         $fetch->useragent = $useragent;
         $fetch->fetch_html();
     }
 }