Exemplo n.º 1
0
// Raise PHP's memory limit to 1024M for this script run.
ini_set('memory_limit', '1024M');
// Load core/init.php, resolved relative to this file's parent directory.
require __DIR__ . '/../core/init.php';
/* Do NOT delete this comment */
/* 不要删除这段注释 */
// Configuration handed to the phpspider constructor below. Every key and value
// is unchanged from the original one-line form; only the layout differs.
$configs = array(
    'name' => '天行云',
    'log_show' => true,
    'tasknum' => 1,
    'max_try' => 5,
    'domains' => array('www.xyb2b.com'),
    'scan_urls' => array('http://www.xyb2b.com/Products/Index'),
    'list_url_regexes' => array("http://www.xyb2b.com/Products/Index/start/\\d+"),
    'content_url_regexes' => array("http://www.xyb2b.com/Home/Products/detail/gid/\\d+"),
    // One entry per extracted field: a field name, an XPath selector, and a
    // `required` flag.
    'fields' => array(
        array(
            'name' => "txy_price",
            'selector' => '//*[@id="goodsForm"]/div[6]/div[1]/div[2]/div[2]/div/div/ul/li/h2/text()',
            'required' => true,
        ),
        array(
            'name' => "article_content",
            'selector' => "//h1[@id='detailName']",
            'required' => true,
        ),
        array(
            'name' => "article_num",
            'selector' => '//*[@id="showNum"]',
            'required' => true,
        ),
        array(
            'name' => "article_publish_time",
            'selector' => "//div[contains(@class,'author')]//h2",
            'required' => true,
        ),
        // NOTE(review): this selector is identical to article_publish_time's;
        // it looks like a copy-paste leftover -- confirm the intended XPath.
        array(
            'name' => "url",
            'selector' => "//div[contains(@class,'author')]//h2",
            'required' => true,
        ),
    ),
);
$spider = new phpspider($configs);
// Callback assigned to the spider's on_start hook: it logs in to www.xyb2b.com
// by POSTing the login form, then checks that cookies were stored for the
// domain. The original author's note says the framework collects cookies
// automatically and attaches them to later requests for the same domain.
// The $phpspider argument is not used here.
$spider->on_start = function ($phpspider) {
    // Login endpoint.
    $login_url = "http://www.xyb2b.com/Home/User/doLogin";
    // Form fields submitted with the login request. The "requsetUrl" spelling
    // is kept as-is: it is sent to the server verbatim (presumably the field
    // name the site expects -- verify before "fixing" it).
    $params = array(
        "m_name" => "public",
        "m_password" => "123456",
        "savelogin" => "0",
        "requsetUrl" => "http://www.xyb2b.com/Products/Index/start/1",
    );
    // Send the login request.
    requests::post($login_url, $params);

    // Surface a failed login instead of silently crawling unauthenticated.
    // An empty result here means no cookie was stored for the domain.
    $cookies = requests::get_cookies("www.xyb2b.com");
    if (empty($cookies)) {
        error_log("on_start: no cookies stored for www.xyb2b.com after the login POST; pages that require login may fail");
    }
    // To double-check the session manually, fetch a login-only page such as
    // http://www.xyb2b.com/Ucenter/Cart/pageList via $phpspider->request_url().
};
$spider->on_scan_page = function ($page, $content, $phpspider) {
    preg_match('#<a href="(.*)" class="change">尾页</a>#', $content, $out);
    preg_match('(\\d+)', $out[1], $out_2);
    for ($i = 0; $i <= $out_2[0]; $i++) {
        $url = "http://www.xyb2b.com/Products/Index/start/{$i}";
        $options = array('url_type' => $url, 'method' => 'get');
        $phpspider->add_url($url, $options);
    }