Exemple #1
0
     echo "the page don't get data\n";
     continue;
 }
 // Look for the "next page" anchor inside the pager markup of the current page.
 preg_match_all('/<div class="sgoNext">\\s?<a href="(.*?)".*?>/', $str, $next_url);
 if (empty($next_url[1][0])) {
     // No link captured: report it and drop $url.
     echo "don't get next page url\n";
     unset($url);
 } else {
     // The first captured href is appended to the base URL to form the next page URL.
     $url = $baseurl . $next_url[1][0];
     echo "next page url:" . $url . "\n";
 }
 // Visit each product page and read its detail fields.
 foreach ($goods_lists as $good) {
     echo "good page url:" . $good['url'] . "\n";
     // HttpGet is defined elsewhere; presumably it returns the page body as a string — TODO confirm.
     $str = HttpGet($good['url']);
     // Collapse every run of whitespace (line breaks included) into a single space.
     $str = preg_replace("/\\s+/", " ", $str);
     // Remove the spaces that directly follow a "<".
     $str = preg_replace("/<[ ]+/si", "<", $str);
     // Strip HTML comments.
     $str = preg_replace("/<\\!--.*?-->/si", "", $str);
     // Start matching the required fields.
     // Holds the regex match results, keyed by field name.
     $match_arr = array();
     // Product number (SKU id).
     preg_match('/<i class="gray09 fArial ml15">(.*?)<\\/i>/', $str, $match_arr['skuId']);
     // Terminate the script when no SKU id was captured.
     if (empty($match_arr['skuId'][1])) {
         die("don't get 'skuId' data\n");
     }
     // Product name.
     preg_match('/<span id="goods_name_baike">(.*?)<\\/span>/', $str, $match_arr['name']);
Exemple #2
0
<?php

// Current page number.
$page = 1;
// Total number of pages.
$totalPage = 0;
do {
    // Start offset passed as the "s" query parameter: 95 per page, the same value as pSize in the URL.
    $s = ($page - 1) * 95;
    $url = "http://list.taobao.com/itemlist/market/nongye1.htm?_input_charset=utf-8&json=on&s=" . $s . "&atype=b&cat=50107919&style=list&at=4673%2C11138&as=0&viewIndex=1&spm=a2106.2206569.0.0.klNaSh&same_info=1&isnew=2&pSize=95&_ksTS=1405255051526_27";
    // Fetch the JSON payload (HttpGet is defined elsewhere).
    $json = HttpGet($url);
    // Handle the encoding; characet is defined elsewhere and presumably converts to UTF-8 — TODO confirm.
    $utf8_json = characet($json);
    // Decode the JSON into an associative array.
    // NOTE(review): the result is not checked; if decoding fails $arr is null and the
    // lookups below yield null for $page, $totalPage and $lists.
    $arr = json_decode($utf8_json, true);
    // Current page number as reported by the response.
    $page = $arr['page']['currentPage'];
    // Total page count as reported by the response.
    $totalPage = $arr['page']['totalPage'];
    // Item list of this page.
    $lists = $arr['itemList'];
    // Number of items on this page.
    $count = count($lists);
    // Print progress to the terminal.
    echo "获取....第" . $page . "页,共" . $count . "条信息\n";
    // Insert into the database; the return value is used below as the number of successful inserts.
    $insert_num = insert_db($lists);
    // Print the success / failure counts to the terminal.
    echo "\n";
    echo "成功" . $insert_num . "条 失败" . ($count - $insert_num) . "条\n\n";
    // Advance to the next page.
    $page++;
Exemple #3
0
    // var_dump($v);
    // Collect every anchor in $v: capture group 1 is the href, group 2 is the link text.
    preg_match_all('/<a.*?href=\\"(.*?)\\".*?>(.*?)<\\/a>/i', $v, $temp_1);
    // Append one name/link pair per matched anchor under key $k - 1.
    for ($i = 0; $i < count($temp_1[1]); $i++) {
        $category_2[$k - 1][] = array('name' => $temp_1[2][$i], 'link' => $temp_1[1][$i]);
    }
}
// var_dump($category_2);
// var_dump($category_1);die;
// Build one $category entry per $category_1 entry: 'fname' and 'flink' come from
// the first element of match groups 2 and 1, 'children' from $category_2 under the same key.
foreach ($category_1 as $k => $v) {
    $category[$k] = array(
        'fname' => $v[2][0],
        'flink' => $v[1][0],
        // $category_2[$k] may never have been set; fall back to an empty list so
        // that 'children' is always an array and count() on it stays valid.
        'children' => isset($category_2[$k]) ? $category_2[$k] : array(),
    );
}
// var_dump($category);die;
// Walk every child link of every category and scrape its listing page.
foreach ($category as $k => $v) {
    for ($i = 0; $i < count($v['children']); $i++) {
        $url = $v['children'][$i]['link'];
        // HttpGet is defined elsewhere; presumably it returns the page body as a string — TODO confirm.
        $str = HttpGet($url);
        // print_r($url);
        // Collapse every run of whitespace (line breaks included) into a single space.
        $str = preg_replace("/\\s+/", " ", $str);
        // Remove the spaces that directly follow a "<".
        $str = preg_replace("/<[ ]+/si", "<", $str);
        // Strip HTML comments.
        $str = preg_replace("/<\\!--.*?-->/si", "", $str);
        // Price.
        // NOTE(review): the "." between the digit groups is unescaped, so it matches any
        // single character, not only a decimal point.
        preg_match_all('/<span class=\\"c-price\\">(\\d+.\\d+)/', $str, $price);
        // Item name.
        preg_match_all('/<a class="item-name".*?>(.*?)<\\/a>/', $str, $name);
        // Sales count.
        preg_match_all('/<span class="sale-num">(\\d+)<\\/span>/', $str, $sale_num);
        // Item id, taken from the "id" query parameter of the detail link.
        preg_match_all('/<a\\sclass="item-name".*?href=\\"http:\\/\\/detail.tmall.com\\/item.htm\\?id=(\\d+)&/', $str, $skuId);