Пример #1
0
/**
 * Download a page through the proxy-enabled MCurl request wrapper.
 *
 * The URL is first looked up with readItemBackup(). A truthy result aborts
 * the call with an exception, so only URLs without a backup are requested.
 * NOTE(review): throwing on a backup hit looks like a debugging guard --
 * confirm callers rely on it before changing it.
 *
 * @param string $url Address of the page to fetch.
 * @return mixed Whatever MCurl's sendRequest() returns for the URL.
 * @throws Exception Message "hit backup", code 1, when readItemBackup() returns a truthy value.
 */
function grabWebPage($url)
{
    println('get url: ' . $url);

    // A backup hit is treated as fatal here.
    if (readItemBackup($url)) {
        throw new Exception("hit backup", 1);
    }

    // No backup available: issue the request with retries enabled.
    $request = MCurl::curlGetRequest($url);
    $request->setUseProxy(true);
    $request->setTimeout(10);
    $request->setRetry(999);
    $request->setRetrySleep(3);

    return $request->sendRequest();
}
Пример #2
0
<?php

# Scrape the Yihaodian (yhd.com, "No. 1 Store") categories.
require dirname(__FILE__) . '/script_init.php';
$dbProxy = BaseDal::getDBProxy(DBQUICKSHOP);
// Select only the rows whose yhd_parent_cid is 0, ordered by sort_order.
$sql = "SELECT *\r\n        FROM yhd_category\r\n        WHERE yhd_parent_cid=0\r\n        ORDER BY sort_order";
$ret = $dbProxy->rs2array($sql);
foreach ($ret as $category) {
    $cid = $category['yhd_cid'];
    $url = sprintf('http://www.yhd.com/header/ajaxGetGlobalLeftFloatMenuDataV10.do?categoryId=%d', $cid);
    println('sending request: ' . $url);
    $curl = MCurl::curlGetRequest($url);
    $curl->setUseProxy(true);
    $response = $curl->sendRequest();
    println('received response');
    $json = json_decode($response, true);
    // json_decode() yields null for a failed request or a non-JSON body, and
    // the payload may lack a usable 'value' entry. Validate before use so we
    // never index null or hand a non-string to the regex in grepHtml().
    if (is_array($json) && isset($json['value']) && is_string($json['value'])) {
        grepHtml($json['value'], $cid);
    } else {
        println('unexpected response, skipping category: ' . $cid);
    }
    println();
}
//////////////////////////////////
/**
 * Find every <dl>...</dl> fragment in an HTML string and pass each one to grepDl().
 *
 * @param string $html       Markup to scan.
 * @param mixed  $parent_cid Passed through unchanged to grepDl() with each fragment.
 * @return void
 */
function grepHtml($html, $parent_cid)
{
    // Lazy quantifier keeps each <dl> block separate; [\s\S] also spans newlines.
    $dlPattern = '/<dl[\\s\\S]*?<\\/dl>/';

    $found = preg_match_all($dlPattern, $html, $matches);
    if (!$found) {
        // Nothing matched, or the regex engine reported a failure.
        return;
    }

    foreach ($matches[0] as $fragment) {
        grepDl($fragment, $parent_cid);
    }
}
function grepDl($html, $parent_cid)