Example #1
0
/**
 * Log the URL, consult the item backup, and fetch the page with MCurl on a miss.
 *
 * NOTE(review): a backup hit currently aborts with an exception instead of
 * returning the stored copy; this looks like a debugging guard -- confirm
 * before relying on the backup.
 *
 * @param string $url Address of the page to fetch.
 * @return mixed Whatever MCurl's sendRequest() returns for the GET request.
 * @throws Exception With message "hit backup" (code 1) when readItemBackup()
 *                   returns a truthy value for $url.
 */
function grabWebPage($url)
{
    println('get url: ' . $url);

    // Only a backup miss (falsy value) is allowed to reach the network.
    if (readItemBackup($url)) {
        throw new Exception("hit backup", 1);
    }

    $request = MCurl::curlGetRequest($url);
    $request->setUseProxy(true);
    $request->setTimeout(10);
    $request->setRetry(999);
    $request->setRetrySleep(3);

    return $request->sendRequest();
}
Example #2
0
<?php

# Scrape Yihaodian (yhd.com) categories
require dirname(__FILE__) . '/script_init.php';
// Load every top-level category (yhd_parent_cid = 0), ordered by sort_order.
$dbProxy = BaseDal::getDBProxy(DBQUICKSHOP);
$sql = "SELECT *\r\n        FROM yhd_category\r\n        WHERE yhd_parent_cid=0\r\n        ORDER BY sort_order";
$ret = $dbProxy->rs2array($sql);

// For each category, request its left-menu data and hand the HTML to grepHtml().
foreach ($ret as $category) {
    $cid = $category['yhd_cid'];
    $url = sprintf('http://www.yhd.com/header/ajaxGetGlobalLeftFloatMenuDataV10.do?categoryId=%d', $cid);
    println('sending request: ' . $url);
    $curl = MCurl::curlGetRequest($url);
    $curl->setUseProxy(true);
    $response = $curl->sendRequest();
    println('received response');

    // The endpoint is expected to answer with JSON whose "value" member holds
    // the menu HTML. Skip anything else instead of passing null to grepHtml().
    $json = json_decode($response, true);
    if (!is_array($json) || !isset($json['value']) || !is_string($json['value'])) {
        println('skipped: response has no usable "value" member');
        println();
        continue;
    }

    grepHtml($json['value'], $cid);
    println();
}
//////////////////////////////////
/**
 * Pass every <dl>...</dl> fragment found in $html to grepDl().
 *
 * The match is non-greedy and spans line breaks, so each fragment ends at the
 * first closing </dl> after its opening tag.
 *
 * @param string $html       Markup to scan.
 * @param mixed  $parent_cid Forwarded unchanged to grepDl() with each fragment.
 * @return void
 */
function grepHtml($html, $parent_cid)
{
    $matches = array();
    $found = preg_match_all('/<dl[\\s\\S]*?<\\/dl>/', $html, $matches);

    // Both "no match" (0) and a regex failure (false) leave nothing to process.
    if (!$found) {
        return;
    }

    foreach ($matches[0] as $fragment) {
        grepDl($fragment, $parent_cid);
    }
}
function grepDl($html, $parent_cid)
Example #3
0
/**
 * Download a list of URLs in batches and store the outcome in $res.
 *
 * The transport is selected by $config['pars']: 'curl' uses the CURL helper
 * class, 'mcurl' uses the MCurl helper class, and any other value drives PHP's
 * curl_multi_* functions directly.
 *
 * @param array $inurls  URLs to fetch; keys are preserved and reused as keys of $res.
 * @param array $res     Output array, filled by reference.
 * @param array $config  Reads 'pars' (transport) and 'multiget' (batch size, values
 *                       below 1 are treated as 1); also forwarded to insert_stat()
 *                       and pars_data2().
 * @param mixed $transit Forwarded to insert_stat() and replaced by its return value
 *                       for the following responses. It is received by value, so the
 *                       caller never sees the updated value.
 * @param array $roster  Looked up with the same key as the URL and forwarded to
 *                       insert_stat() and pars_data2().
 * @param mixed $lang    Forwarded to pars_data2().
 * @param int   $multi   Non-zero: store the raw response bodies. Zero: decode each
 *                       body as JSON and, when status is 'ok' and status_code is
 *                       'NO_ERROR', store and cache the pars_data2() result.
 * @return void
 */
function multiget($inurls, &$res, $config, $transit, $roster, $lang, $multi = 0)
{
    global $cache;

    $timeout = 10;
    $tcurl = $config['pars'];

    // array_chunk() rejects a size below 1, so fall back to one URL per batch.
    $num = isset($config['multiget']) ? (int) $config['multiget'] : 1;
    if ($num < 1) {
        $num = 1;
    }

    foreach (array_chunk($inurls, $num, true) as $urls) {
        // Start every batch without results left over from the previous one.
        $result = null;

        if ($tcurl == 'curl') {
            $curl = new CURL();
            $curl->retry = 2;
            $opts = array(CURLOPT_RETURNTRANSFER => true, CURLOPT_CONNECTTIMEOUT => $timeout);
            foreach ($urls as $key => $link) {
                $curl->addSession($link, $key, $opts);
            }
            $result = $curl->exec();
            $curl->clear();
        } elseif ($tcurl == 'mcurl') {
            $curl = new MCurl();
            $curl->threads = 100;
            $curl->timeout = 15;
            $curl->sec_multiget($urls, $result);
        } else {
            $result = array();
            $handles = array();
            $mh = curl_multi_init();

            foreach ($urls as $key => $link) {
                $handle = curl_init();
                curl_setopt($handle, CURLOPT_URL, $link);
                curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
                curl_setopt($handle, CURLOPT_FAILONERROR, true);
                curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, $timeout);
                curl_setopt($handle, CURLOPT_HTTPHEADER, array("X-Requested-With: XMLHttpRequest", "Accept: text/html, */*", "User-Agent: Mozilla/3.0 (compatible; easyhttp)", "Connection: Keep-Alive"));
                curl_multi_add_handle($mh, $handle);
                $handles[$key] = $handle;
            }

            $running = 0;
            do {
                $status = curl_multi_exec($mh, $running);
                // Wait for socket activity instead of spinning; when select
                // cannot wait (-1) pause briefly so the loop does not burn CPU.
                if ($status === CURLM_OK && $running > 0 && curl_multi_select($mh, 1.0) === -1) {
                    usleep(1000);
                }
                // Stop on a multi-stack error rather than looping forever.
            } while ($running > 0 && ($status === CURLM_OK || $status === CURLM_CALL_MULTI_PERFORM));

            foreach ($handles as $key => $handle) {
                $result[$key] = curl_multi_getcontent($handle);
                curl_multi_remove_handle($mh, $handle);
                curl_close($handle);
            }
            curl_multi_close($mh);
        }

        // A helper that produced nothing iterable leaves this batch empty.
        if (!is_array($result)) {
            continue;
        }

        if ($multi != 0) {
            foreach ($result as $name => $val) {
                $res[$name] = $val;
            }
            continue;
        }

        foreach ($result as $name => $val) {
            if (!is_string($val) || $val === '') {
                continue;
            }
            $json = json_decode($val, true);
            if (!is_array($json) || !isset($json['status'], $json['status_code'])) {
                continue;
            }
            if ($json['status'] === 'ok' && $json['status_code'] === 'NO_ERROR') {
                $transit = insert_stat($json, $roster[$name], $config, $transit);
                $res[$name] = pars_data2($json, $name, $config, $lang, $roster[$name]);
                $cache->set($name, $res[$name], ROOT_DIR . '/cache/players/');
            }
        }
    }
}
Example #4
0
/**
 * Fetch a single URL and return its body.
 *
 * The transport is selected by $config['pars']: 'curl' uses the CURL helper
 * class, 'mcurl' uses the MCurl helper class, and any other value uses PHP's
 * curl_* functions directly.
 *
 * @param string $link   URL to fetch.
 * @param array  $config Only the 'pars' entry is read here.
 * @return mixed Entry 0 of the helper's result, or the response body for the
 *               native transport; an empty string when nothing was retrieved.
 */
function get_url($link, $config)
{
    if ($config['pars'] != 'curl' && $config['pars'] != 'mcurl') {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $link);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_FAILONERROR, true);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 2);
        curl_setopt($ch, CURLOPT_HTTPHEADER, array('Accept: application/json', 'X-Requested-With: XMLHttpRequest', 'Connection: Keep-Alive'));
        $data = curl_exec($ch);
        curl_close($ch);

        // curl_exec() yields false on failure; callers receive '' in that case.
        return $data === false ? '' : $data;
    }

    $url = array($link);
    $result = null;

    if ($config['pars'] == 'curl') {
        $curl = new CURL();
        $curl->retry = 4;
        $opts = array(CURLOPT_RETURNTRANSFER => true);
        $curl->addSession($url[0], 0, $opts);
        $result = $curl->exec();
        $curl->clear();
    } else {
        $curl = new MCurl();
        $curl->threads = 100;
        $curl->timeout = 15;
        $curl->sec_multiget($url, $result);
    }

    // Match the native transport: a missing entry is reported as ''.
    return isset($result[0]) ? $result[0] : '';
}
Example #5
0
/**
 * Fetch several URLs and place the responses in $result.
 *
 * @param array  $urls   URLs to fetch; with the 'curl' transport each key is
 *                       passed to CURL::addSession() alongside its URL.
 * @param mixed  $result Output, filled by reference: the return value of
 *                       CURL::exec(), or whatever MCurl::sec_multiget() stores.
 * @param string $tcurl  'curl' selects the CURL helper class; any other value
 *                       selects the MCurl helper class.
 * @return void
 */
function multiget($urls, &$result, $tcurl = 'curl')
{
    if ($tcurl == 'curl') {
        $curl = new CURL();
        $opts = array(CURLOPT_RETURNTRANSFER => true);
        foreach ($urls as $key => $link) {
            $curl->addSession($link, $key, $opts);
        }
        $result = $curl->exec();
        $curl->clear();
        return;
    }

    // The former unset() of an undefined, misspelled variable was a no-op and
    // has been dropped. Do not unset $result itself: that would detach the
    // by-reference parameter and the caller would receive nothing.
    $curl = new MCurl();
    $curl->threads = 100;
    $curl->timeout = 15;
    $curl->sec_multiget($urls, $result);
}