/**
 * Download a page through an MCurl GET request with the proxy enabled.
 *
 * The URL is logged first, then looked up with readItemBackup(). A truthy
 * lookup result aborts the call with an exception; it is never returned.
 * NOTE(review): throwing on a backup hit looks like a leftover debugging
 * guard rather than intended caching behaviour - confirm with the callers
 * before changing it.
 *
 * @param mixed $url URL to fetch; concatenated into the log line and handed
 *                   to readItemBackup() and MCurl::curlGetRequest().
 *
 * @return mixed Whatever the MCurl request's sendRequest() returns.
 *
 * @throws Exception With message "hit backup" and code 1 when
 *                   readItemBackup() yields a truthy value.
 */
function grabWebPage($url)
{
    println('get url: ' . $url);

    // Guard clause: a backup hit stops here, so everything below only runs
    // when no backup exists for this URL.
    if (readItemBackup($url)) {
        throw new Exception("hit backup", 1);
    }

    $request = MCurl::curlGetRequest($url);
    $request->setUseProxy(true);
    $request->setTimeout(10);    // presumably seconds - TODO confirm against MCurl
    $request->setRetry(999);     // retry setting of 999: effectively "keep trying"
    $request->setRetrySleep(3);  // presumably seconds between retries - TODO confirm

    return $request->sendRequest();
}
<?php
# Scrape the Yihaodian (1号店, yhd.com) category menu.
require dirname(__FILE__) . '/script_init.php';

$dbProxy = BaseDal::getDBProxy(DBQUICKSHOP);

// Only rows with yhd_parent_cid = 0 are selected, ordered by sort_order.
$sql = "SELECT *\r\n FROM yhd_category\r\n WHERE yhd_parent_cid=0\r\n ORDER BY sort_order";
$ret = $dbProxy->rs2array($sql);

foreach ($ret as $category) {
    $cid = $category['yhd_cid'];
    $url = sprintf('http://www.yhd.com/header/ajaxGetGlobalLeftFloatMenuDataV10.do?categoryId=%d', $cid);

    println('sending request: ' . $url);
    $curl = MCurl::curlGetRequest($url);
    $curl->setUseProxy(true);
    $response = $curl->sendRequest();
    println('received response');

    // json_decode() returns null for malformed input, and a failed request
    // may not yield a string at all. Validate before touching $json['value']
    // so a bad response skips this category instead of raising warnings and
    // feeding null into the regex.
    $json = is_string($response) ? json_decode($response, true) : null;
    if (!is_array($json) || !isset($json['value'])) {
        println('unexpected response, skipping category: ' . $cid);
        println();
        continue;
    }

    grepHtml($json['value'], $cid);
    println();
}

//////////////////////////////////

/**
 * Find every <dl>...</dl> fragment in the given HTML and pass each one to
 * grepDl() together with the parent category id.
 *
 * @param string $html       HTML to scan (the "value" field of the menu response).
 * @param mixed  $parent_cid Category id forwarded unchanged to grepDl().
 *
 * @return void
 */
function grepHtml($html, $parent_cid)
{
    // Non-greedy match so each <dl> block is captured separately; [\s\S]
    // lets the match span line breaks.
    $pattern = '/<dl[\\s\\S]*?<\\/dl>/';

    if (preg_match_all($pattern, $html, $reg)) {
        foreach ($reg[0] as $dl) {
            grepDl($dl, $parent_cid);
        }
    }
}

function grepDl($html, $parent_cid)