/**
 * Download images for a list of URLs through MultiHttpRequest.
 *
 * Assembles a fixed set of cURL options and passes them, together with the
 * URLs, to a new MultiHttpRequest, starts it, and returns its result.
 *
 * Options applied:
 *  - CURLOPT_RETURNTRANSFER = 1: hand the response body back instead of printing it.
 *  - CURLOPT_AUTOREFERER    = 1: set the Referer header automatically on redirects.
 *  - CURLOPT_HEADER         = 0: do not include response headers in the body.
 *  - CURLOPT_FOLLOWLOCATION = 1: follow HTTP redirects.
 *  - CURLOPT_USERAGENT: a desktop Chrome 33 user-agent string.
 *
 * @param array $urls URLs to download.
 * @return array Value returned by MultiHttpRequest::getRes() once start() has run.
 */
private function MultiDownByUrls($urls)
{
    $userAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.154 Safari/537.36';

    // Built key by key, in the same order as before, so the option array is identical.
    $curlOptions = array();
    $curlOptions[CURLOPT_RETURNTRANSFER] = 1;
    $curlOptions[CURLOPT_AUTOREFERER] = 1;
    $curlOptions[CURLOPT_HEADER] = 0;
    $curlOptions[CURLOPT_FOLLOWLOCATION] = 1;
    $curlOptions[CURLOPT_USERAGENT] = $userAgent;

    $request = new MultiHttpRequest($urls, $curlOptions);
    $request->start();

    return $request->getRes();
}
// Crawler script: configures the database, the list-page URL rules and the
// regular expressions used for extraction, then starts iterating over the
// list rules. Only the beginning of the main loop is visible in this chunk.
require "libs/class_curl_multi.php";

// Connect to the database.
// NOTE(review): the mysql_* extension was removed in PHP 7, and the credentials
// are hard-coded here — confirm the target PHP version and move secrets to config.
$link = mysql_connect("localhost", "root", "greenwen");
mysql_select_db("www_curlmulti", $link);

// Empty the `content` table before a fresh crawl.
mysql_query("TRUNCATE TABLE content");

// Domain prefix (presumably prepended to relative links later — not visible here).
$base = "http://sellbest.net";

// List-page URL rules to crawl; the "[1-2]" part encodes a page range.
$list = array('http://sellbest.net/by-category/page[1-2]/36-iPad-CASES.html');

// Expression for the detail-page links found on a list page.
$list_rules = '<p class="productName">.*?<a href="(.*?)">.*?</a>.*?</p>';

// Expressions for each field extracted from a detail page, keyed by field name.
$detail_rules = array(
    'meta_title' => '<title>(.*?)</title>',
    'meta_keywords' => '<meta name="keywords" content="(.*?)" />',
    'meta_description' => '<meta name="description" content="(.*?)" />',
    'product_name' => '<h4 class="h4-title float-l"> (.*?)</h4>',
    'product_image' => '<div class="v-inner">.*?<a href="(.*?)" id="originalImg"><img src=".*?" alt=".*?" /></a>.*?</div>',
    'product_price' => 'Our Price : <strong>(.*?)</strong>',
    'product_description' => '<div class="description-text" id="description"><div class="border-cont">(.*?)</div>'
);

// Request helper instance.
$mp = new MultiHttpRequest();

// Debugging aid: counter for crawled items.
$j = 1;

// Number of links to request concurrently per batch.
$limit = 10;

// Number of pages skipped during pagination.
$last_page = 0;

// Start crawling.
// NOTE(review): the loop variable reuses $link, overwriting the database
// connection handle assigned above — confirm nothing later needs that handle.
foreach ($list as $link) {
    // Parse the page range out of the rule: captures the text between "[" and "]".
    preg_match_all('/\\[(.*)\\]/i', $link, $_page);
    // Skip rules whose captured page range is empty.
    // NOTE(review): when the rule has no "[...]" at all, $_page[1] is an empty
    // array and reading index 0 raises an undefined-offset notice before the
    // loose comparison skips the rule.
    if ($_page[1][0] == '') {
        continue;
    }
    // Split "first-last" into its two bounds.
    $pages = explode('-', $_page[1][0]);
    // Anything other than exactly two parts is handled in the branch below
    // (its body lies beyond the visible part of the source).
    if (count($pages) != 2) {