Example #1
0
 /**
  * Download the resources (images, per the original note) at the given URLs
  * through a single MultiHttpRequest run.
  *
  * The URLs and a fixed set of cURL options are handed to MultiHttpRequest,
  * the request is started, and whatever getRes() reports is returned as-is.
  *
  * @param array $urls URLs to fetch
  * @return array value of MultiHttpRequest::getRes() (shape not visible here — confirm against that class)
  */
 private function MultiDownByUrls($urls)
 {
     // cURL options passed along with the URLs.
     $curlOptions = array(
         CURLOPT_RETURNTRANSFER => 1, // hand the body back instead of printing it
         CURLOPT_AUTOREFERER => 1,    // set Referer automatically when redirected
         CURLOPT_HEADER => 0,         // keep response headers out of the output
         CURLOPT_FOLLOWLOCATION => 1, // follow Location: redirects
         CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.154 Safari/537.36',
     );

     $downloader = new MultiHttpRequest($urls, $curlOptions);
     $downloader->start();

     return $downloader->getRes();
 }
Example #2
0
// Setup for a paginated product crawler: loads the multi-request library,
// connects to MySQL and empties the target table, then declares the URL
// rules, extraction patterns and counters used by the crawl loop below.
require "libs/class_curl_multi.php";
// Connect to the database.
// NOTE(review): the mysql_* extension was removed in PHP 7; this script needs
// PHP 5.x (or a port to mysqli/PDO). Return values are not checked here.
// NOTE(review): $link is reused as the loop variable of the foreach further
// down, which overwrites this connection handle.
$link = mysql_connect("localhost", "root", "greenwen");
mysql_select_db("www_curlmulti", $link);
// Empty the target table before each run (destructive: all rows in `content` are removed).
mysql_query("TRUNCATE TABLE content");
// Domain prefix of the crawled site.
$base = "http://sellbest.net";
// List-page URL rules to crawl (paginated). The bracketed "[a-b]" part is
// extracted by the loop below and split on "-" into two values.
$list = array('http://sellbest.net/by-category/page[1-2]/36-iPad-CASES.html');
// Pattern (without delimiters) for the detail-page links inside a list page;
// the capture group is the href.
$list_rules = '<p class="productName">.*?<a href="(.*?)">.*?</a>.*?</p>';
// Patterns (without delimiters) for the fields of a detail page, keyed by
// field name; each has one capture group holding the field's value.
$detail_rules = array('meta_title' => '<title>(.*?)</title>', 'meta_keywords' => '<meta name="keywords" content="(.*?)" />', 'meta_description' => '<meta name="description" content="(.*?)" />', 'product_name' => '<h4 class="h4-title float-l"> (.*?)</h4>', 'product_image' => '<div class="v-inner">.*?<a href="(.*?)" id="originalImg"><img src=".*?" alt=".*?" /></a>.*?</div>', 'product_price' => 'Our Price : <strong>(.*?)</strong>', 'product_description' => '<div class="description-text" id="description"><div class="border-cont">(.*?)</div>');
// Request helper instance (class presumably defined in the required library file — confirm).
$mp = new MultiHttpRequest();
// Debug counter for collected items.
$j = 1;
// Number of links to fetch concurrently per batch.
$limit = 10;
// Number of pages skipped during pagination.
$last_page = 0;
//开始采集
foreach ($list as $link) {
    //解析列表页数
    preg_match_all('/\\[(.*)\\]/i', $link, $_page);
    if ($_page[1][0] == '') {
        continue;
    }
    $pages = explode('-', $_page[1][0]);
    if (count($pages) != 2) {