<?php
/**
 * Example: feed ten tasks through CurlMulti_Core with the 'queue' task pool
 * and a single worker (maxThread = 1).
 *
 * Each task prints "<i> added" when it is registered and "<i> finished" from
 * its success callback.
 */

require '../../CurlMulti/Core.php';

/**
 * Success callback: report which task has completed.
 *
 * @param mixed $r    Result handed over by CurlMulti_Core (not used here).
 * @param array $args The 'args' array that was attached to the task.
 */
function cbProcess($r, $args)
{
    echo "{$args['i']} finished\n";
}

$curl = new CurlMulti_Core();
$curl->maxThread = 1;
$curl->taskPoolType = 'queue';

$url = 'http://www.baidu.com';

$i = 0;
while ($i < 10) {
    $curl->add(
        array(
            'url' => "{$url}?wd={$i}",
            // Carried through to cbProcess() so it can identify the task.
            'args' => array('i' => $i),
        ),
        'cbProcess'
    );
    echo "{$i} added\n";
    $i++;
}

$curl->start();
<?php
/**
 * Example: request 100 search pages with the on-disk cache enabled.
 *
 * The cache directory lives next to this script and is created on first run.
 * Progress reporting is delegated to cbInfo(), which is expected to be
 * defined in inc/cb_info.php.
 */

// Anchor includes to this file's directory so the script works regardless of
// the current working directory (the cache path below is anchored the same way).
require __DIR__ . '/../../CurlMulti/Core.php';
require __DIR__ . '/inc/cb_info.php';

$curl = new CurlMulti_Core();
$curl->cbInfo = 'cbInfo';
$curl->maxThread = 2;
$curl->cache['enable'] = true;
$curl->cache['dir'] = __DIR__ . '/cache';

// is_dir() instead of file_exists(): a regular file with the same name is not
// a usable cache directory. A failed mkdir() is reported instead of ignored.
if (!is_dir($curl->cache['dir']) && !mkdir($curl->cache['dir'])) {
    throw new RuntimeException('Unable to create cache directory: ' . $curl->cache['dir']);
}

$url = 'http://www.baidu.com';
for ($i = 0; $i < 100; $i++) {
    $curl->add(array('url' => $url . '?wd=' . $i));
}

$curl->start();
<?php
/**
 * Example: one reachable HTTPS URL and one unresolvable URL, each registered
 * with both a success callback and a failure callback.
 */

require '../CurlMulti/Core.php';

/**
 * Success callback: print the URL, the keys present in the result, and the
 * task's own arguments.
 *
 * @param array $r    Result array; $r['info']['url'] is read here.
 * @param array $args The 'args' array attached to the task.
 */
function cbProcess($r, $args)
{
    echo "success, url={$r['info']['url']}\n";
    print_r(array_keys($r));
    print_r($args);
}

/**
 * Failure callback: print the URL, the complete result, and the task's
 * arguments.
 *
 * @param array $r    Result array; $r['info']['url'] is read here.
 * @param array $args The 'args' array attached to the task.
 */
function cbFail($r, $args)
{
    echo "fail, url={$r['info']['url']}\n";
    print_r($r);
    print_r($args);
}

$url1 = 'https://gcc.gnu.org';
$url2 = 'http://urlnotexits';

$curl = new CurlMulti_Core();
$curl->opt[CURLOPT_HTTPPROXYTUNNEL] = true;
// NOTE(review): certificate and host verification are switched off here;
// that is only acceptable in a demo.
$curl->opt[CURLOPT_SSL_VERIFYPEER] = false;
$curl->opt[CURLOPT_SSL_VERIFYHOST] = false;

$curl->add(
    array(
        'url' => $url1,
        'args' => array('title' => 'This is url1'),
    ),
    'cbProcess',
    'cbFail'
);

$curl->add(
    array(
        'url' => $url2,
        'args' => array('title' => 'This is url2'),
        // Task-level cURL option: one second timeout for this task.
        'opt' => array(CURLOPT_TIMEOUT => 1),
    ),
    'cbProcess',
    'cbFail'
);

$curl->start();
<?php
/**
 * Example: a global failure callback versus a failure callback attached to a
 * single task. Both URLs are intentionally bad.
 */

require '../../CurlMulti/Core.php';

/**
 * Failure callback registered for one specific task.
 *
 * @param array $err  Failure data; 'info' => 'url' and 'error' are read here.
 * @param mixed $args Task arguments (unused).
 */
function cbFailTask($err, $args)
{
    echo "Task Fail: {$err['info']['url']}\n";
    print_r($err['error']);
}

/**
 * Failure callback registered globally on the CurlMulti_Core instance.
 *
 * @param array $err  Failure data; 'info' => 'url' and 'error' are read here.
 * @param mixed $args Task arguments (unused).
 */
function cbFailGlobal($err, $args)
{
    echo "Global Fail: {$err['info']['url']}\n";
    print_r($err['error']);
}

$url1 = 'http://badurl1';
$url2 = 'http://badurl2';

$curl = new CurlMulti_Core();
$curl->maxTry = 1;
$curl->opt[CURLOPT_CONNECTTIMEOUT] = 1;
$curl->opt[CURLOPT_TIMEOUT] = 1;

// Global failure callback.
$curl->cbFail = 'cbFailGlobal';

// The first task carries its own failure callback; the second passes none.
$curl
    ->add(array('url' => $url1), null, 'cbFailTask')
    ->add(array('url' => $url2))
    ->start();
include ROOT_PATH . '/../config/config_local.php'; } elseif ($env == 'test') { include ROOT_PATH . '/../config/config_test.php'; } elseif ($env == 'pro') { include ROOT_PATH . '/../config/config.php'; } else { die('config'); } $db = new medoo($dbConfig); $dbCom = new medoo($dbcomConfig); $rabbitmqObj = new RabbitMQ($rabbitmqConfig); $ip = ''; $baseUrl = 'http://139.129.76.139/enterprises/w1/getDataById?id='; $baseUrl = "http://app.qichacha.com/enterprises/new/a1/getData"; //初始化项目 $curl = new CurlMulti_Core(); $curl->opt[CURLOPT_TIMEOUT] = 10; $curl->maxThread = 2; $curl->maxTry = 0; $curl->cbTask = array('addCollectTask', array()); $curl->cbInfo = 'getStatusInfo'; $curl->start(); //初始化采集任务,取队列构造 function addCollectTask() { global $curl, $baseUrl, $rabbitmqObj, $db, $dbCom, $dbConfig, $dbcomConfig; $list = array(); while (count($list) < $curl->maxThread) { $rs = $rabbitmqObj->get('combusiness_unique1_ssdb'); if (!empty($rs)) { $resArr = json_decode($rs, true);
<?php
/**
 * Example: download a single image to a file next to this script and report
 * the destination path from the success callback.
 */

// Anchor the include to this file's directory, matching how the download
// target is resolved, so the script does not depend on the working directory.
require __DIR__ . '/../../CurlMulti/Core.php';

/**
 * Success callback: announce where the download was written.
 *
 * @param mixed $r    Result handed over by CurlMulti_Core (unused).
 * @param array $args The 'args' array attached to the task; 'filePath' is read.
 */
function cbProcess($r, $args)
{
    echo "download finished successfully, file={$args['filePath']}\n";
}

$curl = new CurlMulti_Core();

$url = 'http://www.baidu.com/img/bd_logo1.png';
$file = __DIR__ . '/baidu.png';

// Reuse $file for both the download target and the callback argument so the
// two can never drift apart.
$curl->add(
    array(
        'url' => $url,
        'file' => $file,
        'args' => array('filePath' => $file),
    ),
    'cbProcess'
);

// start spider
$curl->start();
<?php
/**
 * Example: retry behaviour against an unreachable URL with one-second
 * connect and transfer timeouts.
 */

require '../../CurlMulti/Core.php';

/**
 * Global failure callback: dump the error part of the failure data.
 *
 * @param array $err  Failure data; only $err['error'] is read.
 * @param mixed $args Task arguments (unused).
 */
function cbFail($err, $args)
{
    print_r($err['error']);
}

$url = 'http://badurl';

$curl = new CurlMulti_Core();

// With maxTry = 10 the timeout is expected to occur 10 times.
$curl->maxTry = 10;
$curl->opt[CURLOPT_CONNECTTIMEOUT] = 1;
$curl->opt[CURLOPT_TIMEOUT] = 1;
$curl->cbFail = 'cbFail';

$curl->add(array('url' => $url));

// Run the queued task.
$curl->start();
<?php
/**
 * Example: an overall thread limit plus separate limits per task type.
 *
 * 100 'html' tasks and 100 'image' tasks are queued alternately. Progress
 * reporting is delegated to cbInfo(), expected to come from inc/cb_info.php.
 */

require '../../CurlMulti/Core.php';
require './inc/cb_info.php';

$curl = new CurlMulti_Core();
$curl->cbInfo = 'cbInfo';
$curl->maxThread = 10;
// Limits keyed by the 'type' value each task passes in its 'ctl' array
// (presumably a per-type concurrency cap — confirm against Core.php).
$curl->maxThreadType['html'] = 2;
$curl->maxThreadType['image'] = 5;

$url1 = 'http://www.baidu.com';
$url2 = 'http://www.baidu.com/img/bd_logo1.png';

$i = 0;
while ($i < 100) {
    $curl->add(array(
        'url' => "{$url1}?wd={$i}",
        'ctl' => array('type' => 'html'),
    ));
    $curl->add(array(
        'url' => "{$url2}?i={$i}",
        'ctl' => array('type' => 'image'),
    ));
    $i++;
}

$curl->start();
<?php $urls = array(); for ($i = 0; $i < 100; $i++) { $urls[] = 'http://localhost/' . $i . '.html'; } $dir = __DIR__ . '/cache'; $curl = new CurlMulti_Core(); $curl->opt[CURLOPT_TIMEOUT] = 30; $curl->maxThread = 10; $curl->maxTry = 3; $curl->cache = array('enable' => true, 'dir' => $dir, 'expire' => 3600); $cbFailGlobal = 'callbackFail'; $cbFailTask = 'callbackFailTask'; $curl->cbFail = $cbFailGlobal; foreach ($urls as $k => $v) { $task = array('url' => $v, 'args' => array('page' . $k)); $curl->add($task, 'callback1', $cbFailTask); } // download task with task CURLOPT_*,callback can be ommited $curl->add(array('url' => 'https://www.kernel.org/pub/linux/kernel/v3.x/linux-3.19.tar.xz', 'file' => __DIR__ . '/linux-3.19.tar.xz', 'opt' => array(CURLOPT_TIMEOUT => 600))); // start the loop $curl->start(); function callback1($r, $args) { global $curl; // you can call $curl->add() anywhere $curl->add(array('url' => 'https://gcc.gnu.org/'), 'callback2'); } function callback2($r, $args) {
<?php
/**
 * Example: pass a callback to CurlMulti_Core::start() with no tasks queued.
 *
 * The callback prints a counter (0, 1, 2). It returns false once the counter
 * has reached 3; otherwise it sleeps for one second and returns true.
 * Presumably start() keeps running while the callback returns true — confirm
 * against Core.php.
 */

require '../../CurlMulti/Core.php';

// The unused "$url = array();" assignment was dropped: nothing read it.
$curl = new CurlMulti_Core();

$curl->start(function () {
    // Persists across invocations of the callback.
    static $i = 0;

    echo $i++ . "\n";
    if ($i >= 3) {
        return false;
    }

    sleep(1);

    return true;
});
<?php
/**
 * Example: set a cURL option on the instance and a different value for the
 * same option on a single task.
 *
 * CURLOPT_RETURNTRANSFER is false on the instance and true in the task's own
 * 'opt' array; the callback then prints the length of the returned content.
 */

require '../../CurlMulti/Core.php';

$curl = new CurlMulti_Core();
$curl->opt[CURLOPT_RETURNTRANSFER] = false;

$url = 'http://www.baidu.com';

$curl->add(
    array(
        'url' => $url,
        // Task-level value for the option set on the instance above.
        'opt' => array(CURLOPT_RETURNTRANSFER => true),
    ),
    function ($r, $args) {
        $length = strlen($r['content']);
        echo "content length: " . $length;
    }
);

$curl->start();
<?php
/**
 * Example: the minimal flow — queue a list of URLs with one success callback
 * and start processing.
 */

require '../../CurlMulti/Core.php';

/**
 * Success callback: print the URL, the keys present in the result, and the
 * task arguments.
 *
 * @param array $r    Result array; $r['info']['url'] is read here.
 * @param mixed $args Task arguments as passed by CurlMulti_Core.
 */
function cbProcess($r, $args)
{
    echo "success, url={$r['info']['url']}\n";
    print_r(array_keys($r));
    print_r($args);
}

$url = array(
    'http://baidu.com',
    'http://bing.com',
);

$curl = new CurlMulti_Core();

foreach ($url as $v) {
    $curl->add(array('url' => $v), 'cbProcess');
}

// Run everything that was queued above.
$curl->start();
<?php
/**
 * Example: queue a follow-up task from inside a success callback.
 *
 * cb1() handles the first URL and adds a second task whose result is handled
 * by cb2().
 */

require '../../CurlMulti/Core.php';

/**
 * Success callback for the first task: report completion, then add a second
 * task to the same CurlMulti_Core instance.
 *
 * @param array $r    Result array; $r['info']['url'] is read here.
 * @param mixed $args Task arguments (unused).
 */
function cb1($r, $args)
{
    // The instance is created at script level, so it is reached via global.
    global $curl;

    echo "{$r['info']['url']} finished\n";

    $curl->add(array('url' => 'http://bing.com'), 'cb2');
    echo "http://bing.com added\n";
}

/**
 * Success callback for the follow-up task: report completion.
 *
 * @param array $r    Result array; $r['info']['url'] is read here.
 * @param mixed $args Task arguments (unused).
 */
function cb2($r, $args)
{
    echo "{$r['info']['url']} finished\n";
}

$curl = new CurlMulti_Core();

$url = 'http://baidu.com';
$curl->add(array('url' => $url), 'cb1');

// Run the queued task; cb1() adds another one while this is running.
$curl->start();