Esempio n. 1
0
<?php

require '../../CurlMulti/Core.php';
// Queue ten search requests on a crawler limited to a single concurrent
// transfer (task pool type 'queue'), then run them.
$curl = new CurlMulti_Core();
$curl->maxThread = 1;
$curl->taskPoolType = 'queue';
$url = 'http://www.baidu.com';
for ($i = 0; $i < 10; $i++) {
    // The loop index travels with the task so the callback can report it.
    $task = array(
        'url' => "{$url}?wd={$i}",
        'args' => array('i' => $i),
    );
    $curl->add($task, 'cbProcess');
    echo "{$i} added\n";
}
$curl->start();
/**
 * Success callback: prints the index that was attached to the task.
 *
 * @param mixed $r    Result passed by the crawler (not read here).
 * @param array $args Task arguments; only $args['i'] is read.
 */
function cbProcess($r, $args)
{
    echo "{$args['i']} finished\n";
}
Esempio n. 2
0
<?php

require '../../CurlMulti/Core.php';
require './inc/cb_info.php';
// Fetch 100 search pages with at most two concurrent transfers and the
// crawler's cache enabled. 'cbInfo' is registered as the info callback
// (presumably defined in ./inc/cb_info.php -- confirm).
$curl = new CurlMulti_Core();
$curl->cbInfo = 'cbInfo';
$curl->maxThread = 2;
$curl->cache['enable'] = true;
$curl->cache['dir'] = __DIR__ . '/cache';
// Make sure the cache directory exists. is_dir() is used instead of
// file_exists() so that a regular file at that path is not mistaken for a
// usable directory; the second is_dir() tolerates another process creating
// the directory between the check and the mkdir() call.
$cacheDir = $curl->cache['dir'];
if (!is_dir($cacheDir) && !mkdir($cacheDir, 0777, true) && !is_dir($cacheDir)) {
    throw new RuntimeException('Unable to create cache directory: ' . $cacheDir);
}
$url = 'http://www.baidu.com';
for ($i = 0; $i < 100; $i++) {
    $curl->add(array('url' => $url . '?wd=' . $i));
}
$curl->start();
Esempio n. 3
0
<?php

require '../CurlMulti/Core.php';
// Two tasks, each registered with both a success and a failure callback.
$url1 = 'https://gcc.gnu.org';
$url2 = 'http://urlnotexits';
$curl = new CurlMulti_Core();
// cURL options set on the crawler itself. Note that peer and host
// certificate verification are both switched off here.
$curl->opt[CURLOPT_HTTPPROXYTUNNEL] = true;
$curl->opt[CURLOPT_SSL_VERIFYPEER] = false;
$curl->opt[CURLOPT_SSL_VERIFYHOST] = false;
$firstTask = array(
    'url' => $url1,
    'args' => array('title' => 'This is url1'),
);
// The second task additionally carries its own one-second timeout.
$secondTask = array(
    'url' => $url2,
    'args' => array('title' => 'This is url2'),
    'opt' => array(CURLOPT_TIMEOUT => 1),
);
$curl->add($firstTask, 'cbProcess', 'cbFail');
$curl->add($secondTask, 'cbProcess', 'cbFail');
$curl->start();
/**
 * Success callback: reports the URL, then dumps the result's top-level
 * keys and the task arguments.
 *
 * @param array $r    Result array; $r['info']['url'] is read.
 * @param mixed $args Arguments attached to the task.
 */
function cbProcess($r, $args)
{
    $resultKeys = array_keys($r);
    echo "success, url={$r['info']['url']}\n";
    print_r($resultKeys);
    print_r($args);
}
/**
 * Failure callback: reports the URL, then dumps the whole result and the
 * task arguments.
 *
 * @param array $r    Result array; $r['info']['url'] is read.
 * @param mixed $args Arguments attached to the task.
 */
function cbFail($r, $args)
{
    $failedUrl = $r['info']['url'];
    echo "fail, url={$failedUrl}\n";
    print_r($r);
    print_r($args);
}
Esempio n. 4
0
<?php

require '../../CurlMulti/Core.php';
$url1 = 'http://badurl1';
$url2 = 'http://badurl2';
$curl = new CurlMulti_Core();
$curl->maxTry = 1;
$curl->opt[CURLOPT_CONNECTTIMEOUT] = 1;
$curl->opt[CURLOPT_TIMEOUT] = 1;
// cbFail golbal
$curl->cbFail = 'cbFailGlobal';
// cbFail for individual task
$curl->add(array('url' => $url1), null, 'cbFailTask')->add(array('url' => $url2))->start();
/**
 * Task-level failure callback: prints the failing URL and dumps the error.
 *
 * @param array $err  Failure data; $err['info']['url'] and $err['error'] are read.
 * @param mixed $args Task arguments (not read here).
 */
function cbFailTask($err, $args)
{
    $failedUrl = $err['info']['url'];
    echo "Task Fail: {$failedUrl}\n";
    print_r($err['error']);
}
/**
 * Global failure callback: prints the failing URL and dumps the error.
 *
 * @param array $err  Failure data; $err['info']['url'] and $err['error'] are read.
 * @param mixed $args Task arguments (not read here).
 */
function cbFailGlobal($err, $args)
{
    $failedUrl = $err['info']['url'];
    echo "Global Fail: {$failedUrl}\n";
    print_r($err['error']);
}
Esempio n. 5
0
    include ROOT_PATH . '/../config/config_local.php';
} elseif ($env == 'test') {
    include ROOT_PATH . '/../config/config_test.php';
} elseif ($env == 'pro') {
    include ROOT_PATH . '/../config/config.php';
} else {
    die('config');
}
// Build the medoo and RabbitMQ client objects from the configuration
// variables (these are not set in this part of the file; presumably they
// come from the config file included above).
$db = new medoo($dbConfig);
$dbCom = new medoo($dbcomConfig);
$rabbitmqObj = new RabbitMQ($rabbitmqConfig);
$ip = '';
// Base URL of the data endpoint. An earlier assignment of an IP-based URL
// (http://139.129.76.139/enterprises/w1/getDataById?id=) was overwritten on
// the very next line before any use, so that dead store has been removed.
$baseUrl = "http://app.qichacha.com/enterprises/new/a1/getData";
// Initialize the crawler
$curl = new CurlMulti_Core();
$curl->opt[CURLOPT_TIMEOUT] = 10;
$curl->maxThread = 2;
$curl->maxTry = 0;
// cbTask is given the function name plus an (empty) argument list.
$curl->cbTask = array('addCollectTask', array());
$curl->cbInfo = 'getStatusInfo';
$curl->start();
// Initialize the collection tasks, building them from the message queue
function addCollectTask()
{
    global $curl, $baseUrl, $rabbitmqObj, $db, $dbCom, $dbConfig, $dbcomConfig;
    $list = array();
    while (count($list) < $curl->maxThread) {
        $rs = $rabbitmqObj->get('combusiness_unique1_ssdb');
        if (!empty($rs)) {
            $resArr = json_decode($rs, true);
Esempio n. 6
0
<?php

require '../../CurlMulti/Core.php';
// Download one image into a file next to this script; the target path is
// also handed to the callback through 'args'.
$curl = new CurlMulti_Core();
$url = 'http://www.baidu.com/img/bd_logo1.png';
$file = __DIR__ . '/baidu.png';
$task = array(
    'url' => $url,
    'file' => $file,
    'args' => array('filePath' => $file),
);
$curl->add($task, 'cbProcess');
// start spider
$curl->start();
/**
 * Success callback: announces the path that was attached to the task.
 *
 * @param mixed $r    Result passed by the crawler (not read here).
 * @param array $args Task arguments; only $args['filePath'] is read.
 */
function cbProcess($r, $args)
{
    echo 'download finished successfully, file=' . $args['filePath'] . "\n";
}
Esempio n. 7
0
<?php

require '../../CurlMulti/Core.php';
$url = 'http://badurl';
$curl = new CurlMulti_Core();
// timeout will occur 10 times
$curl->maxTry = 10;
$curl->opt[CURLOPT_CONNECTTIMEOUT] = 1;
$curl->opt[CURLOPT_TIMEOUT] = 1;
$curl->cbFail = 'cbFail';
$curl->add(array('url' => $url));
// start spider
$curl->start();
/**
 * Failure callback: dumps the error entry of the failure data.
 *
 * @param array $err  Failure data; only $err['error'] is read.
 * @param mixed $args Task arguments (not read here).
 */
function cbFail($err, $args)
{
    $error = $err['error'];
    print_r($error);
}
Esempio n. 8
0
<?php

require '../../CurlMulti/Core.php';
require './inc/cb_info.php';
// Per-type limits: 10 for maxThread, with maxThreadType entries of 2 for
// tasks tagged 'html' and 5 for tasks tagged 'image'.
$curl = new CurlMulti_Core();
$curl->cbInfo = 'cbInfo';
$curl->maxThread = 10;
$curl->maxThreadType['html'] = 2;
$curl->maxThreadType['image'] = 5;
$url1 = 'http://www.baidu.com';
$url2 = 'http://www.baidu.com/img/bd_logo1.png';
for ($i = 0; $i < 100; $i++) {
    // Each iteration queues one page task followed by one image task.
    $htmlTask = array(
        'url' => "{$url1}?wd={$i}",
        'ctl' => array('type' => 'html'),
    );
    $imageTask = array(
        'url' => "{$url2}?i={$i}",
        'ctl' => array('type' => 'image'),
    );
    $curl->add($htmlTask);
    $curl->add($imageTask);
}
$curl->start();
Esempio n. 9
0
<?php

// Build the list of 100 local page URLs to fetch.
$urls = array();
for ($i = 0; $i < 100; $i++) {
    $urls[] = "http://localhost/{$i}.html";
}
$dir = __DIR__ . '/cache';
$curl = new CurlMulti_Core();
$curl->opt[CURLOPT_TIMEOUT] = 30;
$curl->maxThread = 10;
$curl->maxTry = 3;
$curl->cache = array(
    'enable' => true,
    'dir' => $dir,
    'expire' => 3600,
);
// Failure callbacks: one set on the crawler, one passed with each task.
$cbFailGlobal = 'callbackFail';
$cbFailTask = 'callbackFailTask';
$curl->cbFail = $cbFailGlobal;
foreach ($urls as $k => $v) {
    $task = array(
        'url' => $v,
        'args' => array("page{$k}"),
    );
    $curl->add($task, 'callback1', $cbFailTask);
}
// download task with task-level CURLOPT_*; the callback can be omitted
$download = array(
    'url' => 'https://www.kernel.org/pub/linux/kernel/v3.x/linux-3.19.tar.xz',
    'file' => __DIR__ . '/linux-3.19.tar.xz',
    'opt' => array(CURLOPT_TIMEOUT => 600),
);
$curl->add($download);
// start the loop
$curl->start();
/**
 * Success callback that queues one follow-up task on the global crawler.
 *
 * @param mixed $r    Result passed by the crawler (not read here).
 * @param mixed $args Task arguments (not read here).
 */
function callback1($r, $args)
{
    // you can call $curl->add() anywhere
    global $curl;
    $followUp = array('url' => 'https://gcc.gnu.org/');
    $curl->add($followUp, 'callback2');
}
function callback2($r, $args)
{
Esempio n. 10
0
<?php

require '../../CurlMulti/Core.php';
// Start the crawler with a callback and no queued tasks. The previously
// declared, never-used $url array has been removed.
$curl = new CurlMulti_Core();
// The callback prints 0, 1, 2 on successive calls. It returns true (after
// a one-second sleep) on the first two calls and false on the third;
// presumably start() keeps looping while it returns true -- confirm
// against CurlMulti_Core.
$curl->start(function () {
    static $i = 0;
    echo $i++ . "\n";
    if ($i >= 3) {
        return false;
    }
    sleep(1);
    return true;
});
Esempio n. 11
0
<?php

require '../../CurlMulti/Core.php';
// CURLOPT_RETURNTRANSFER is set to false on the crawler and to true in the
// single task's own 'opt' array.
$curl = new CurlMulti_Core();
$curl->opt[CURLOPT_RETURNTRANSFER] = false;
$url = 'http://www.baidu.com';
$task = array(
    'url' => $url,
    'opt' => array(CURLOPT_RETURNTRANSFER => true),
);
// Prints the byte length of the result's 'content' entry (no trailing newline).
$onSuccess = function ($r, $args) {
    echo 'content length: ', strlen($r['content']);
};
$curl->add($task, $onSuccess);
$curl->start();
Esempio n. 12
0
<?php

require '../../CurlMulti/Core.php';
// Queue one task per URL, all sharing the same success callback.
$url = array(
    'http://baidu.com',
    'http://bing.com',
);
$curl = new CurlMulti_Core();
foreach ($url as $address) {
    $task = array('url' => $address);
    $curl->add($task, 'cbProcess');
}
// start spider
$curl->start();
/**
 * Success callback: reports the URL, then dumps the result's top-level
 * keys and the task arguments.
 *
 * @param array $r    Result array; $r['info']['url'] is read.
 * @param mixed $args Arguments attached to the task.
 */
function cbProcess($r, $args)
{
    $resultKeys = array_keys($r);
    echo "success, url={$r['info']['url']}\n";
    print_r($resultKeys);
    print_r($args);
}
Esempio n. 13
0
<?php

require '../../CurlMulti/Core.php';
// Queue a single task; its callback (cb1) adds a second task through the
// global $curl, so that variable name must stay as it is.
$curl = new CurlMulti_Core();
$url = 'http://baidu.com';
$task = array('url' => $url);
$curl->add($task, 'cb1');
// start spider
$curl->start();
/**
 * First-stage callback: reports the finished URL, then queues a follow-up
 * task on the global crawler with cb2 as its callback.
 *
 * @param array $r    Result array; $r['info']['url'] is read.
 * @param mixed $args Task arguments (not read here).
 */
function cb1($r, $args)
{
    global $curl;
    $next = 'http://bing.com';
    echo "{$r['info']['url']} finished\n";
    $curl->add(array('url' => $next), 'cb2');
    echo "{$next} added\n";
}
/**
 * Second-stage callback: reports the URL of the finished task.
 *
 * @param array $r    Result array; $r['info']['url'] is read.
 * @param mixed $args Task arguments (not read here).
 */
function cb2($r, $args)
{
    $finishedUrl = $r['info']['url'];
    echo "{$finishedUrl} finished\n";
}