Esempio n. 1
0
 /**
  * Prepare the HTTP client used for crawling.
  *
  * Loads the PEAR HTTP_Client classes, creates a client configured from the
  * 'CrawlerUserAgent' and 'CrawlerMaxRedirects' settings (response history
  * disabled), and attaches a nc_search_indexer_crawler_listener built from
  * the 'CrawlerMaxDocumentSize' setting and the content types registered in
  * `Search_Extension` for the 'nc_search_document_parser' interface.
  */
 public function __construct()
 {
     // Original note on where these live: /netcat/require/lib
     require_once 'HTTP/Client.php';
     require_once 'HTTP/Request/Listener.php';

     $default_headers = array(
         'User-Agent' => nc_search::get_setting('CrawlerUserAgent'),
     );
     $client = new HTTP_Client(null, $default_headers);
     $this->http_client = $client;
     $client->enableHistory(false);
     $client->setMaxRedirects(nc_search::get_setting('CrawlerMaxRedirects'));

     $size_limit = nc_search::get_setting('CrawlerMaxDocumentSize');

     // Content types that have a document parser registered. A direct query is
     // used here; an equivalent nc_search::load_all('nc_search_extension_rule')
     // lookup followed by array_unique() was left disabled in the original.
     $parser_mime_types = nc_Core::get_object()->db->get_col("SELECT DISTINCT `ContentType`\n                                               FROM `Search_Extension`\n                                              WHERE `ExtensionInterface` = 'nc_search_document_parser'");

     $client->attach(
         new nc_search_indexer_crawler_listener($size_limit, $parser_mime_types),
         true
     );
 }
 /**
  * Exercise the site the way a spambot would and report timings.
  *
  * Steps:
  *  1. Fetch "{$base_url}/homepage.php" and collect every link whose href
  *     contains "/content.php?".
  *  2. Download up to three of those pages via get_and_parse() and print the
  *     average download time.
  *  3. POST a comment to the first content URL that carries a "cid=" value,
  *     without logging in and with redirects disabled, and assert that the
  *     server answers with a 302.
  *
  * Relies on the global $base_url and on $this->get_and_parse(), whose third
  * returned element is used as the elapsed time of the request.
  */
 function testSpamTrapPerformance()
 {
     global $base_url;
     // Drop all active output buffers so progress messages show up right away.
     // '@' hides the notice ob_end_clean() raises once no buffer is left.
     while (@ob_end_clean()) {
     }
     echo "Hitting some pages to test how the system performs when hit by spambots\n";
     echo "Spidering content ...\n";
     list(, $xp) = $this->get_and_parse("{$base_url}/homepage.php");
     // Keyed by URL so that duplicate links collapse into one entry.
     $content = array();
     foreach ($xp->query("//a/@href") as $node) {
         if (preg_match("|/content.php\\?|", $node->value)) {
             $content[$node->value] = true;
         }
     }
     $n_retrieved = 0;
     $total_time = 0.0;
     foreach (array_keys($content) as $url) {
         echo "Content page to retrieve: {$url}\n";
         list(, , $t_taken) = $this->get_and_parse($url);
         ++$n_retrieved;
         $total_time += $t_taken;
         // Three samples are enough for a rough average.
         if ($n_retrieved >= 3) {
             break;
         }
     }
     // Fail with a clear message instead of dividing by zero below when the
     // homepage exposed no content links.
     $this->assertTrue($n_retrieved > 0, "no content.php links found on {$base_url}/homepage.php");
     echo sprintf("avg time per download: %.1f s\n", $total_time / $n_retrieved);
     echo "Posting spam...\n";
     echo "Testing that we can't post comments without being logged in...\n";
     foreach (array_keys($content) as $url) {
         echo "Posting anonymous comment to {$url}\n";
         if (!preg_match("/cid=(\\d+)/", $url, $m)) {
             echo "error determining cid from {$url}\n";
             continue;
         }
         $cid = $m[1];
         $c = new HTTP_Client();
         // Do not follow the redirect: the 302 itself is what gets asserted.
         $c->setMaxRedirects(0);
         $c->post($url, array("addcomment" => "Submit Comment", "name" => "automatic test robot", "email" => "*****@*****.**", "homepage" => "http://peopleaggregator.net/", "cid" => $cid, "ccid" => ""));
         // Diagnostic dump of the client state, kept from the original test.
         var_dump($c);
         // Expected value first, actual second, so failure messages read correctly.
         $this->assertEquals(302, $c->_responses[0]['code']);
         // Only the first usable content URL is tested.
         break;
     }
     echo "Posting personal messages...\n";
 }
Esempio n. 3
0
    $errlog = new IC2_DataObject_Errors();
    if ($errlog->get($uri)) {
        ic2_error($errlog->errcode, '', false);
    }
}
// }}}
// {{{ init http-client
// Check the settings: use the configured timeouts when they are positive,
// otherwise fall back to 60 seconds.
$conn_timeout = isset($ini['Getter']['conn_timeout']) && $ini['Getter']['conn_timeout'] > 0 ? (float) $ini['Getter']['conn_timeout'] : 60.0;
$read_timeout = isset($ini['Getter']['read_timeout']) && $ini['Getter']['read_timeout'] > 0 ? (int) $ini['Getter']['read_timeout'] : 60;
// Prefer the configured User-Agent; otherwise reuse the one sent by the
// requesting client. That header is optional, so guard against its absence.
if (!empty($_conf['expack.user_agent'])) {
    $ic2_ua = $_conf['expack.user_agent'];
} elseif (isset($_SERVER['HTTP_USER_AGENT'])) {
    $ic2_ua = $_SERVER['HTTP_USER_AGENT'];
} else {
    $ic2_ua = '';
}
// Not cached yet, so try to fetch it.
$client = new HTTP_Client();
$client->setRequestParameter('timeout', $conn_timeout);
// readTimeout is given as array(seconds, microseconds).
$client->setRequestParameter('readTimeout', array($read_timeout, 0));
$client->setMaxRedirects(3);
// Only override the User-Agent when there is one; an empty value would be
// sent as a blank "User-Agent:" header.
if ($ic2_ua !== '') {
    $client->setDefaultHeader('User-Agent', $ic2_ua);
}
// NOTE(review): the conditional header is sent only when $force is set —
// confirm this is the intended condition.
if ($force && $time) {
    $client->setDefaultHeader('If-Modified-Since', http_date($time));
}
// Proxy settings: applied only when enabled and both host and port are given.
if (!empty($ini['Proxy']['enabled']) && !empty($ini['Proxy']['host']) && !empty($ini['Proxy']['port'])) {
    $client->setRequestParameter('proxy_host', $ini['Proxy']['host']);
    $client->setRequestParameter('proxy_port', $ini['Proxy']['port']);
    if (!empty($ini['Proxy']['user'])) {
        $proxy_pass = isset($ini['Proxy']['pass']) ? $ini['Proxy']['pass'] : '';
        $client->setRequestParameter('proxy_user', $ini['Proxy']['user']);
        $client->setRequestParameter('proxy_pass', $proxy_pass);
        // Basic credentials are also sent explicitly as a default header.
        $proxy_auth_data = base64_encode($ini['Proxy']['user'] . ':' . $proxy_pass);
        $client->setDefaultHeader('Proxy-Authorization', 'Basic ' . $proxy_auth_data);
    }
}
Esempio n. 4
0
// Caching relay: serve the response for the requested route from Cache_Lite
// when available, otherwise fetch it over HTTP, relay it and cache it.
$cache = new Cache_Lite($options);
// Strip the trailing cache-buster parameter (e.g. _dc=12334234) so it does not
// fragment the cache. The first 6 characters of the query string are dropped —
// presumably a "route=" prefix; confirm against the caller.
$route = preg_replace('/&_dc=(\\d+)$/i', '', substr($_SERVER["QUERY_STRING"], 6));
if ($data = $cache->get($route)) {
    // Cache hit: tell the browser not to cache, and serve the stored body as JSON.
    header('Cache-Control: no-cache, must-revalidate');
    header('Expires: Mon, 26 Jul 1997 05:00:00 GMT');
    header('Content-type: application/json');
    echo $data;
} else {
    // A route that already names a host is used verbatim; anything else
    // (including a route parse_url() rejects) is resolved against the API host.
    // NOTE(review): this lets a client make the server fetch any host it names;
    // consider restricting absolute routes to the World Bank API host.
    $api_base = 'http://api.worldbank.org';
    $parsed_url = parse_url($route);
    if (is_array($parsed_url) && array_key_exists("host", $parsed_url)) {
        $url = $route;
    } else {
        $url = $api_base . $route;
    }
    $hc = new HTTP_Client();
    $hc->setMaxRedirects(1);
    // get() yields the HTTP status code, or a PEAR_Error when the request fails.
    $status = $hc->get($url);
    $response = $status instanceof PEAR_Error ? null : $hc->currentResponse();
    if (!is_array($response)) {
        // Nothing usable came back: report a gateway failure and cache nothing.
        header('HTTP/1.1 502 Bad Gateway');
        $data = '';
    } else {
        $headers = isset($response['headers']) && is_array($response['headers']) ? $response['headers'] : array();
        $data = isset($response['body']) ? $response['body'] : '';
        // The HTTP library hands back an already de-chunked / decompressed body,
        // so headers describing the upstream transfer framing must not be relayed.
        $skipped_headers = array(
            'transfer-encoding' => true,
            'content-encoding' => true,
            'content-length' => true,
            'connection' => true,
            'keep-alive' => true,
        );
        foreach ($headers as $k => $v) {
            if (isset($skipped_headers[strtolower($k)])) {
                continue;
            }
            header($k . ': ' . $v);
        }
        echo $data;
        // Cache only successful, non-empty answers so that upstream errors are
        // not replayed to later requests.
        if ((int) $response['code'] === 200 && $data !== '') {
            $cache->save($data, $route);
        }
    }
}