/**
 * Builds the HTTP client used by the crawler.
 *
 * Loads the PEAR HTTP_Client classes, applies the user agent and redirect
 * limit taken from the search settings, and attaches a listener that is
 * constructed from the maximum document size and the content types
 * registered for document parsers.
 */
public function __construct() {
    // PEAR classes; the original note points at /netcat/require/lib.
    require_once 'HTTP/Client.php';
    require_once 'HTTP/Request/Listener.php';

    $default_headers = array('User-Agent' => nc_search::get_setting('CrawlerUserAgent'));
    $client = new HTTP_Client(null, $default_headers);
    $this->http_client = $client;

    $client->enableHistory(false);
    $client->setMaxRedirects(nc_search::get_setting('CrawlerMaxRedirects'));

    $size_limit = nc_search::get_setting('CrawlerMaxDocumentSize');

    // Content types of every registered document parser, read straight from
    // the extension table. (An equivalent lookup through
    // nc_search::load_all('nc_search_extension_rule') was sketched here
    // earlier but is not used.)
    $parser_mime_types = nc_Core::get_object()->db->get_col(
        "SELECT DISTINCT `ContentType`\n FROM `Search_Extension`\n WHERE `ExtensionInterface` = 'nc_search_document_parser'"
    );

    // Second argument is passed as true, exactly as before.
    $client->attach(
        new nc_search_indexer_crawler_listener($size_limit, $parser_mime_types),
        true
    );
}
/**
 * Exercises the site the way a spambot would.
 *
 * Steps:
 *  1. Fetch the homepage and collect every link that points at content.php.
 *  2. Download up to three of those pages and report the average time.
 *  3. POST a comment anonymously to the first content page that carries a
 *     cid and assert that the server answers with a 302.
 *
 * Relies on the global $base_url and on $this->get_and_parse(), which is
 * used here as returning a list whose second element is an XPath object and
 * whose third element is the time taken.
 */
function testSpamTrapPerformance() {
    global $base_url;

    // Discard every active output buffer (the loop ends on the first failure).
    while (@ob_end_clean()) {
    }

    echo "Hitting some pages to test how the system performs when hit by spambots\n";
    echo "Spidering content ...\n";
    list(, $xp) = $this->get_and_parse("{$base_url}/homepage.php");

    // Keyed by URL so duplicate links collapse into one entry.
    $content = array();
    foreach ($xp->query("//a/@href") as $node) {
        if (preg_match("|/content.php\\?|", $node->value)) {
            $content[$node->value] = true;
        }
    }
    $content_urls = array_keys($content);

    $n_retrieved = 0;
    $total_time = 0.0;
    foreach ($content_urls as $url) {
        echo "Content page to retrieve: {$url}\n";
        list(, , $t_taken) = $this->get_and_parse($url);
        ++$n_retrieved;
        $total_time += $t_taken;
        if ($n_retrieved >= 3) {
            break;
        }
    }

    // Guard the average: with no content links the divisor would be zero.
    if ($n_retrieved > 0) {
        printf("avg time per download: %.1f s\n", $total_time / $n_retrieved);
    } else {
        echo "no content pages found to download\n";
    }

    echo "Posting spam...\n";
    echo "Testing that we can't post comments without being logged in...\n";
    foreach ($content_urls as $url) {
        echo "Posting anonymous comment to {$url}\n";
        if (!preg_match("/cid=(\\d+)/", $url, $m)) {
            echo "error determining cid from {$url}\n";
            continue;
        }
        $cid = $m[1];

        // Do not follow redirects: the redirect itself is what is asserted.
        $c = new HTTP_Client();
        $c->setMaxRedirects(0);
        $c->post($url, array(
            "addcomment" => "Submit Comment",
            "name" => "automatic test robot",
            "email" => "*****@*****.**",
            "homepage" => "http://peopleaggregator.net/",
            "cid" => $cid,
            "ccid" => "",
        ));

        // Only one request was made on this client, so the current response
        // is the response to the POST.
        $response = $c->currentResponse();
        $this->assertEquals(
            302,
            $response['code'],
            "expected a 302 redirect for the anonymous comment post to {$url}"
        );

        // One page is enough for this check.
        break;
    }

    echo "Posting personal messages...\n";
}
$errlog = new IC2_DataObject_Errors();
if ($errlog->get($uri)) {
    ic2_error($errlog->errcode, '', false);
}
} // closes a block opened before this excerpt

// }}}
// {{{ init http-client

// Check the settings: a timeout that is missing or not positive falls back
// to 60 seconds.
$conn_timeout = 60.0;
if (isset($ini['Getter']['conn_timeout']) && $ini['Getter']['conn_timeout'] > 0) {
    $conn_timeout = (float) $ini['Getter']['conn_timeout'];
}

$read_timeout = 60;
if (isset($ini['Getter']['read_timeout']) && $ini['Getter']['read_timeout'] > 0) {
    $read_timeout = (int) $ini['Getter']['read_timeout'];
}

// A configured user agent wins; otherwise reuse the one sent by the
// requesting browser.
if (!empty($_conf['expack.user_agent'])) {
    $ic2_ua = $_conf['expack.user_agent'];
} else {
    $ic2_ua = $_SERVER['HTTP_USER_AGENT'];
}

// Not cached, so try to fetch it.
$client = new HTTP_Client();
$client->setRequestParameter('timeout', $conn_timeout);
$client->setRequestParameter('readTimeout', array($read_timeout, 0));
$client->setMaxRedirects(3);
$client->setDefaultHeader('User-Agent', $ic2_ua);

// Conditional request, only when a forced refetch has a timestamp to
// compare against.
if ($force && $time) {
    $client->setDefaultHeader('If-Modified-Since', http_date($time));
}

// Proxy settings
if ($ini['Proxy']['enabled'] && $ini['Proxy']['host'] && $ini['Proxy']['port']) {
    $client->setRequestParameter('proxy_host', $ini['Proxy']['host']);
    $client->setRequestParameter('proxy_port', $ini['Proxy']['port']);

    if ($ini['Proxy']['user']) {
        $client->setRequestParameter('proxy_user', $ini['Proxy']['user']);
        $client->setRequestParameter('proxy_pass', $ini['Proxy']['pass']);

        // The Basic credentials are also sent as an explicit default header.
        $proxy_auth_data = base64_encode($ini['Proxy']['user'] . ':' . $ini['Proxy']['pass']);
        $client->setDefaultHeader('Proxy-Authorization', 'Basic ' . $proxy_auth_data);
    }
}
// Caching proxy: serve the requested route from Cache_Lite when possible,
// otherwise fetch it upstream, relay it, and cache the body.
$cache = new Cache_Lite($options);

// The first 6 characters of the query string are dropped (presumably a
// "route="-style prefix -- confirm against the caller), then the trailing
// cache-buster parameter (_dc=12334234) is removed so it does not fragment
// the cache key.
$query_string = isset($_SERVER["QUERY_STRING"]) ? $_SERVER["QUERY_STRING"] : '';
$route = preg_replace('/&_dc=(\\d+)$/i', '', (string) substr($query_string, 6));

if ($data = $cache->get($route)) {
    // Cache hit: only the body was stored, so send a fixed header set.
    header('Cache-Control: no-cache, must-revalidate');
    header('Expires: Mon, 26 Jul 1997 05:00:00 GMT');
    header('Content-type: application/json');
    echo $data;
} else {
    // A route that already carries a host is fetched as-is; anything else is
    // resolved against the World Bank API.
    // NOTE(review): the route comes from the client, so the absolute-URL
    // branch lets a caller make this server fetch arbitrary hosts (SSRF).
    // Consider restricting it to an allow-list.
    $parsed_url = parse_url($route);
    if (is_array($parsed_url) && isset($parsed_url['host'])) {
        $url = $route;
    } else {
        $url = 'http://api.worldbank.org' . $route;
    }

    $hc = new HTTP_Client();
    $hc->setMaxRedirects(1);
    $status = $hc->get($url);

    if ($status instanceof PEAR_Error) {
        // The upstream request failed: there is no response to relay, and
        // nothing must be written to the cache.
        header('HTTP/1.1 502 Bad Gateway');
    } else {
        $response = $hc->currentResponse();
        $headers = $response['headers'];
        $data = $response['body'];

        // The body handed back by the HTTP client is already de-chunked and
        // decompressed, so headers that describe the upstream wire format
        // would no longer match what is echoed below.
        $wire_headers = array(
            'transfer-encoding',
            'content-encoding',
            'content-length',
            'connection',
            'keep-alive',
        );
        foreach ((array) $headers as $k => $v) {
            if (in_array(strtolower($k), $wire_headers, true)) {
                continue;
            }
            header($k . ': ' . $v);
        }
        echo $data;

        // Cache successful responses only; a cached error body would be
        // replayed later as if it were valid JSON.
        if ((int) $response['code'] === 200) {
            $cache->save($data, $route);
        }
    }
}