コード例 #1
0
 /**
  * Loads a large HTML fixture, builds an hQuery document from it and indexes it,
  * logging the size, elapsed time and memory figures produced by the helper
  * methods (self::timer(), self::memer()) for each stage.
  *
  * @return array one-element array containing the constructed document
  */
 public function test_construct_and_index()
 {
     $filename = 'data/big_granito_1.html';

     // Stage 1: read the fixture.
     $tmr = self::timer();
     $mmr = self::memer();
     $html = self::file_get_contents($filename);
     $mem = self::memer($mmr);
     $exe = self::timer($tmr);
     self::log("Loaded    " . self::fmtNumber(strlen($html) / 1024, 2) . "Kb\tin\t{$exe}\t{$mem} RAM");

     // Stage 2: construct the document (second constructor argument is false).
     $tmr = self::timer();
     $mmr = self::memer();
     $doc = new hQuery($html, false);
     $mem = self::memer($mmr);
     $exe = self::timer($tmr);
     self::log("Construct " . self::fmtNumber($doc->size / 1024, 2) . "Kb\tin\t{$exe}\t{$mem} RAM");
     $doc->location(self::fn($filename));

     // Stage 3: index the document.
     $tmr = self::timer();
     $mmr = self::memer();
     $tags = $doc->index();
     $mem = self::memer($mmr);
     $exe = self::timer($tmr);
     // NOTE(review): the threshold (6000000) and the message ("3 sec") look
     // inconsistent - confirm the unit returned by self::timer($tmr, false).
     $this->assertLessThan(6000000, self::timer($tmr, false), 'should index 3Mb in less then 3 sec');
     $count = self::fmtNumber(self::listSumCounts($tags));
     self::log("Indexed   {$count} tags\tin\t{$exe}\t{$mem} RAM");
     self::log("Original Charset: {$doc->charset}");

     // Invert the index: element count => comma-separated list of keys with that count.
     $tags = array_map('count', $tags);
     // Must be an array even when $tags is empty: krsort() does not accept NULL.
     $counts = array();
     foreach ($tags as $k => $v) {
         $counts[$v] = (empty($counts[$v]) ? '' : $counts[$v] . ', ') . $k;
     }
     krsort($counts);
     // self::log('Tag counts:', $counts);
     return array($doc);
 }
コード例 #2
0
 /**
  * Prepares the shared fixture: points hQuery at the TestHQueryTests mock-up
  * class, parses a small HTML document into self::$inst and logs the class
  * of the resulting instance.
  */
 public static function setUpBeforeClass()
 {
     hQuery::$_mockup_class = 'TestHQueryTests';

     // The fixture markup, one fragment per element for readability.
     $fragments = array(
         '<doctype html>',
         '<html>',
         '<head>',
         '<meta charset="ISO-8859-2" />',
         '<title>Sample HTML Doc</title>',
         '</head>',
         '<body class="test-class">',
         '<div id="test-div" class="test-class test-div">',
         'This is some text',
         '<a href="/path">',
         'This is a link',
         '</a>',
         ' between tags',
         '<span id="aSpan">Span text</span>',
         '</div>',
         'Contents...',
         '</body>',
         '</html>',
     );
     $markup = implode('', $fragments);
     $documentUrl = self::$baseUrl . 'index.html';

     self::$inst = TestHQueryTests::fromHTML($markup, $documentUrl);

     $className = get_class(self::$inst);
     self::log($className);
 }
コード例 #3
0
ファイル: hquery.php プロジェクト: nicolabavaro/hQuery.php
 /**
  * Executes a HTTP write-read session over a raw socket.
  *
  * The result object is also stored in self::$last_http_result before the
  * request is made, so it is available even when an exception is thrown.
  *
  * @param string       $host - IP/HOST address or URL
  * @param array|string $head - list of HTTP headers to be sent along with the request to $host
  *                             (array, or a string of "\r\n"-separated header lines)
  * @param mixed        $body - data to be sent as the contents of the request. If it is an array or object, a http query is built.
  * @param array        $options - list of options as key-value:
  *                              timeout   - connection timeout in seconds
  *                              host      - goes in headers, overrides $host (ex. $host == '127.0.0.1', $options['host'] == 'www.example.com')
  *                              scheme    - http, ssl, tls, udp, ...
  *                              port      - port to connect to, when not part of $host
  *                              method    - HTTP method (defaults to POST with a body, GET otherwise)
  *                              protocol  - request protocol string (defaults to 'HTTP/1.1')
  *                              decode    - 'gzip' to request and decode gzip-encoded responses
  *                              redirects - maximum number of redirects to follow
  *                              close     - whether to close connection or not
  *
  * @return stdClass|false response object with the properties host, options, code, msg,
  *                        headers, body, method, port, path, request (and url when $host
  *                        was a URL); false when the server sent no response
  *
  * @throws Exception when the URL cannot be parsed, the socket cannot be opened
  *                   or the request cannot be written
  *
  * @author Dumitru Uzun
  *
  */
 public static function http_wr($host, $head = NULL, $body = NULL, $options = NULL)
 {
     self::$last_http_result = $ret = new stdClass();
     empty($options) and $options = array();
     $path = '';
     if ($p = strpos($host, '://') and $p < 7) {
         $ret->url = $host;
         $p = parse_url($host);
         if (!$p) {
             throw new Exception('Wrong host specified');
         }
         // error
         $host = $p['host'];
         $path = isset($p['path']) ? $p['path'] : '';
         if (isset($p['query'])) {
             $path .= '?' . $p['query'];
         }
         if (isset($p['port'])) {
             $port = $p['port'];
         }
         unset($p['path'], $p['query']);
         // Remaining URL parts (scheme, host, port, ...) become defaults for $options.
         $options += $p;
     } else {
         // "host[:port][/path]" form; the path and port parts are optional.
         $p = explode('/', $host, 2);
         $host = $p[0];
         if (isset($p[1])) {
             $path = $p[1];
         }
         $p = explode(':', $host, 2);
         $host = $p[0];
         if (isset($p[1]) && $p[1] !== '') {
             $port = $p[1];
         }
     }
     if (strncmp($path, '/', 1)) {
         $path = '/' . $path;
     }
     // isset($path) or $path = '/';
     if (!isset($port)) {
         if (isset($options['port'])) {
             $port = $options['port'];
         } else {
             // Default port by scheme; a missing scheme falls through to 80.
             switch (isset($options['scheme']) ? $options['scheme'] : '') {
                 case 'tls':
                 case 'ssl':
                 case 'https':
                     $port = 443;
                     break;
                 case 'ftp':
                     $port = 21;
                     break;
                 case 'sftp':
                     $port = 22;
                     break;
                 case 'http':
                 default:
                     $port = 80;
             }
         }
     }
     $ret->host = $host;
     $_h = array('host' => isset($options['host']) ? $options['host'] : $host, 'accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8');
     if (!empty($options['scheme'])) {
         // Pick the socket transport from the effective scheme. $options['scheme']
         // is used (not the parsed URL) so that a scheme passed with a bare host works too.
         switch ($options['scheme']) {
             case 'http':
             case 'ftp':
                 break;
             case 'https':
                 $host = 'tls://' . $host;
                 break;
             default:
                 $host = $options['scheme'] . '://' . $host;
         }
     }
     static $boundary = "\r\n\r\n";
     // Body as given by the caller (stringified), without the trailing boundary;
     // this is what gets re-sent on a redirect.
     $rawBody = NULL;
     if ($body) {
         if (is_array($body) || is_object($body)) {
             $body = http_build_query($body);
             $_h['content-type'] = 'application/x-www-form-urlencoded';
         }
         $body = (string) $body;
         $rawBody = $body;
         $_h['content-length'] = strlen($body);
         $body .= $boundary;
         empty($options['method']) and $options['method'] = 'POST';
     } else {
         $body = NULL;
     }
     $meth = empty($options['method']) ? 'GET' : strtoupper($options['method']);
     if ($head) {
         if (!is_array($head)) {
             $head = explode("\r\n", $head);
         }
         foreach ($head as $i => $v) {
             if (is_int($i)) {
                 $v = explode(':', $v, 2);
                 if (count($v) != 2) {
                     continue;
                 }
                 // Invalid header
                 list($i, $v) = $v;
             }
             // Normalize the name ("Content_Type", "content type" -> "content-type")
             // so caller headers override the defaults in $_h.
             $i = strtolower(strtr($i, ' _', '--'));
             $_h[$i] = trim($v);
         }
     }
     if (isset($options['decode']) && $options['decode'] == 'gzip') {
         // if(function_exists('gzdecode')) {
         $_h['accept-encoding'] = 'gzip';
         // }
         // else {
         // $options['decode'] = NULL;
         // }
     }
     if (!isset($options['close']) || $options['close']) {
         $_h['connection'] = 'close';
     } else {
         $_h['connection'] = 'keep-alive';
     }
     $prot = empty($options['protocol']) ? 'HTTP/1.1' : $options['protocol'];
     $head = array("{$meth} {$path} {$prot}");
     foreach ($_h as $i => $v) {
         // "content-type" -> "Content-Type"
         $head[] = implode('-', array_map('ucfirst', explode('-', $i))) . ': ' . $v;
     }
     $rqst = implode("\r\n", $head) . $boundary . $body;
     $head = NULL;
     // free mem
     $timeout = isset($options['timeout']) ? $options['timeout'] : @ini_get("default_socket_timeout");
     $ret->options = $options;
     // ------------------- Connection and data transfer -------------------
     $errno = $errstr = $rsps = '';
     $h = $_rh = NULL;
     $fs = @fsockopen($host, $port, $errno, $errstr, $timeout);
     if (!$fs) {
         throw new Exception('unable to create socket "' . $host . ':' . $port . '"' . $errstr, $errno);
     }
     if (!fwrite($fs, $rqst)) {
         fclose($fs);
         throw new Exception("unable to write");
     }
     // read headers
     while ($open = !feof($fs) && ($p = @fgets($fs, 1024))) {
         if ($p == "\r\n") {
             break;
         }
         $rsps .= $p;
     }
     if ($rsps) {
         $h = explode("\r\n", rtrim($rsps));
         // Status line: "<protocol> <code> <message>"; the message may be absent.
         list(, $rcode, $rmsg) = array_pad(explode(' ', array_shift($h), 3), 3, '');
         foreach ($h as $v) {
             $v = explode(':', $v, 2);
             $_rh[strtoupper(strtr($v[0], '- ', '__'))] = isset($v[1]) ? trim($v[1]) : NULL;
         }
         $rsps = NULL;
         switch ($rcode) {
             case 301:
             case 302:
             case 303:
             case 307:
                 // repeat request using the same method and post data
                 if (!empty($options['redirects']) && $options['redirects'] > 0 && !empty($_rh['LOCATION'])) {
                     // Base URL uses the plain host name ($ret->host), not $host,
                     // which may already carry a transport prefix such as "tls://".
                     $base = (empty($options['scheme']) ? '' : $options['scheme'] . '://') . $ret->host . ':' . $port . (empty($options['path']) ? '' : $options['path']);
                     $loc = self::abs_url($_rh['LOCATION'], $base);
                     unset($_h['host'], $options['host'], $options['port'], $options['scheme'], $options['method']);
                     --$options['redirects'];
                     // ??? could save cookies for redirect
                     fclose($fs);
                     return self::http_wr($loc, $_h, $rawBody, $options);
                 }
                 break;
         }
         // Detect body length:
         //   1 - no body expected, 2 - Content-Length, 3 - chunked, 0 - read until EOF
         $te = 0;
         if (!$open || $rcode < 200 || $rcode == 204 || $rcode == 304 || $meth == 'HEAD') {
             $te = 1;
         } elseif (isset($_rh['TRANSFER_ENCODING']) && strtolower($_rh['TRANSFER_ENCODING']) === 'chunked') {
             $te = 3;
         } elseif (isset($_rh['CONTENT_LENGTH'])) {
             $bl = (int) $_rh['CONTENT_LENGTH'];
             $te = 2;
         }
         switch ($te) {
             case 1:
                 break;
             case 2:
                 while ($bl > 0 and $open &= !feof($fs) && ($p = @fread($fs, $bl))) {
                     $rsps .= $p;
                     $bl -= strlen($p);
                 }
                 break;
             case 3:
                 while ($open &= !feof($fs) && ($p = @fgets($fs, 1024))) {
                     // Chunk header: hexadecimal size, optionally followed by ";extensions"
                     $_re = explode(';', rtrim($p));
                     $cs = reset($_re);
                     $bl = hexdec($cs);
                     if (!$bl) {
                         break;
                     }
                     // empty chunk
                     while ($bl > 0 and $open &= !feof($fs) && ($p = @fread($fs, $bl))) {
                         $rsps .= $p;
                         $bl -= strlen($p);
                     }
                     @fgets($fs, 3);
                     // \r\n
                 }
                 if ($open &= !feof($fs) && ($p = @fgets($fs, 1024))) {
                     if ($p = rtrim($p)) {
                         // ??? Trailer Header
                         $v = explode(':', $p, 2);
                         $_rh[strtoupper(strtr($v[0], '- ', '__'))] = isset($v[1]) ? trim($v[1]) : NULL;
                         @fgets($fs, 3);
                         // \r\n
                     }
                 }
                 break;
             default:
                 while ($open &= !feof($fs) && ($p = @fread($fs, 1024))) {
                     // ???
                     $rsps .= $p;
                 }
                 break;
         }
         // The whole response has been read; the socket is not reused.
         fclose($fs);
         if ($rsps != '' && isset($options['decode']) && $options['decode'] == 'gzip' && isset($_rh['CONTENT_ENCODING']) && $_rh['CONTENT_ENCODING'] == 'gzip') {
             $r = self::gzdecode($rsps);
             if ($r !== false) {
                 unset($_rh['CONTENT_ENCODING']);
                 $rsps = $r;
             }
         }
         $ret->code = $rcode;
         $ret->msg = $rmsg;
         $ret->headers = isset($_rh) ? $_rh : NULL;
         $ret->body = $rsps;
         $ret->method = $meth;
         // $ret->host    = $host;
         $ret->port = $port;
         $ret->path = $path;
         $ret->request = $rqst;
         return $ret;
         // Old return:
         //     contents  headers  status-code  status-message
         // return array( $rsps,    @$_rh,   $rcode,      $rmsg,           $host, $port, $path, $rqst  );
     }
     fclose($fs);
     return false;
     // no response
 }
コード例 #4
0
 /**
  * Resolves a file name relative to PHPUNIT_DIR, inflating "<name>.gz" into
  * "<name>" on demand when only the gzipped copy exists.
  *
  * @param string $fn file name, appended to PHPUNIT_DIR as-is
  *
  * @return string|false full path of the (possibly just extracted) file, or false
  *                      when neither the file nor a usable ".gz" copy is available
  */
 public static function file_exists($fn)
 {
     $ffn = PHPUNIT_DIR . $fn;
     if (file_exists($ffn)) {
         return $ffn;
     }

     $zfn = $ffn . '.gz';
     if (!file_exists($zfn)) {
         return false;
     }

     $gz = file_get_contents($zfn);
     if ($gz === false) {
         return false;
     }

     // Do not write anything when decoding fails: an empty target file would
     // make every later call report the file as present.
     $data = hQuery::gzdecode($gz);
     if (!is_string($data)) {
         return false;
     }

     // Compare with false explicitly: writing 0 bytes is a success, not a failure.
     if (file_put_contents($ffn, $data) === false) {
         return false;
     }

     return $ffn;
 }