/**
 * Loads a large HTML fixture, builds an hQuery document from it and indexes its tags,
 * logging the elapsed time and memory usage of each phase.
 *
 * @return array single-element list containing the constructed document
 */
public function test_construct_and_index()
{
    $filename = 'data/big_granito_1.html';

    // Phase 1: read the fixture from disk.
    $tmr = self::timer();
    $mmr = self::memer();
    $html = self::file_get_contents($filename);
    $mem = self::memer($mmr);
    $exe = self::timer($tmr);
    self::log("Loaded " . self::fmtNumber(strlen($html) / 1024, 2) . "Kb\tin\t{$exe}\t{$mem} RAM");

    // Phase 2: construct the document (second constructor argument is passed as false).
    $tmr = self::timer();
    $mmr = self::memer();
    $doc = new hQuery($html, false);
    $mem = self::memer($mmr);
    $exe = self::timer($tmr);
    self::log("Construct " . self::fmtNumber($doc->size / 1024, 2) . "Kb\tin\t{$exe}\t{$mem} RAM");

    $doc->location(self::fn($filename));

    // Phase 3: index the tags and check the time budget.
    $tmr = self::timer();
    $mmr = self::memer();
    $tags = $doc->index();
    $mem = self::memer($mmr);
    $exe = self::timer($tmr);

    // NOTE(review): the bound (6000000) and the message ("3 sec") do not obviously agree;
    // the unit returned by self::timer($tmr, false) is not visible here - confirm and align.
    $this->assertLessThan(6000000, self::timer($tmr, false), 'should index 3Mb in less then 3 sec');

    $count = self::fmtNumber(self::listSumCounts($tags));
    self::log("Indexed {$count} tags\tin\t{$exe}\t{$mem} RAM");
    self::log("Original Charset: {$doc->charset}");

    // Group tag names by the number of entries indexed for each tag: count => "tag1, tag2, ...".
    $tags = array_map('count', $tags);
    // Start from an empty array (not NULL) so krsort() always receives an array,
    // even when the index turns out to be empty.
    $counts = array();
    foreach ($tags as $k => $v) {
        $counts[$v] = (empty($counts[$v]) ? '' : $counts[$v] . ', ') . $k;
    }
    krsort($counts);
    // self::log('Tag counts:', $counts);

    return array($doc);
}
/**
 * Prepares the shared fixture for the test class: points hQuery at the mock-up class
 * and parses a small sample HTML document into self::$inst.
 */
public static function setUpBeforeClass()
{
    hQuery::$_mockup_class = 'TestHQueryTests';

    // The sample document, one fragment per entry; joined without any separator.
    $markup = implode('', array(
        '<doctype html>',
        '<html>',
        '<head>',
        '<meta charset="ISO-8859-2" />',
        '<title>Sample HTML Doc</title>',
        '</head>',
        '<body class="test-class">',
        '<div id="test-div" class="test-class test-div">',
        'This is some text',
        '<a href="/path">',
        'This is a link',
        '</a>',
        ' between tags',
        '<span id="aSpan">Span text</span>',
        '</div>',
        'Contents...',
        '</body>',
        '</html>',
    ));
    $location = self::$baseUrl . 'index.html';

    self::$inst = TestHQueryTests::fromHTML($markup, $location);

    self::log(get_class(self::$inst));
}
/**
 * Executes a HTTP write-read session.
 *
 * @param string $host    - IP/HOST address ("host[:port][/path]") or URL
 * @param array  $head    - list of HTTP headers to be sent along with the request to $host
 *                          (either "Name: value" strings or name => value pairs; a "\r\n" separated string is split)
 * @param mixed  $body    - data to be sent as the contents of the request. If it is an array or object, a http query is built.
 * @param array  $options - list of options as key-value:
 *                            timeout   - connection timeout in seconds
 *                            host      - goes in headers, overrides $host (ex. $host == '127.0.0.1', $options['host'] == 'www.example.com')
 *                            scheme    - http, ssl, tls, udp, ...
 *                            port      - port to connect to when $host does not carry one
 *                            close     - whether to close connection or not
 *                            method    - HTTP method (defaults to POST with a body, GET otherwise)
 *                            protocol  - protocol token of the request line (defaults to HTTP/1.1)
 *                            decode    - 'gzip' to request and decode gzip-encoded responses
 *                            redirects - maximum number of 301/302/303/307 redirects to follow
 *                            path      - base path used when resolving a relative redirect location
 *
 * @return stdClass|false object with code, msg, headers, body, method, host, port, path, request, options
 *                        (and url when $host was a URL); false when the server sent no response
 *
 * @throws Exception when the host is invalid, the socket cannot be opened or the request cannot be written
 *
 * @author Dumitru Uzun
 *
 */
public static function http_wr($host, $head = NULL, $body = NULL, $options = NULL)
{
    self::$last_http_result = $ret = new stdClass();
    if (empty($options)) {
        $options = array();
    }

    // ------------------- Resolve host, port and path -------------------
    $port = null;
    $p = strpos($host, '://');
    if ($p && $p < 7) {
        // $host is a full URL
        $ret->url = $host;
        $p = parse_url($host);
        if (!$p || !isset($p['host']) || $p['host'] === '') {
            throw new Exception('Wrong host specified');
        }
        $host = $p['host'];
        $path = isset($p['path']) ? $p['path'] : '';
        if (isset($p['query'])) {
            $path .= '?' . $p['query'];
        }
        if (isset($p['port'])) {
            $port = $p['port'];
        }
        unset($p['path'], $p['query']);
        // Remaining URL parts (scheme, host, ...) act as defaults; explicitly passed options win.
        $options += $p;
    } else {
        // $host is "host[:port][/path]"; either optional part may be missing.
        $p = explode('/', $host, 2);
        $host = $p[0];
        $path = isset($p[1]) ? $p[1] : '';
        $p = explode(':', $host, 2);
        $host = $p[0];
        if (isset($p[1])) {
            $port = $p[1];
        }
    }

    if (strncmp($path, '/', 1)) {
        $path = '/' . $path;
    }

    $scheme = empty($options['scheme']) ? '' : $options['scheme'];

    if ($port === null) {
        if (isset($options['port'])) {
            $port = $options['port'];
        } else {
            switch ($scheme) {
                case 'tls':
                case 'ssl':
                case 'https':
                    $port = 443;
                    break;
                case 'ftp':
                    $port = 21;
                    break;
                case 'sftp':
                    $port = 22;
                    break;
                case 'http':
                default:
                    $port = 80;
            }
        }
    }

    $ret->host = $host; // bare host name, before any transport prefix is added

    $_h = array(
        'host'   => isset($options['host']) ? $options['host'] : $host,
        'accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    );

    // Pick the socket transport from the effective scheme. The switch must look at
    // $options['scheme']: for a non-URL $host, $p holds the "host:port" pieces and has no scheme.
    if ($scheme !== '') {
        switch ($scheme) {
            case 'http':
            case 'ftp':
                break; // plain TCP, no prefix needed
            case 'https':
                $host = 'tls://' . $host;
                break;
            default:
                $host = $scheme . '://' . $host;
        }
    }

    // ------------------- Build the request -------------------
    $boundary = "\r\n\r\n";
    $payload  = NULL; // body as it should be handed over to a redirected request

    if ($body) {
        if (is_array($body) || is_object($body)) {
            $body = http_build_query($body);
            $_h['content-type'] = 'application/x-www-form-urlencoded';
        }
        $body = (string) $body;
        $payload = $body;
        $_h['content-length'] = strlen($body);
        $body .= $boundary;
        if (empty($options['method'])) {
            $options['method'] = 'POST';
        }
    } else {
        $body = NULL;
    }

    $meth = empty($options['method']) ? 'GET' : strtoupper($options['method']);

    // Caller supplied headers override the defaults collected so far.
    if ($head) {
        if (!is_array($head)) {
            $head = explode("\r\n", $head);
        }
        foreach ($head as $i => $v) {
            if (is_int($i)) {
                $v = explode(':', $v, 2);
                if (count($v) != 2) {
                    continue; // Invalid header
                }
                list($i, $v) = $v;
            }
            $i = strtolower(strtr($i, ' _', '--'));
            $_h[$i] = trim($v);
        }
    }

    $wantGzip = isset($options['decode']) && $options['decode'] == 'gzip';
    if ($wantGzip) {
        $_h['accept-encoding'] = 'gzip';
    }

    // Connection is closed unless the caller explicitly passes a falsy 'close' option.
    $_h['connection'] = (isset($options['close']) && !$options['close']) ? 'keep-alive' : 'close';

    $prot = empty($options['protocol']) ? 'HTTP/1.1' : $options['protocol'];

    $head = array("{$meth} {$path} {$prot}");
    foreach ($_h as $i => $v) {
        // content-type -> Content-Type
        $head[] = implode('-', array_map('ucfirst', explode('-', (string) $i))) . ': ' . $v;
    }

    $rqst = implode("\r\n", $head) . $boundary . $body;
    $head = NULL; // free mem

    $timeout = isset($options['timeout']) ? $options['timeout'] : ini_get("default_socket_timeout");

    $ret->options = $options;

    // ------------------- Connection and data transfer -------------------
    $errno  = 0;
    $errstr = '';
    $rsps   = '';
    $_rh    = NULL;

    $fs = @fsockopen($host, (int) $port, $errno, $errstr, (float) $timeout);
    if (!$fs) {
        throw new Exception('unable to create socket "' . $host . ':' . $port . '"' . $errstr, $errno);
    }

    if (!fwrite($fs, $rqst)) {
        fclose($fs);
        throw new Exception("unable to write");
    }

    // Read the status line and headers, up to the first empty line.
    while ($open = !feof($fs) && ($p = @fgets($fs, 1024))) {
        if ($p == "\r\n") {
            break;
        }
        $rsps .= $p;
    }

    if (!$rsps) {
        fclose($fs);
        return false; // no response
    }

    $h = explode("\r\n", rtrim($rsps));
    // The reason phrase is optional, hence the padding.
    list(, $rcode, $rmsg) = array_pad(explode(' ', array_shift($h), 3), 3, NULL);
    foreach ($h as $v) {
        $v = explode(':', $v, 2);
        $_rh[strtoupper(strtr($v[0], '- ', '__'))] = isset($v[1]) ? trim($v[1]) : NULL;
    }
    $rsps = NULL;

    switch ($rcode) {
        case 301:
        case 302:
        case 303:
        case 307:
            // repeat request using the same post data
            if (isset($options['redirects']) && $options['redirects'] > 0 && !empty($_rh['LOCATION'])) {
                // Resolve the location against the requested URL: bare host (no transport
                // prefix), a proper "://" separator and the request path.
                $base = ($scheme === '' ? '' : $scheme . '://')
                      . $ret->host . ':' . $port
                      . (empty($options['path']) ? $path : $options['path']);
                $loc = self::abs_url($_rh['LOCATION'], $base);
                unset($_h['host'], $options['host'], $options['port'], $options['scheme'], $options['method']);
                --$options['redirects'];
                // ??? could save cookies for redirect
                fclose($fs);
                // Pass the original payload: the request terminator is appended again by the callee.
                return self::http_wr($loc, $_h, $payload, $options);
            }
            break;
    }

    // Detect body length:
    //   1 - no body expected, 2 - Content-Length, 3 - chunked, 0 - unknown (read until EOF)
    $te = 0;
    $bl = 0;
    if (!$open || $rcode < 200 || $rcode == 204 || $rcode == 304 || $meth == 'HEAD') {
        $te = 1;
    } elseif (isset($_rh['TRANSFER_ENCODING']) && strtolower($_rh['TRANSFER_ENCODING']) === 'chunked') {
        $te = 3;
    } elseif (isset($_rh['CONTENT_LENGTH'])) {
        $bl = (int) $_rh['CONTENT_LENGTH'];
        $te = 2;
    }

    // Reads are compared against false/'' explicitly, so that a payload of "0" is not
    // mistaken for the end of the stream.
    switch ($te) {
        case 1:
            break;

        case 2:
            while ($bl > 0 && !feof($fs) && ($p = @fread($fs, $bl)) !== false && $p !== '') {
                $rsps .= $p;
                $bl -= strlen($p);
            }
            break;

        case 3:
            // $open turns false as soon as any read fails and then stays false.
            while ($open = $open && !feof($fs) && ($p = @fgets($fs, 1024))) {
                $_re = explode(';', rtrim($p)); // drop chunk extensions
                $bl = hexdec(reset($_re));
                if (!$bl) {
                    break; // empty chunk terminates the body
                }
                while ($bl > 0 && ($open = !feof($fs) && ($p = @fread($fs, $bl)) !== false && $p !== '')) {
                    $rsps .= $p;
                    $bl -= strlen($p);
                }
                @fgets($fs, 3); // \r\n
            }
            if ($open && !feof($fs) && ($p = @fgets($fs, 1024))) {
                $p = rtrim($p);
                if ($p) {
                    // ??? Trailer Header
                    $v = explode(':', $p, 2);
                    $_rh[strtoupper(strtr($v[0], '- ', '__'))] = isset($v[1]) ? trim($v[1]) : NULL;
                    @fgets($fs, 3); // \r\n
                }
            }
            break;

        default:
            // Unknown length: read until the peer closes the connection.
            while (!feof($fs) && ($p = @fread($fs, 1024)) !== false && $p !== '') {
                $rsps .= $p;
            }
            break;
    }

    fclose($fs);

    if ($rsps != '' && $wantGzip && isset($_rh['CONTENT_ENCODING']) && $_rh['CONTENT_ENCODING'] == 'gzip') {
        $r = self::gzdecode($rsps);
        if ($r !== false) {
            unset($_rh['CONTENT_ENCODING']);
            $rsps = $r;
        }
    }

    $ret->code    = $rcode;
    $ret->msg     = $rmsg;
    $ret->headers = isset($_rh) ? $_rh : NULL;
    $ret->body    = $rsps;
    $ret->method  = $meth;
    // $ret->host = $host;
    $ret->port    = $port;
    $ret->path    = $path;
    $ret->request = $rqst;

    return $ret;
}
/**
 * Resolves a file name relative to PHPUNIT_DIR, extracting it from "<name>.gz" on demand.
 *
 * When the plain file is missing but a gzipped sibling exists, the archive is decoded
 * with hQuery::gzdecode() and written next to it.
 *
 * @param string $fn file name relative to PHPUNIT_DIR
 *
 * @return string|false full path of the file, or false when it is neither present nor extractable
 */
public static function file_exists($fn)
{
    $ffn = PHPUNIT_DIR . $fn;
    if (file_exists($ffn)) {
        return $ffn;
    }

    $zfn = $ffn . '.gz';
    if (!file_exists($zfn)) {
        return false;
    }

    $gz = file_get_contents($zfn);
    if ($gz === false) {
        return false;
    }

    // Bail out before writing anything: a failed decode must not leave an empty
    // file behind that later calls would report as existing.
    $data = hQuery::gzdecode($gz);
    if (!is_string($data)) {
        return false;
    }

    // Compare with false explicitly: writing 0 bytes (an empty archive) is still a success.
    if (file_put_contents($ffn, $data) === false) {
        return false;
    }

    return $ffn;
}