/**
 * Advances a web request state machine until it reaches the "done" state or a step returns early.
 *
 * States handled:
 *  - "initialize":     validates the protocol, builds browser-profile headers and the Cookie header,
 *                      merges the options and starts the request via \CubicleSoft\HTTP::RetrieveWebpage().
 *  - "process_async":  runs one cycle of \CubicleSoft\HTTP::ProcessState() on the stored HTTP state.
 *  - "post_retrieval": updates the referer, prepares redirects, stores 'Set-Cookie' cookies and
 *                      either restarts at "initialize" (redirect) or finishes the request.
 *
 * @param array $state Request state; modified in place.  Keys read here include "state", "url", "urlinfo",
 *                     "profile", "httpopts", "tempoptions", "timeout", "startts", "numredirects",
 *                     "redirectts", "totalrawsendsize" and "numfollow".
 *
 * @return array An array with "success" => false plus "error"/"errorcode" when a step fails (in the
 *               "process_async" state the failure array from \CubicleSoft\HTTP::ProcessState() is returned
 *               as-is), otherwise the final $state["result"].
 */
public function ProcessState(&$state)
{
    while ($state["state"] !== "done") {
        switch ($state["state"]) {
            case "initialize":
                if (empty($this->data["allowedprotocols"][$state["urlinfo"]["scheme"]])) {
                    return array(
                        "success" => false,
                        "error" => \CubicleSoft\HTTP::HTTPTranslate("Protocol '%s' is not allowed in '%s'.", $state["urlinfo"]["scheme"], $state["url"]),
                        "errorcode" => "allowed_protocols"
                    );
                }

                // The file extension of the requested path selects the Accept header below.
                $filename = \CubicleSoft\HTTP::ExtractFilename($state["urlinfo"]["path"]);
                $dotpos = strrpos($filename, ".");
                $fileext = ($dotpos !== false ? strtolower(substr($filename, $dotpos + 1)) : "");

                // Set up some standard headers.
                $headers = array();
                $profile = strtolower($state["profile"]);

                // A profile of the form "browser-ext" overrides the detected file extension.
                $profileparts = explode("-", $profile);
                if (count($profileparts) == 2) {
                    $profile = $profileparts[0];
                    $fileext = $profileparts[1];
                }

                $isimage = ($fileext == "png" || $fileext == "jpg" || $fileext == "jpeg" || $fileext == "gif" || $fileext == "svg");

                if (substr($profile, 0, 2) == "ie" || ($profile == "auto" && substr($this->data["useragent"], 0, 2) == "ie")) {
                    if ($fileext == "css") {
                        $headers["Accept"] = "text/css";
                    } elseif ($isimage) {
                        $headers["Accept"] = "image/png, image/svg+xml, image/*;q=0.8, */*;q=0.5";
                    } elseif ($fileext == "js") {
                        $headers["Accept"] = "application/javascript, */*;q=0.8";
                    } elseif ($this->data["referer"] != "" || $fileext == "" || $fileext == "html" || $fileext == "xhtml" || $fileext == "xml") {
                        $headers["Accept"] = "text/html, application/xhtml+xml, */*";
                    } else {
                        $headers["Accept"] = "*/*";
                    }

                    $headers["Accept-Language"] = "en-US";
                    $headers["User-Agent"] = \CubicleSoft\HTTP::GetUserAgent(substr($profile, 0, 2) == "ie" ? $profile : $this->data["useragent"]);
                } elseif ($profile == "firefox" || ($profile == "auto" && $this->data["useragent"] == "firefox")) {
                    if ($fileext == "css") {
                        $headers["Accept"] = "text/css,*/*;q=0.1";
                    } elseif ($isimage) {
                        $headers["Accept"] = "image/png,image/*;q=0.8,*/*;q=0.5";
                    } elseif ($fileext == "js") {
                        $headers["Accept"] = "*/*";
                    } else {
                        $headers["Accept"] = "text/html, application/xhtml+xml, */*";
                    }

                    $headers["Accept-Language"] = "en-us,en;q=0.5";
                    $headers["Cache-Control"] = "max-age=0";
                    $headers["User-Agent"] = \CubicleSoft\HTTP::GetUserAgent("firefox");
                } elseif ($profile == "opera" || ($profile == "auto" && $this->data["useragent"] == "opera")) {
                    // Opera has the right idea:  Just send the same thing regardless of the request type.
                    $headers["Accept"] = "text/html, application/xml;q=0.9, application/xhtml+xml, image/png, image/webp, image/jpeg, image/gif, image/x-xbitmap, */*;q=0.1";
                    $headers["Accept-Language"] = "en-US,en;q=0.9";
                    $headers["Cache-Control"] = "no-cache";
                    $headers["User-Agent"] = \CubicleSoft\HTTP::GetUserAgent("opera");
                } elseif ($profile == "safari" || $profile == "chrome" || ($profile == "auto" && ($this->data["useragent"] == "safari" || $this->data["useragent"] == "chrome"))) {
                    if ($fileext == "css") {
                        $headers["Accept"] = "text/css,*/*;q=0.1";
                    } elseif ($isimage || $fileext == "js") {
                        $headers["Accept"] = "*/*";
                    } else {
                        $headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
                    }

                    $headers["Accept-Charset"] = "ISO-8859-1,utf-8;q=0.7,*;q=0.3";
                    $headers["Accept-Language"] = "en-US,en;q=0.8";
                    $headers["User-Agent"] = \CubicleSoft\HTTP::GetUserAgent($profile == "safari" || $profile == "chrome" ? $profile : $this->data["useragent"]);
                }

                if ($this->data["referer"] != "") {
                    $headers["Referer"] = $this->data["referer"];
                }

                // Generate the final headers array.  Later arrays win, so per-request headers override the profile.
                $headers = array_merge($headers, $state["httpopts"]["headers"], $state["tempoptions"]["headers"]);

                // Determine the host without port information.  Bracketed (IPv6-style) hosts keep their brackets.
                $host = (isset($headers["Host"]) ? $headers["Host"] : $state["urlinfo"]["host"]);
                $bracketpos = strpos($host, "]");
                if (substr($host, 0, 1) == "[" && $bracketpos !== false) {
                    $host = substr($host, 0, $bracketpos + 1);
                } else {
                    $colonpos = strpos($host, ":");
                    if ($colonpos !== false) {
                        $host = substr($host, 0, $colonpos);
                    }
                }

                // Lowercase, dot-prefixed host used for suffix matching against stored cookie domains.
                $dothost = strtolower($host);
                if (substr($dothost, 0, 1) != ".") {
                    $dothost = "." . $dothost;
                }
                $state["dothost"] = $dothost;

                // Append cookies and delete old, invalid cookies.
                $secure = ($state["urlinfo"]["scheme"] == "https");
                $cookiepath = $state["urlinfo"]["path"];
                if ($cookiepath == "") {
                    $cookiepath = "/";
                }
                $slashpos = strrpos($cookiepath, "/");
                if ($slashpos !== false) {
                    $cookiepath = substr($cookiepath, 0, $slashpos + 1);
                }
                $state["cookiepath"] = $cookiepath;

                $sendcookies = array();
                foreach ($this->data["cookies"] as $domain => $paths) {
                    // Only domains that are a suffix of the dot-prefixed host apply.
                    if (strlen($dothost) < strlen($domain) || substr($dothost, -strlen($domain)) !== $domain) {
                        continue;
                    }

                    foreach ($paths as $path => $pathcookies) {
                        if (substr($cookiepath, 0, strlen($path)) != $path) {
                            continue;
                        }

                        foreach ($pathcookies as $num => $info) {
                            if (isset($info["expires_ts"]) && $this->GetExpiresTimestamp($info["expires_ts"]) < time()) {
                                unset($this->data["cookies"][$domain][$path][$num]);
                            } elseif ($secure || !isset($info["secure"])) {
                                $sendcookies[$info["name"]] = $info["value"];
                            }
                        }

                        if (!count($this->data["cookies"][$domain][$path])) {
                            unset($this->data["cookies"][$domain][$path]);
                        }
                    }

                    if (!count($this->data["cookies"][$domain])) {
                        unset($this->data["cookies"][$domain]);
                    }
                }

                $cookiepairs = array();
                foreach ($sendcookies as $name => $value) {
                    $cookiepairs[] = rawurlencode($name) . "=" . rawurlencode($value);
                }
                $headers["Cookie"] = implode("; ", $cookiepairs);
                if ($headers["Cookie"] == "") {
                    unset($headers["Cookie"]);
                }

                // Generate the final options array.
                $state["options"] = array_merge($state["httpopts"], $state["tempoptions"]);
                $state["options"]["headers"] = $headers;
                if ($state["timeout"] !== false) {
                    $state["options"]["timeout"] = \CubicleSoft\HTTP::GetTimeLeft($state["startts"], $state["timeout"]);
                }

                // Let a callback handle any additional state changes.
                if (isset($state["options"]["pre_retrievewebpage_callback"]) && is_callable($state["options"]["pre_retrievewebpage_callback"]) && !call_user_func_array($state["options"]["pre_retrievewebpage_callback"], array(&$state))) {
                    return array(
                        "success" => false,
                        "error" => \CubicleSoft\HTTP::HTTPTranslate("Pre-RetrieveWebpage callback returned with a failure condition for '%s'.", $state["url"]),
                        "errorcode" => "pre_retrievewebpage_callback"
                    );
                }

                // Process the request.
                $result = \CubicleSoft\HTTP::RetrieveWebpage($state["url"], $state["options"]);
                $result["url"] = $state["url"];

                // Request payloads are dropped from the stored options before they are copied into the result.
                unset($state["options"]["files"]);
                unset($state["options"]["body"]);
                $result["options"] = $state["options"];

                $result["firstreqts"] = $state["startts"];
                $result["numredirects"] = $state["numredirects"];
                $result["redirectts"] = $state["redirectts"];
                if (isset($result["rawsendsize"])) {
                    $state["totalrawsendsize"] += $result["rawsendsize"];
                }
                $result["totalrawsendsize"] = $state["totalrawsendsize"];

                if (!$result["success"]) {
                    return array(
                        "success" => false,
                        "error" => \CubicleSoft\HTTP::HTTPTranslate("Unable to retrieve content. %s", $result["error"]),
                        "info" => $result,
                        "state" => $state,
                        "errorcode" => "retrievewebpage"
                    );
                }

                if (isset($state["options"]["async"]) && $state["options"]["async"]) {
                    $state["async"] = true;
                    $state["httpstate"] = $result["state"];
                    $state["state"] = "process_async";
                } else {
                    $state["result"] = $result;
                    $state["state"] = "post_retrieval";
                }

                break;

            case "process_async":
                // Run a cycle of the HTTP state processor.
                $result = \CubicleSoft\HTTP::ProcessState($state["httpstate"]);
                if (!$result["success"]) {
                    return $result;
                }

                $result["url"] = $state["url"];
                $result["options"] = $state["options"];
                unset($result["options"]["files"]);
                unset($result["options"]["body"]);
                $result["firstreqts"] = $state["startts"];
                $result["numredirects"] = $state["numredirects"];
                $result["redirectts"] = $state["redirectts"];
                if (isset($result["rawsendsize"])) {
                    $state["totalrawsendsize"] += $result["rawsendsize"];
                }
                $result["totalrawsendsize"] = $state["totalrawsendsize"];

                $state["httpstate"] = false;
                $state["result"] = $result;
                $state["state"] = "post_retrieval";

                break;

            case "post_retrieval":
                // Set up structures for another round.
                if ($this->data["autoreferer"]) {
                    $this->data["referer"] = $state["url"];
                }

                $redirecting = (isset($state["result"]["headers"]["Location"]) && $this->data["followlocation"]);
                if ($redirecting) {
                    $state["redirectts"] = microtime(true);

                    // The redirected request is issued without the original method or request body.
                    unset($state["tempoptions"]["method"]);
                    unset($state["tempoptions"]["write_body_callback"]);
                    unset($state["tempoptions"]["body"]);
                    unset($state["tempoptions"]["postvars"]);
                    unset($state["tempoptions"]["files"]);

                    // Generate an absolute URL.  A relative 'Location' is relative to the URL that was just
                    // requested, which is not necessarily the configured referer (auto-referer may be disabled).
                    $prevurl = $state["url"];
                    $state["tempoptions"]["headers"]["Referer"] = $prevurl;
                    $state["url"] = \CubicleSoft\HTTP::ConvertRelativeToAbsoluteURL($prevurl, $state["result"]["headers"]["Location"][0]);

                    $urlinfo2 = \CubicleSoft\HTTP::ExtractURL($state["url"]);

                    if (empty($this->data["allowedredirprotocols"][$urlinfo2["scheme"]])) {
                        return array(
                            "success" => false,
                            "error" => \CubicleSoft\HTTP::HTTPTranslate("Protocol '%s' is not allowed. Server attempted to redirect to '%s'.", $urlinfo2["scheme"], $state["url"]),
                            "info" => $state["result"],
                            "errorcode" => "allowed_redir_protocols"
                        );
                    }

                    // A custom 'Host' header only applies to the original host.
                    if ($urlinfo2["host"] != $state["urlinfo"]["host"]) {
                        unset($state["tempoptions"]["headers"]["Host"]);
                        unset($state["httpopts"]["headers"]["Host"]);
                    }

                    $state["urlinfo"] = $urlinfo2;
                    $state["numredirects"]++;
                }

                // Handle any 'Set-Cookie' headers.
                if (isset($state["result"]["headers"]["Set-Cookie"])) {
                    foreach ($state["result"]["headers"]["Set-Cookie"] as $rawcookie) {
                        // Split on ';' alone and trim each piece so that attributes sent without a
                        // space after the separator are not folded into the cookie value.
                        $parts = explode(";", $rawcookie);
                        $pair = trim(array_shift($parts));
                        if ($pair === "") {
                            continue;
                        }

                        // The first piece is the name[=value] pair.
                        $namevalue = array();
                        $eqpos = strpos($pair, "=");
                        if ($eqpos === false) {
                            $namevalue["name"] = urldecode($pair);
                            $namevalue["value"] = "";
                        } else {
                            $namevalue["name"] = urldecode(substr($pair, 0, $eqpos));
                            $namevalue["value"] = urldecode(substr($pair, $eqpos + 1));
                        }

                        // The remaining pieces are attributes, stored under lowercase keys.
                        $cookie = array();
                        foreach ($parts as $attr) {
                            $attr = trim($attr);
                            if ($attr === "") {
                                continue;
                            }

                            $eqpos = strpos($attr, "=");
                            if ($eqpos === false) {
                                $cookie[strtolower(trim(urldecode($attr)))] = "";
                            } else {
                                $cookie[strtolower(trim(urldecode(substr($attr, 0, $eqpos))))] = urldecode(substr($attr, $eqpos + 1));
                            }
                        }

                        // The real name/value always win over attributes that happen to be called "name"/"value".
                        $cookie = array_merge($cookie, $namevalue);

                        if (isset($cookie["expires"])) {
                            // An unparseable expiration date is stored as a timestamp one day in the past.
                            $ts = \CubicleSoft\HTTP::GetDateTimestamp($cookie["expires"]);
                            $cookie["expires_ts"] = gmdate("Y-m-d H:i:s", ($ts === false ? time() - 24 * 60 * 60 : $ts));
                        } elseif (isset($cookie["max-age"])) {
                            $cookie["expires_ts"] = gmdate("Y-m-d H:i:s", time() + (int) $cookie["max-age"]);
                        } else {
                            // Never accept a server-supplied "expires_ts" attribute.
                            unset($cookie["expires_ts"]);
                        }

                        if (!isset($cookie["domain"])) {
                            $cookie["domain"] = $state["dothost"];
                        }
                        if (!isset($cookie["path"])) {
                            $cookie["path"] = $state["cookiepath"];
                        }

                        $this->SetCookie($cookie);
                    }
                }

                if ($state["numfollow"] > 0) {
                    $state["numfollow"]--;
                }

                // If this is a redirect, handle it by starting over.
                if ($redirecting && $state["numfollow"]) {
                    $state["result"] = false;
                    $state["state"] = "initialize";
                } else {
                    $state["result"]["numredirects"] = $state["numredirects"];
                    $state["result"]["redirectts"] = $state["redirectts"];

                    // Extract the forms from the page in a parsed format.
                    // Call \CubicleSoft\WebBrowser::GenerateFormRequest() to prepare an actual request for Process().
                    if ($this->data["extractforms"]) {
                        $state["result"]["forms"] = $this->ExtractForms($state["result"]["url"], $state["result"]["body"], (isset($state["tempoptions"]["extractforms_hint"]) ? $state["tempoptions"]["extractforms_hint"] : false));
                    }

                    $state["state"] = "done";
                }

                break;
        }
    }

    return $state["result"];
}
/**
 * Waits for activity on the server and client streams and runs one processing cycle.
 *
 * The cycle accepts pending connections, advances readable clients (request parsing) and
 * writable clients (response sending), initializes newly accepted clients and removes clients
 * that have been idle for longer than the default client timeout.
 *
 * @param int|float|bool $timeout   Passed to UpdateStreamsAndTimeout() and the stream select call.
 *                                  NOTE(review): presumably seconds, with false meaning "no caller limit" - confirm.
 * @param array          $readfps   Extra handles to wait on for reading; server/client handles are
 *                                  presumably added by UpdateStreamsAndTimeout() - confirm.
 * @param array          $writefps  Extra handles to wait on for writing.
 * @param array|null     $exceptfps Extra handles to wait on for exceptions.
 *
 * @return array On stream select failure: "success" => false with "error"/"errorcode".  Otherwise
 *               "success" => true plus "clients" (clients needing the caller's attention, keyed by ID),
 *               "removed" (removed clients with the result that caused removal) and the leftover
 *               "readfps"/"writefps"/"exceptfps" handles that do not belong to this server.
 */
public function Wait($timeout = false, $readfps = array(), $writefps = array(), $exceptfps = NULL)
{
    $this->UpdateStreamsAndTimeout("", $timeout, $readfps, $writefps);

    $result = array(
        "success" => true,
        "clients" => array(),
        "removed" => array(),
        "readfps" => array(),
        "writefps" => array(),
        "exceptfps" => array()
    );

    if (!count($readfps) && !count($writefps)) {
        return $result;
    }

    $selectresult = self::FixedStreamSelect($readfps, $writefps, $exceptfps, $timeout);
    if ($selectresult === false) {
        return array(
            "success" => false,
            "error" => \CubicleSoft\HTTP::HTTPTranslate("Wait() failed due to stream_select() failure. Most likely cause: Connection failure."),
            "errorcode" => "stream_select_failed"
        );
    }

    // Handle new connections.
    if (isset($readfps["http_s"])) {
        while (($fp = @stream_socket_accept($this->fp, 0)) !== false) {
            // Enable non-blocking mode.
            stream_set_blocking($fp, 0);

            $client = $this->InitNewClient();
            $client->fp = $fp;
            $client->ipaddr = stream_socket_get_name($fp, true);
        }

        unset($readfps["http_s"]);
    }

    // Handle clients in the read queue.
    foreach ($readfps as $cid => $fp) {
        // Handles that are not keyed "http_c_<id>" are left in place and returned to the caller.
        if (!is_string($cid) || strncmp($cid, "http_c_", 7) !== 0) {
            continue;
        }

        $id = (int) substr($cid, 7);
        if (!isset($this->clients[$id])) {
            continue;
        }

        $client = $this->clients[$id];
        $client->lastts = microtime(true);

        if ($client->httpstate !== false) {
            $readresult = \CubicleSoft\HTTP::ProcessState($client->httpstate);
            if ($readresult["success"]) {
                // Trigger the last variable to process when extracting form variables.
                if ($client->contenttype !== false && $client->contenttype[""] === "application/x-www-form-urlencoded") {
                    $this->ProcessClientRequestBody($readresult["request"], "&", $id);
                }

                if ($client->currfile !== false) {
                    $client->files[$client->currfile]->Close();
                    $client->currfile = false;
                }

                $result["clients"][$id] = $client;

                // The request is complete.  Switch the client over to response mode.
                $client->requestcomplete = true;
                $client->requests++;
                $client->mode = "init_response";
                $client->responseheaders = array();
                $client->responsefinalized = false;
                $client->responsebodysize = false;

                $client->httpstate["type"] = "request";
                $client->httpstate["startts"] = microtime(true);
                $client->httpstate["waituntil"] = -1.0;
                $client->httpstate["data"] = "";
                $client->httpstate["bodysize"] = false;
                $client->httpstate["chunked"] = false;
                $client->httpstate["secure"] = $this->ssl;
                $client->httpstate["state"] = "send_data";

                $client->SetResponseCode(200);
                $client->SetResponseContentType("text/html; charset=UTF-8");

                // Keep-alive is disabled on request, for HTTP versions below 1.1 and once the request limit is hit.
                if (isset($client->headers["Connection"])) {
                    $connection = \CubicleSoft\HTTP::ExtractHeader($client->headers["Connection"]);
                    if (strtolower($connection[""]) === "close") {
                        $client->keepalive = false;
                    }
                }

                $verparts = explode("/", $client->request["httpver"]);
                $ver = (float) array_pop($verparts);
                if ($ver < 1.1) {
                    $client->keepalive = false;
                }

                if ($client->requests >= $this->maxrequests) {
                    $client->keepalive = false;
                }

                // Compress the response when enabled and the client lists gzip in 'Accept-Encoding'.
                if ($this->usegzip && isset($client->headers["Accept-Encoding"])) {
                    $encodings = \CubicleSoft\HTTP::ExtractHeader($client->headers["Accept-Encoding"]);
                    $encodings = explode(",", $encodings[""]);

                    $gzip = false;
                    foreach ($encodings as $encoding) {
                        if (strtolower(trim($encoding)) === "gzip") {
                            $gzip = true;
                        }
                    }

                    if ($gzip) {
                        $client->deflate = new \CubicleSoft\DeflateStream();
                        $client->deflate->Init("wb", -1, array("type" => "gzip"));

                        $client->AddResponseHeader("Content-Encoding", "gzip", true);
                    }
                }
            } elseif ($readresult["errorcode"] !== "no_data") {
                // Only clients that completed at least one request are reported as removed.
                if ($client->requests) {
                    $result["removed"][$id] = array("result" => $readresult, "client" => $client);
                }

                $this->RemoveClient($id);
            } elseif ($client->requestcomplete === false && $client->httpstate["state"] !== "request_line" && $client->httpstate["state"] !== "headers") {
                // Allows the caller an opportunity to adjust some client options based on inputs on a per-client basis (e.g. recvlimit).
                $result["clients"][$id] = $client;
            }
        }

        unset($readfps[$cid]);
    }

    // Handle clients in the write queue.
    foreach ($writefps as $cid => $fp) {
        if (!is_string($cid) || strncmp($cid, "http_c_", 7) !== 0) {
            continue;
        }

        $id = (int) substr($cid, 7);
        if (!isset($this->clients[$id])) {
            continue;
        }

        $client = $this->clients[$id];
        $client->lastts = microtime(true);

        if ($client->httpstate !== false) {
            // Transform the client response into real data.
            if ($client->mode === "response_ready") {
                if ($client->responsefinalized) {
                    $client->AddResponseHeader("Content-Length", (string) strlen($client->writedata), true);
                    $client->httpstate["bodysize"] = strlen($client->writedata);
                } elseif ($client->responsebodysize !== false) {
                    $client->AddResponseHeader("Content-Length", (string) $client->responsebodysize, true);
                    $client->httpstate["bodysize"] = $client->responsebodysize;
                } elseif ($client->keepalive) {
                    // Unknown body size on a persistent connection requires chunked encoding.
                    $client->AddResponseHeader("Transfer-Encoding", "chunked", true);
                    $client->httpstate["chunked"] = true;
                }

                $client->AddResponseHeader("Date", gmdate("D, d M Y H:i:s T"), true);

                if (!$client->keepalive || $client->requests >= $this->maxrequests) {
                    $client->AddResponseHeader("Connection", "close", true);
                }

                foreach ($client->responseheaders as $name => $vals) {
                    foreach ($vals as $val) {
                        $client->httpstate["data"] .= $name . ": " . $val . "\r\n";
                    }
                }
                $client->responseheaders = false;

                $client->httpstate["data"] .= "\r\n";

                $client->mode = "handle_response";
            }

            $writeresult = \CubicleSoft\HTTP::ProcessState($client->httpstate);
            if ($writeresult["success"]) {
                if (!$client->responsefinalized) {
                    // The caller still has response data to supply.
                    $result["clients"][$id] = $client;
                } elseif ($client->keepalive && $client->requests < $this->maxrequests) {
                    // Reset client.
                    $client->mode = "init_request";
                    $client->httpstate = false;
                    $client->readdata = "";
                    $client->request = false;
                    $client->url = "";
                    $client->headers = false;
                    $client->contenttype = false;
                    $client->contenthandled = true;
                    $client->cookievars = false;
                    $client->requestvars = false;
                    $client->requestcomplete = false;
                    $client->deflate = false;
                    $client->writedata = "";

                    foreach ($client->files as $filename => $tempfile) {
                        unset($client->files[$filename]);
                    }
                    $client->files = array();

                    // Move the client back to the initialization queue for its next request.
                    $this->initclients[$id] = $client;
                    unset($this->clients[$id]);
                } else {
                    $result["removed"][$id] = array("result" => array("success" => true), "client" => $client);

                    $this->RemoveClient($id);
                }
            } elseif ($writeresult["errorcode"] !== "no_data") {
                $result["removed"][$id] = array("result" => $writeresult, "client" => $client);

                $this->RemoveClient($id);
            }
        }

        unset($writefps[$cid]);
    }

    // Initialize new clients.
    foreach ($this->initclients as $id => $client) {
        // Keep stepping through modes for as long as the client makes progress and is still initializing.
        do {
            $origmode = $client->mode;

            switch ($client->mode) {
                case "init":
                    $cryptoresult = ($this->ssl ? @stream_socket_enable_crypto($client->fp, true, STREAM_CRYPTO_METHOD_TLS_SERVER) : true);

                    if ($cryptoresult === true) {
                        $client->mode = "init_request";
                    } elseif ($cryptoresult === false) {
                        // The handshake failed.  Any other value leaves the client in "init" for a later cycle.
                        @fclose($client->fp);

                        unset($this->initclients[$id]);
                    }

                    break;

                case "init_request":
                    // Use the HTTP class in server mode to handle state.
                    // The callback functions are located in WebServer to avoid the issue of pass-by-reference memory leaks.
                    $options = $this->defaultclientoptions;
                    $options["async"] = true;
                    $options["read_headers_callback"] = array($this, "ProcessClientRequestHeaders");
                    $options["read_headers_callback_opts"] = $id;
                    $options["read_body_callback"] = array($this, "ProcessClientRequestBody");
                    $options["read_body_callback_opts"] = $id;
                    $options["write_body_callback"] = array($this, "ProcessClientResponseBody");
                    $options["write_body_callback_opts"] = $id;

                    // Apply default limits unless the default client options already define them.
                    if (!isset($options["readlinelimit"])) {
                        $options["readlinelimit"] = 116000;
                    }
                    if (!isset($options["maxheaders"])) {
                        $options["maxheaders"] = 1000;
                    }
                    if (!isset($options["recvlimit"])) {
                        $options["recvlimit"] = 1000000;
                    }

                    $startts = microtime(true);
                    $clienttimeout = (isset($options["timeout"]) ? $options["timeout"] : false);
                    $initresult = array(
                        "success" => true,
                        "rawsendsize" => 0,
                        "rawsendheadersize" => 0,
                        "rawrecvsize" => 0,
                        "rawrecvheadersize" => 0,
                        "startts" => $startts
                    );
                    $debug = (isset($options["debug"]) && $options["debug"]);
                    if ($debug) {
                        $initresult["rawsend"] = "";
                        $initresult["rawrecv"] = "";
                    }

                    $client->httpstate = \CubicleSoft\HTTP::InitResponseState($client->fp, $debug, $options, $startts, $clienttimeout, $initresult, false, false);
                    $client->mode = "handle_request";

                    $client->lastts = microtime(true);

                    $this->clients[$id] = $client;
                    unset($this->initclients[$id]);

                    break;
            }
        } while (isset($this->initclients[$id]) && $origmode !== $client->mode);
    }

    // Handle client timeouts.
    $ts = microtime(true);
    foreach ($this->clients as $id => $client) {
        if ($client->lastts + $this->defaultclienttimeout < $ts) {
            if ($client->requests) {
                // Report an explicit timeout failure instead of whatever result was last computed above.
                $result["removed"][$id] = array(
                    "result" => array(
                        "success" => false,
                        "error" => \CubicleSoft\HTTP::HTTPTranslate("Client timed out. Most likely cause: Connection failure."),
                        "errorcode" => "client_timeout"
                    ),
                    "client" => $client
                );
            }

            $this->RemoveClient($id);
        }
    }

    // Return any extra handles that were being waited on.
    $result["readfps"] = $readfps;
    $result["writefps"] = $writefps;
    $result["exceptfps"] = $exceptfps;

    return $result;
}