/**
 * Downloads a page through WebBrowser and hands back its body.
 *
 * A diagnostic line is echoed and false is returned when Process() reports
 * failure or when the response code is anything other than 200.
 *
 * @param string $url Address passed to WebBrowser::Process().
 *
 * @return mixed The "body" entry of the result on success, false otherwise.
 */
function getWebPage($url)
{
    $browser = new WebBrowser();
    $response = $browser->Process($url);

    // Process() itself reported a failure.
    if (!$response["success"]) {
        echo "Error retrieving URL. " . $response["error"] . "\n";

        return false;
    }

    // A response arrived, but only a 200 counts as success.
    $status = $response["response"];
    if ($status["code"] != 200) {
        echo "Error retrieving URL. Server returned: " . $status["code"] . " " . $status["meaning"] . "\n";

        return false;
    }

    return $response["body"];
}
/**
 * GetWebPage()
 *
 * Retrieves the HTML of the page at $url using the third-party WebBrowser
 * class that ships under ultimate-web-scraper. Failures are written to the
 * logger held in $this->log.
 *
 * @param string $url The URL to fetch.
 *
 * @return mixed The HTML content on success, false if the page could not be
 *               retrieved or the server answered with a code other than 200.
 */
function GetWebPage($url)
{
    $browser = new WebBrowser();
    $response = $browser->Process($url);

    // The fetch itself failed: log the reason and give up.
    if (!$response["success"]) {
        $this->log->error("Error retrieving URL. " . $response["error"]);

        return false;
    }

    // Any HTTP response code other than 200 is reported as an error too.
    $status = $response["response"];
    if ($status["code"] != 200) {
        $this->log->error("Error retrieving URL. Server returned: " . $status["code"] . " " . $status["meaning"]);

        return false;
    }

    return $response["body"];
}
/**
 * Checks the reCAPTCHA answer submitted with the current request.
 *
 * Verification is skipped when either key is empty, or when $info["remember"]
 * is set and the session already records a passed check. Otherwise the
 * "g-recaptcha-response" request value is sent to Google's siteverify
 * endpoint and every failure is appended to $result["errors"]. On success,
 * when $info["remember"] is set, the pass is stored in the session.
 *
 * @param array $result Passed by reference; error messages are appended to $result["errors"].
 * @param array $info   Settings; the keys read here are "publickey", "privatekey" and "remember".
 *
 * @return void
 */
private function ProcessVerification(&$result, $info)
{
    global $sso_ipaddr, $sso_session_info;

    // Nothing to verify unless both keys are configured.
    if ($info["publickey"] == "" || $info["privatekey"] == "") {
        return;
    }

    // A remembered pass makes a new check unnecessary.
    if ($info["remember"] && isset($sso_session_info["sso_recaptcha_passed"]) && $sso_session_info["sso_recaptcha_passed"]) {
        return;
    }

    // Request values may be arrays; only a string can be URL-encoded below.
    if (!isset($_REQUEST["g-recaptcha-response"]) || !is_string($_REQUEST["g-recaptcha-response"])) {
        $result["errors"][] = BB_Translate("Human Verification information is missing.");

        return;
    }

    require_once SSO_ROOT_PATH . "/" . SSO_SUPPORT_PATH . "/http.php";
    require_once SSO_ROOT_PATH . "/" . SSO_SUPPORT_PATH . "/web_browser.php";

    $url = "https://www.google.com/recaptcha/api/siteverify?secret=" . urlencode($info["privatekey"]) . "&response=" . urlencode($_REQUEST["g-recaptcha-response"]) . "&remoteip=" . urlencode($sso_ipaddr["ipv6"]);

    $web = new WebBrowser();
    $result2 = $web->Process($url);
    if (!$result2["success"]) {
        // NOTE(review): the message has no %s placeholder, so the error detail passed
        // as the second argument is presumably not shown - confirm against BB_Translate().
        $result["errors"][] = BB_Translate("Human Verification failed. Error retrieving response from remote service.", $result2["error"]);

        return;
    }

    if ($result2["response"]["code"] != 200) {
        $result["errors"][] = BB_Translate("Human Verification failed. The remote service responded with: %s", $result2["response"]["code"] . " " . $result2["response"]["meaning"]);

        return;
    }

    // json_decode() returns null (not false) for undecodable input, so test the
    // decoded shape instead of comparing against false.
    $data = json_decode($result2["body"], true);
    if (!is_array($data)) {
        $result["errors"][] = BB_Translate("Human Verification failed. Unable to decode the response from the remote service.");

        return;
    }

    // The service always includes "success"; a rejected answer carries
    // "success": false, so the value itself must be checked, not just its presence.
    if (!isset($data["success"]) || !$data["success"]) {
        // Failure details arrive as a list under "error-codes"; "error" is kept as a fallback.
        if (isset($data["error-codes"]) && is_array($data["error-codes"])) {
            $code = implode(", ", $data["error-codes"]);
        } elseif (isset($data["error"]) && is_string($data["error"])) {
            $code = $data["error"];
        } else {
            $code = "";
        }

        $result["errors"][] = BB_Translate("Incorrect Human Verification entered. Try again. (Code: %s)", BB_Translate($code));

        return;
    }

    // Remember the pass so later requests in this session skip the check.
    if ($info["remember"]) {
        $sso_session_info["sso_recaptcha_passed"] = true;
        if (!SSO_SaveSessionInfo()) {
            $result["errors"][] = BB_Translate("Unable to save session information.");
        }
    }
}
/**
 * Runs the Google OAuth2 sign-in flow for the frontend.
 *
 * The step is selected by the "sso_google_action" request value:
 *  - "signin":  Google's callback. Forwards state/code/error plus the language
 *               saved in the session to the "signin2" step via a redirect.
 *  - "signin2": validates the state token and its expiry, exchanges the
 *               authorization code for an access token, fetches the user
 *               profile and calls SSO_ActivateUser().
 *  - anything else: stores a fresh token in the session and redirects the
 *               browser to Google's authorization URL.
 *
 * Every failure is reported through $this->DisplayError().
 *
 * @return void
 */
public function ProcessFrontend()
{
    global $sso_rng, $sso_provider, $sso_settings, $sso_session_info;

    $redirect_uri = BB_GetRequestHost() . SSO_ROOT_URL . "/index.php?sso_provider=" . urlencode($sso_provider) . "&sso_google_action=signin";

    if (isset($_REQUEST["sso_google_action"]) && $_REQUEST["sso_google_action"] == "signin") {
        // Recover the language settings.
        if (!isset($sso_session_info["sso_google_info"])) {
            $this->DisplayError(BB_Translate("Unable to authenticate the request."));

            return;
        }

        $url = BB_GetRequestHost() . SSO_ROOT_URL . "/index.php?sso_provider=" . urlencode($sso_provider) . "&sso_google_action=signin2";
        if (isset($_REQUEST["state"])) {
            $url .= "&state=" . urlencode($_REQUEST["state"]);
        }
        if (isset($_REQUEST["code"])) {
            $url .= "&code=" . urlencode($_REQUEST["code"]);
        }
        if (isset($_REQUEST["error"])) {
            $url .= "&error=" . urlencode($_REQUEST["error"]);
        }
        $url .= "&lang=" . urlencode($sso_session_info["sso_google_info"]["lang"]);

        header("Location: " . $url);

        return;
    }

    if (isset($_REQUEST["sso_google_action"]) && $_REQUEST["sso_google_action"] == "signin2") {
        // Validate the token.
        if (!isset($_REQUEST["state"]) || !isset($sso_session_info["sso_google_info"]) || $_REQUEST["state"] !== $sso_session_info["sso_google_info"]["token"]) {
            $this->DisplayError(BB_Translate("Unable to authenticate the request."));

            return;
        }

        // Check for token expiration.
        if (CSDB::ConvertFromDBTime($sso_session_info["sso_google_info"]["expires"]) < time()) {
            $this->DisplayError(BB_Translate("Verification token has expired."));

            return;
        }

        if (isset($_REQUEST["error"])) {
            if ($_REQUEST["error"] == "access_denied") {
                $message = BB_Translate("The request to sign in with Google was denied.");
            } else {
                $message = BB_Translate("The error message returned was '%s'.", $_REQUEST["error"]);
            }

            $this->DisplayError(BB_Translate("Sign in failed. %s", $message));

            return;
        }

        if (!isset($_REQUEST["code"])) {
            $this->DisplayError(BB_Translate("Sign in failed. Authorization code missing."));

            return;
        }

        // Get an access token from the authorization code.
        require_once SSO_ROOT_PATH . "/" . SSO_SUPPORT_PATH . "/http.php";
        require_once SSO_ROOT_PATH . "/" . SSO_SUPPORT_PATH . "/web_browser.php";

        $url = "https://accounts.google.com/o/oauth2/token";
        $options = array(
            "postvars" => array(
                "code" => $_REQUEST["code"],
                "client_id" => $sso_settings["sso_google"]["client_id"],
                "client_secret" => $sso_settings["sso_google"]["client_secret"],
                "redirect_uri" => $redirect_uri,
                "grant_type" => "authorization_code"
            )
        );

        $web = new WebBrowser();
        $result = $web->Process($url, "auto", $options);
        if (!$result["success"]) {
            $this->DisplayError(BB_Translate("Sign in failed. Error retrieving URL for Google access token. %s", $result["error"]));

            return;
        }

        if ($result["response"]["code"] != 200) {
            $this->DisplayError(BB_Translate("Sign in failed. The Google access token server returned: %s", $result["response"]["code"] . " " . $result["response"]["meaning"]));

            return;
        }

        // Get the access token. json_decode() returns null for undecodable input.
        $data = json_decode($result["body"], true);
        if (!is_array($data) || !isset($data["access_token"])) {
            $this->DisplayError(BB_Translate("Sign in failed. Error retrieving access token from Google."));

            return;
        }

        // Get the user's profile information.
        $url = "https://www.googleapis.com/oauth2/v1/userinfo?access_token=" . urlencode($data["access_token"]);
        $result = $web->Process($url);
        if (!$result["success"]) {
            $this->DisplayError(BB_Translate("Sign in failed. Error retrieving URL for Google profile information. %s", $result["error"]));

            return;
        }

        if ($result["response"]["code"] != 200) {
            $this->DisplayError(BB_Translate("Sign in failed. The Google profile information server returned: %s", $result["response"]["code"] . " " . $result["response"]["meaning"]));

            return;
        }

        // Stop here when the profile cannot be decoded or carries no "id":
        // everything below (field conversion, activation) needs both.
        $profile = json_decode($result["body"], true);
        if (!is_array($profile) || !isset($profile["id"])) {
            $this->DisplayError(BB_Translate("Sign in failed. Error retrieving profile information from Google."));

            return;
        }

        $origprofile = $profile;

        // Remove unverified e-mail addresses.
        if (!isset($profile["verified_email"]) || !$profile["verified_email"]) {
            unset($profile["verified_email"]);
            unset($profile["email"]);
        }

        // Convert most profile fields into strings.
        // The profile is decoded as associative arrays, so the object case below
        // is kept only for parity with the original logic.
        foreach ($profile as $key => $val) {
            if (is_string($val)) {
                continue;
            }

            if (is_bool($val)) {
                $val = (string) (int) $val;
            } elseif (is_numeric($val)) {
                $val = (string) $val;
            } elseif (is_object($val) && isset($val->id) && isset($val->name)) {
                $val = $val->name;
            }

            $profile[$key] = $val;
        }

        // Copy the profile fields that the settings map to a target field name.
        $mapinfo = array();
        foreach (self::$fieldmap as $key => $info) {
            $key2 = $sso_settings["sso_google"]["map_" . $key];
            if ($key2 != "" && isset($profile[$key])) {
                $mapinfo[$key2] = $profile[$key];
            }
        }

        SSO_ActivateUser($profile["id"], serialize($origprofile), $mapinfo);

        // Only falls through on account lockout or a fatal error.
        $this->DisplayError(BB_Translate("User activation failed."));

        return;
    }

    // Create internal data packet.
    $token = $sso_rng->GenerateString();
    $sso_session_info["sso_google_info"] = array(
        "lang" => (isset($_REQUEST["lang"]) ? $_REQUEST["lang"] : ""),
        "token" => $token,
        "expires" => CSDB::ConvertToDBTime(time() + 30 * 60)
    );
    if (!SSO_SaveSessionInfo()) {
        $this->DisplayError(BB_Translate("Unable to save session information."));

        return;
    }

    // Calculate the required scope.
    $scope = array("https://www.googleapis.com/auth/userinfo.profile" => true);
    foreach (self::$fieldmap as $key => $info) {
        if ($info["extra"] != "" && $sso_settings["sso_google"]["map_" . $key] != "") {
            $scope[$info["extra"]] = true;
        }
    }

    // Get the login redirection URL.
    $options = array(
        "response_type" => "code",
        "client_id" => $sso_settings["sso_google"]["client_id"],
        "redirect_uri" => $redirect_uri,
        "scope" => implode(" ", array_keys($scope)),
        "state" => $token
    );
    $options2 = array();
    foreach ($options as $key => $val) {
        $options2[] = urlencode($key) . "=" . urlencode($val);
    }
    $url = "https://accounts.google.com/o/oauth2/auth?" . implode("&", $options2);

    SSO_ExternalRedirect($url);
}
// Print the output of the preceding test and close its section.
echo $result . "\n\n";
echo "-------------------\n\n";

// Word HTML cleanup: run TagFilter over test_word.txt, using only the part
// of the file that precedes the "@EXIT@" marker (the whole file when the
// marker is absent).
echo "Testing Word HTML cleanup\n";
echo "-------------------------\n";
$testfile = file_get_contents("test_word.txt");
$pos = strpos($testfile, "@EXIT@");
$pos = ($pos === false ? strlen($testfile) : $pos);
$testfile = substr($testfile, 0, $pos);
$result = TagFilter::Run($testfile, $options);
echo $result . "\n\n";
echo "-------------------------\n\n";

// Fetch a page over HTTP and list every link on it as an absolute URL.
$html = new simple_html_dom();
$web = new WebBrowser();
$result = $web->Process("http://www.barebonescms.com/");
if (!$result["success"]) {
    echo "[FAIL] An error occurred. " . $result["error"] . "\n";
} elseif ($result["response"]["code"] != 200) {
    echo "[FAIL] An unexpected response code was returned. " . $result["response"]["line"] . "\n";
} else {
    echo "[PASS] The expected response was returned.\n";

    $html->load($result["body"]);
    $rows = $html->find('a[href]');
    foreach ($rows as $row) {
        echo "\t" . HTTP::ConvertRelativeToAbsoluteURL($result["url"], $row->href) . "\n";
    }
}

// Request the same site over HTTPS; the result is not examined within this fragment.
$result = $web->Process("https://www.barebonescms.com/");
<?php
// Demo script: fetches the ScraperWiki login page and, when that succeeds
// with a 200 response, POSTs a login form to the same URL and dumps the result.
ini_set('display_errors', 1);
error_reporting(E_ALL ^ E_NOTICE);

// NOTE(review): this require targets a remote https:// URL, so the code that
// runs is whatever that server returns at request time - confirm this is intended.
require "https://scraperwiki.com/editor/raw/webbrowser";
//require_once "support/simple_html_dom.php";

// Simple HTML DOM tends to leak RAM like
// a sieve. Declare what you will need here.
// Objects are reusable.
// $html = new simple_html_dom();

$url = "https://scraperwiki.com/login/";
$web = new WebBrowser();
$result = $web->Process($url);
if (!$result["success"]) {
    echo "Error retrieving URL. " . $result["error"] . "\n";
} else {
    if ($result["response"]["code"] != 200) {
        echo "Error retrieving URL. Server returned: " . $result["response"]["code"] . " " . $result["response"]["meaning"] . "\n";
    } else {
        // Build the login form fields.
        // NOTE(review): the account name and password are hard-coded here, and
        // $input_csrfmiddlewaretoken is not assigned anywhere in this fragment.
        $postvars = array("user_or_email" => "russell", "password" => "blabla1rc", "csrfmiddlewaretoken" => $input_csrfmiddlewaretoken, "submit" => "Log in");
        $tempoptions = array("method" => "POST", "postvars" => $postvars);
        // Self-assignment; it has no effect.
        $url = $url;
        // Reuses the same WebBrowser instance for the POST.
        $result2 = $web->Process($url, 'auto', $tempoptions);
        echo "RESULT 2 IS DONE<br><br>";
        echo "<pre>";
        print_r($result2);
        echo "</pre>";
    }
// NOTE(review): the closing brace of the outer else is not part of this fragment.
/**
 * Opens a WebSocket connection.
 *
 * Any existing connection is dropped first. When $options["fp"] already
 * holds a resource it is adopted as the socket; otherwise the URL is
 * rewritten from ws/wss to http/https and the upgrade handshake is sent
 * through a WebBrowser instance. The socket is then switched to
 * non-blocking mode and the connection state is reset.
 *
 * @param string $url     WebSocket URL; only the 'ws' and 'wss' schemes are accepted.
 * @param string $origin  Value for the Origin header; omitted when empty.
 * @param string $profile Passed through to WebBrowser::Process().
 * @param array  $options Passed through to WebBrowser::Process(); "headers" is extended and
 *                        "async" is removed. "fp" may supply an already open resource.
 * @param mixed  $web     WebBrowser instance to use, or false to create one.
 *
 * @return array array("success" => true) on success; otherwise an array with
 *               "success" => false plus error details.
 */
public function Connect($url, $origin, $profile = "auto", $options = array(), $web = false)
{
    $this->Disconnect();

    if (class_exists("CSPRNG") && $this->csprng === false) {
        $this->csprng = new CSPRNG();
    }

    if (!isset($options["fp"]) || !is_resource($options["fp"])) {
        // Use WebBrowser to initiate the connection.
        if ($web === false) {
            $web = new WebBrowser();
        }

        // Transform URL.
        $parts = HTTP::ExtractURL($url);
        if ($parts["scheme"] != "ws" && $parts["scheme"] != "wss") {
            return array(
                "success" => false,
                "error" => HTTP::HTTPTranslate("WebSocket::Connect() only supports the 'ws' and 'wss' protocols."),
                "errorcode" => "protocol_check"
            );
        }
        $parts["scheme"] = str_replace("ws", "http", $parts["scheme"]);
        $httpurl = HTTP::CondenseURL($parts);

        // Generate correct request headers.
        $key = base64_encode($this->PRNGBytes(16));

        $handshake = array("Connection" => "keep-alive, Upgrade");
        if ($origin != "") {
            $handshake["Origin"] = $origin;
        }
        $handshake["Pragma"] = "no-cache";
        $handshake["Sec-WebSocket-Key"] = $key;
        $handshake["Sec-WebSocket-Version"] = "13";
        $handshake["Upgrade"] = "websocket";

        if (!isset($options["headers"])) {
            $options["headers"] = array();
        }
        foreach ($handshake as $name => $value) {
            $options["headers"][$name] = $value;
        }

        // No async support for connecting at this time. Async mode is enabled AFTER connecting though.
        unset($options["async"]);

        // Connect to the WebSocket.
        $result = $web->Process($httpurl, $profile, $options);
        if (!$result["success"]) {
            return $result;
        }

        // The request must be answered with 101.
        if ($result["response"]["code"] != 101) {
            return array(
                "success" => false,
                "error" => HTTP::HTTPTranslate("WebSocket::Connect() failed to connect to the WebSocket. Server returned: %s %s", $result["response"]["code"], $result["response"]["meaning"]),
                "errorcode" => "incorrect_server_response"
            );
        }

        if (!isset($result["headers"]["Sec-Websocket-Accept"])) {
            return array(
                "success" => false,
                "error" => HTTP::HTTPTranslate("Server failed to include a 'Sec-WebSocket-Accept' header in its response to the request."),
                "errorcode" => "missing_server_websocket_accept_header"
            );
        }

        // Verify the Sec-WebSocket-Accept response.
        $expected = base64_encode(sha1($key . self::KEY_GUID, true));
        if ($result["headers"]["Sec-Websocket-Accept"][0] !== $expected) {
            return array(
                "success" => false,
                "error" => HTTP::HTTPTranslate("The server's 'Sec-WebSocket-Accept' header is invalid."),
                "errorcode" => "invalid_server_websocket_accept_header"
            );
        }

        $this->fp = $result["fp"];
    } else {
        // The caller supplied an already open resource.
        $this->fp = $options["fp"];
    }

    // Enable non-blocking mode.
    stream_set_blocking($this->fp, 0);

    // Start from a clean connection state.
    $this->state = self::STATE_OPEN;
    $this->readdata = "";
    $this->readmessages = array();
    $this->writedata = "";
    $this->writemessages = array();
    $this->lastkeepalive = time();
    $this->keepalivesent = false;

    return array("success" => true);
}
<?php
// Lists every hyperlink found on the page whose address is given in the
// "adres_strony" query parameter.
echo "<html>\n<head><title>Listowanie odnosnikow z podanej strony www</title>\n</head>\n<body>\n<h1>Sebastian Rutkowski 125NCI_B</h1>\n";

require_once "ultimate-web-scraper/support/http.php";
require_once "ultimate-web-scraper/support/web_browser.php";
require_once "ultimate-web-scraper/support/simple_html_dom.php";

// The address comes straight from the query string: it may be missing or an array.
$url = (isset($_GET["adres_strony"]) && is_string($_GET["adres_strony"]) ? trim($_GET["adres_strony"]) : "");

if (!preg_match('#^https?://#i', $url)) {
    // Accept web addresses only, so that a local path cannot be passed in.
    echo "Error retrieving URL. Only http:// and https:// addresses are supported.\n";
} else {
    // Start with an empty parser; the page is downloaded once, through WebBrowser.
    $html = new simple_html_dom();

    $web = new WebBrowser();
    $result = $web->Process($url);
    if (!$result["success"]) {
        echo "Error retrieving URL. " . htmlspecialchars($result["error"], ENT_QUOTES, "UTF-8") . "\n";
    } elseif ($result["response"]["code"] != 200) {
        echo "Error retrieving URL. Server returned: " . htmlspecialchars($result["response"]["code"] . " " . $result["response"]["meaning"], ENT_QUOTES, "UTF-8") . "\n";
    } else {
        echo "Wszystkie odnosniki na stronie:\n<br><br>";

        $html->load($result["body"]);
        $links = $html->find('a[href]');
        foreach ($links as $link) {
            // The href is text from a remote page: escape it before output.
            // Existing entities are left alone (double_encode = false).
            echo "\t" . htmlspecialchars($link->href, ENT_QUOTES, "UTF-8", false) . "\n<br>";
        }
    }
}

echo "</body>\n</html>";
/**
 * Downloads an image and checks that it is an allowed GIF, JPG or PNG.
 *
 * URLs that do not start with http:// or https:// are mapped onto the local
 * server: those beginning with a slash or backslash are appended to the
 * request host, all others are resolved against the directory part of the
 * current request URL.
 *
 * @param string $url     Absolute or relative image URL.
 * @param array  $options Optional keys read here: "protocol", "allow_gif",
 *                        "allow_jpg", "allow_png" (each allow_* is treated as true when absent).
 *
 * @return array On success: "success" => true plus "type", "data" and "url".
 *               On failure: "success" => false plus "error".
 */
function BB_IsValidHTMLImage($url, $options = array())
{
    if (!function_exists("fsockopen")) {
        return array("success" => false, "error" => "Unable to retrieve the image since the PHP function 'fsockopen' does not exist.");
    }

    require_once ROOT_PATH . "/" . SUPPORT_PATH . "/http.php";
    require_once ROOT_PATH . "/" . SUPPORT_PATH . "/web_browser.php";

    // Map relative URLs to the local server.
    $url = trim($url);
    $isabsolute = (stripos($url, "http://") === 0 || stripos($url, "https://") === 0);
    if (!$isabsolute) {
        $firstchar = substr($url, 0, 1);
        if ($firstchar == "/" || $firstchar == "\\") {
            // Host-relative: prefix with the request host.
            $url = BB_GetRequestHost(isset($options["protocol"]) ? $options["protocol"] : "") . $url;
        } else {
            // Document-relative: keep the base URL up to and including its last slash.
            $base = BB_GetFullRequestURLBase(isset($options["protocol"]) ? $options["protocol"] : "");
            $slash = strrpos($base, "/");
            if ($slash !== false) {
                $base = substr($base, 0, $slash + 1);
            }

            $url = $base . $url;
        }
    }

    $web = new WebBrowser();
    $result = $web->Process($url);
    if (!$result["success"]) {
        return array("success" => false, "error" => "Unable to retrieve the image. " . $result["error"]);
    }
    if ($result["response"]["code"] != 200) {
        return array("success" => false, "error" => "Unable to retrieve the image. Server returned: " . $result["response"]["code"] . " " . $result["response"]["meaning"]);
    }

    // Accept the first format that is both allowed and matches the detected type.
    $type = BB_GetImageType($result["body"]);
    foreach (array("gif", "jpg", "png") as $format) {
        $allowed = (!isset($options["allow_" . $format]) || $options["allow_" . $format]);
        if ($allowed && $type == $format) {
            return array("success" => true, "type" => $format, "data" => $result["body"], "url" => $url);
        }
    }

    return array("success" => false, "error" => "Invalid image.");
}