/**
 * Fetch a document with file_get_contents() and parse it into a simple_html_dom tree.
 *
 * Before fetching, one HTTP request is recorded against $testId, provided
 * connectToDb() yields a database handle in $db (that helper is defined elsewhere).
 *
 * @param string        $url              File name or URL to read.
 * @param mixed         $testId           Test identifier forwarded to incrementHttpRequests().
 * @param bool          $use_include_path Forwarded to file_get_contents().
 * @param resource|null $context          Stream context forwarded to file_get_contents().
 * @param int           $offset           Read offset; the legacy default -1 means "from the start".
 * @param int           $maxLen           Maximum number of bytes to read; negative means "no limit".
 * @param bool          $lowercase        Forwarded to the simple_html_dom constructor and load().
 * @param bool          $forceTagsClosed  Forwarded to the simple_html_dom constructor.
 * @param string        $target_charset   Forwarded to the simple_html_dom constructor.
 * @param bool          $stripRN          Forwarded to simple_html_dom::load().
 * @param string        $defaultBRText    Forwarded to the simple_html_dom constructor.
 *
 * @return simple_html_dom|false The loaded DOM object, or false when nothing could be read.
 */
function file_get_html($url, $testId, $use_include_path = false, $context = null, $offset = -1, $maxLen = -1, $lowercase = true, $forceTagsClosed = true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN = true, $defaultBRText = DEFAULT_BR_TEXT)
{
    connectToDb($db);
    if ($db) {
        incrementHttpRequests($db, $testId);
    }

    // We DO force the tags to be terminated.
    $dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $defaultBRText);

    // -1 used to be PHP's own "no offset" default. Since PHP 7.1 a negative offset
    // means "seek from the end", which remote streams cannot do, so the default
    // call would fail for every URL. Treat the legacy sentinel as offset 0.
    if ($offset == -1) {
        $offset = 0;
    }

    // Only pass a length when the caller asked for one: older PHP versions reject
    // a negative length and read zero bytes for a null one.
    if ($maxLen >= 0) {
        $contents = file_get_contents($url, $use_include_path, $context, $offset, $maxLen);
    } else {
        $contents = file_get_contents($url, $use_include_path, $context, $offset);
    }

    // Explicit checks instead of empty(), which would also reject a body of "0".
    if ($contents === false || $contents === '') {
        return false;
    }

    // The second parameter can force the selectors to all be lowercase.
    $dom->load($contents, $lowercase, $stripRN);

    return $dom;
}
/**
 * Build the HTTP request described by $arguments and send it over the
 * already-opened connection (socket or curl handle).
 *
 * Keys of $arguments read here: "ProxyUser", "ProxyPassword", "ProxyRealm",
 * "ProxyWorkstation", "RequestMethod", "User-Agent", "KeepAlive", "Accept",
 * "Headers", "RequestURI", "StreamRequest", "PostFiles", "PostValues",
 * "CharSet", "Body", "BodyStream", "AuthUser", "AuthPassword", "AuthRealm"
 * and "AuthWorkstation".
 *
 * On success $this->state becomes "RequestSent", or "SendingRequestBody" when
 * "StreamRequest" is set on the socket path.
 *
 * One HTTP request is recorded against $this->testId when connectToDb()
 * yields a database handle (helper defined elsewhere).
 *
 * @param array $arguments Request description, see above.
 *
 * @return string Empty string on success, otherwise an error message.
 */
function SendRequest($arguments)
{
    // Plain by-value call syntax: call-time pass-by-reference (`&$db`) is a fatal
    // error since PHP 5.4. connectToDb() is expected to declare its parameter by
    // reference, as the other callers in this file already assume.
    connectToDb($db);
    if ($db) {
        incrementHttpRequests($db, $this->testId);
    }

    if (strlen($this->error)) {
        return $this->error;
    }

    // Per-request proxy credentials take precedence over the instance defaults.
    if (isset($arguments["ProxyUser"])) {
        $this->proxy_request_user = $arguments["ProxyUser"];
    } elseif (isset($this->proxy_user)) {
        $this->proxy_request_user = $this->proxy_user;
    }
    if (isset($arguments["ProxyPassword"])) {
        $this->proxy_request_password = $arguments["ProxyPassword"];
    } elseif (isset($this->proxy_password)) {
        $this->proxy_request_password = $this->proxy_password;
    }
    if (isset($arguments["ProxyRealm"])) {
        $this->proxy_request_realm = $arguments["ProxyRealm"];
    } elseif (isset($this->proxy_realm)) {
        $this->proxy_request_realm = $this->proxy_realm;
    }
    if (isset($arguments["ProxyWorkstation"])) {
        $this->proxy_request_workstation = $arguments["ProxyWorkstation"];
    } elseif (isset($this->proxy_workstation)) {
        $this->proxy_request_workstation = $this->proxy_workstation;
    }

    switch ($this->state) {
        case "Disconnected":
            return $this->SetError("1 connection was not yet established");
        case "Connected":
            $connect = 0;
            break;
        case "ConnectedToProxy":
            if (strlen($error = $this->ConnectFromProxy($arguments, $headers))) {
                return $error;
            }
            $connect = 1;
            break;
        default:
            return $this->SetError("2 can not send request in the current connection state");
    }

    if (isset($arguments["RequestMethod"])) {
        $this->request_method = $arguments["RequestMethod"];
    }
    if (isset($arguments["User-Agent"])) {
        $this->user_agent = $arguments["User-Agent"];
    }
    if (!isset($arguments["Headers"]["User-Agent"]) && strlen($this->user_agent)) {
        $arguments["Headers"]["User-Agent"] = $this->user_agent;
    }
    if (isset($arguments["KeepAlive"])) {
        $this->keep_alive = intval($arguments["KeepAlive"]);
    }
    if (!isset($arguments["Headers"]["Connection"]) && $this->keep_alive) {
        $arguments["Headers"]["Connection"] = 'Keep-Alive';
    }
    if (isset($arguments["Accept"])) {
        // Previously assigned to $this->user_agent, which both clobbered the
        // user agent and kept the Accept argument from ever reaching the header.
        $this->accept = $arguments["Accept"];
    }
    if (!isset($arguments["Headers"]["Accept"]) && strlen($this->accept)) {
        $arguments["Headers"]["Accept"] = $this->accept;
    }
    if (strlen($this->request_method) == 0) {
        return $this->SetError("3 it was not specified a valid request method");
    }
    if (isset($arguments["RequestURI"])) {
        $this->request_uri = $arguments["RequestURI"];
    }
    if (strlen($this->request_uri) == 0 || substr($this->request_uri, 0, 1) != "/") {
        return $this->SetError("4 it was not specified a valid request URI");
    }

    $this->request_arguments = $arguments;
    $this->request_headers = isset($arguments["Headers"]) ? $arguments["Headers"] : array();
    $body_length = 0;
    $this->request_body = "";
    $get_body = 1;

    if ($this->request_method == "POST" || $this->request_method == "PUT") {
        if (isset($arguments['StreamRequest'])) {
            // The caller will stream the body afterwards.
            $get_body = 0;
            $this->request_headers["Transfer-Encoding"] = "chunked";
        } elseif (isset($arguments["PostFiles"]) || $this->force_multipart_form_post && isset($arguments["PostValues"])) {
            // multipart/form-data: only the part headers and the total length are
            // prepared here; file contents are read when the request is written.
            $boundary = "--" . md5(uniqid(time()));
            $this->request_headers["Content-Type"] = "multipart/form-data; boundary=" . $boundary . (isset($arguments["CharSet"]) ? "; charset=" . $arguments["CharSet"] : "");
            $post_parts = array();
            if (isset($arguments["PostValues"])) {
                $values = $arguments["PostValues"];
                if (gettype($values) != "array") {
                    return $this->SetError("5 it was not specified a valid POST method values array");
                }
                foreach ($values as $input => $data) {
                    $headers = "--" . $boundary . "\r\nContent-Disposition: form-data; name=\"" . $input . "\"\r\n\r\n";
                    $post_parts[] = array("HEADERS" => $headers, "DATA" => $data);
                    $body_length += strlen($headers) + strlen($data) + strlen("\r\n");
                }
            }
            $body_length += strlen("--" . $boundary . "--\r\n");
            $files = isset($arguments["PostFiles"]) ? $arguments["PostFiles"] : array();
            foreach ($files as $input => $file_argument) {
                // As before, processing stops at the first entry without a string key.
                if (gettype($input) != "string") {
                    break;
                }
                if (strlen($error = $this->GetFileDefinition($files[$input], $definition))) {
                    return "3 " . $error;
                }
                $headers = "--" . $boundary . "\r\nContent-Disposition: form-data; name=\"" . $input . "\"; filename=\"" . $definition["NAME"] . "\"\r\nContent-Type: " . $definition["Content-Type"] . "\r\n\r\n";
                $part = count($post_parts);
                $post_parts[$part] = array("HEADERS" => $headers);
                if (isset($definition["FILENAME"])) {
                    $post_parts[$part]["FILENAME"] = $definition["FILENAME"];
                    $data = "";
                } else {
                    $data = $definition["DATA"];
                }
                $post_parts[$part]["DATA"] = $data;
                $body_length += strlen($headers) + $definition["Content-Length"] + strlen("\r\n");
            }
            $get_body = 0;
        } elseif (isset($arguments["PostValues"])) {
            // application/x-www-form-urlencoded body.
            $values = $arguments["PostValues"];
            if (gettype($values) != "array") {
                return $this->SetError("5 it was not specified a valid POST method values array");
            }
            foreach ($values as $k => $field) {
                $items = gettype($field) == "array" ? $field : array($field);
                foreach ($items as $item) {
                    // Separate pairs based on what has been written so far, so an
                    // empty array as first value cannot produce a leading "&".
                    if (strlen($this->request_body)) {
                        $this->request_body .= "&";
                    }
                    $this->request_body .= urlencode($k) . "=" . urlencode($item);
                }
            }
            $this->request_headers["Content-Type"] = "application/x-www-form-urlencoded" . (isset($arguments["CharSet"]) ? "; charset=" . $arguments["CharSet"] : "");
            $get_body = 0;
        }
    }

    if ($get_body && (isset($arguments["Body"]) || isset($arguments["BodyStream"]))) {
        if (isset($arguments["Body"])) {
            $this->request_body = $arguments["Body"];
        } else {
            // Concatenate every "Data" string / "File" content of the stream list.
            // NOTE(review): $php_errormsg is no longer populated on PHP 8; the
            // SetPHPError() signature is not visible here, so it is left as is.
            $stream = $arguments["BodyStream"];
            $this->request_body = "";
            for ($part = 0; $part < count($stream); $part++) {
                if (isset($stream[$part]["Data"])) {
                    $this->request_body .= $stream[$part]["Data"];
                } elseif (isset($stream[$part]["File"])) {
                    if (!($file = @fopen($stream[$part]["File"], "rb"))) {
                        return $this->SetPHPError("could not open upload file " . $stream[$part]["File"], $php_errormsg);
                    }
                    while (!feof($file)) {
                        if (gettype($block = @fread($file, $this->file_buffer_length)) != "string") {
                            $error = $this->SetPHPError("could not read body stream file " . $stream[$part]["File"], $php_errormsg);
                            fclose($file);
                            return $error;
                        }
                        $this->request_body .= $block;
                    }
                    fclose($file);
                } else {
                    return "5 it was not specified a valid file or data body stream element at position " . $part;
                }
            }
        }
        if (!isset($this->request_headers["Content-Type"])) {
            $this->request_headers["Content-Type"] = "application/octet-stream" . (isset($arguments["CharSet"]) ? "; charset=" . $arguments["CharSet"] : "");
        }
    }

    // Per-request authentication credentials take precedence over the defaults.
    if (isset($arguments["AuthUser"])) {
        $this->request_user = $arguments["AuthUser"];
    } elseif (isset($this->user)) {
        $this->request_user = $this->user;
    }
    if (isset($arguments["AuthPassword"])) {
        $this->request_password = $arguments["AuthPassword"];
    } elseif (isset($this->password)) {
        $this->request_password = $this->password;
    }
    if (isset($arguments["AuthRealm"])) {
        $this->request_realm = $arguments["AuthRealm"];
    } elseif (isset($this->realm)) {
        $this->request_realm = $this->realm;
    }
    if (isset($arguments["AuthWorkstation"])) {
        $this->request_workstation = $arguments["AuthWorkstation"];
    } elseif (isset($this->workstation)) {
        $this->request_workstation = $this->workstation;
    }

    // Without a proxy (or once tunnelled through one) only the path is sent;
    // otherwise the request line carries the absolute URL.
    if (strlen($this->proxy_host_name) == 0 || $connect) {
        $request_uri = $this->request_uri;
    } else {
        // Initialised so that protocols other than http/https do not read an
        // undefined variable below.
        $default_port = 0;
        switch (strtolower($this->protocol)) {
            case "http":
                $default_port = 80;
                break;
            case "https":
                $default_port = 443;
                break;
        }
        $request_uri = strtolower($this->protocol) . "://" . $this->host_name . ($this->host_port == 0 || $this->host_port == $default_port ? "" : ":" . $this->host_port) . $this->request_uri;
    }

    if ($this->use_curl) {
        // curl_version() may return an array or a "libcurl/x.y.z ..." string.
        $v = curl_version();
        if (gettype($v) == "array") {
            $version = isset($v["version"]) ? $v["version"] : "0.0.0";
        } else {
            $version = preg_match("/^libcurl\\/([0-9]+\\.[0-9]+\\.[0-9]+)/", $v, $m) ? $m[1] : "0.0.0";
        }
        $curl_version = 100000 * intval($this->Tokenize($version, ".")) + 1000 * intval($this->Tokenize(".")) + intval($this->Tokenize(""));
        // libcurl older than 7.13.2 is restricted to HTTP/1.0 here.
        $protocol_version = $curl_version < 713002 ? "1.0" : $this->protocol_version;
    } else {
        $protocol_version = $this->protocol_version;
    }

    $this->request = $this->request_method . " " . $request_uri . " HTTP/" . $protocol_version;
    if ($body_length || ($body_length = strlen($this->request_body))) {
        $this->request_headers["Content-Length"] = $body_length;
    }

    // Flatten the header map into "Name: value" lines; array values repeat the header.
    $headers = array();
    $host_set = 0;
    foreach ($this->request_headers as $header_name => $header_value) {
        if (gettype($header_value) == "array") {
            foreach ($header_value as $single_value) {
                $headers[] = $header_name . ": " . $single_value;
            }
        } else {
            $headers[] = $header_name . ": " . $header_value;
        }
        if (strtolower($header_name) == "host") {
            $this->request_host = strtolower($header_value);
            $host_set = 1;
        }
    }
    if (!$host_set) {
        $headers[] = "Host: " . $this->host_name;
        $this->request_host = strtolower($this->host_name);
    }

    if (count($this->cookies)) {
        $cookies = array();
        $this->PickCookies($cookies, 0);
        if (strtolower($this->protocol) == "https") {
            $this->PickCookies($cookies, 1);
        }
        if (count($cookies)) {
            $h = count($headers);
            $headers[$h] = "Cookie:";
            foreach ($cookies as $cookie_name => $cookie) {
                $headers[$h] .= " " . $cookie_name . "=" . $cookie["value"] . ";";
            }
        }
    }

    $next_state = "RequestSent";
    if ($this->use_curl) {
        if (isset($arguments['StreamRequest'])) {
            return $this->SetError("Streaming request data is not supported when using Curl");
        }
        $success = 1;
        if ($body_length && strlen($this->request_body) == 0) {
            // Multipart body: assemble it in memory, reading upload files now.
            $request_body = "";
            for ($part = 0; $part < count($post_parts); $part++) {
                $request_body .= $post_parts[$part]["HEADERS"] . $post_parts[$part]["DATA"];
                if (isset($post_parts[$part]["FILENAME"])) {
                    if (!($file = @fopen($post_parts[$part]["FILENAME"], "rb"))) {
                        $this->SetPHPError("could not open upload file " . $post_parts[$part]["FILENAME"], $php_errormsg);
                        $success = 0;
                        break;
                    }
                    while (!feof($file)) {
                        if (gettype($block = @fread($file, $this->file_buffer_length)) != "string") {
                            $this->SetPHPError("could not read upload file", $php_errormsg);
                            $success = 0;
                            break;
                        }
                        $request_body .= $block;
                    }
                    fclose($file);
                    if (!$success) {
                        break;
                    }
                }
                $request_body .= "\r\n";
            }
            $request_body .= "--" . $boundary . "--\r\n";
        } else {
            $request_body = $this->request_body;
        }
        // Skip the transfer when the body could not be built; previously the
        // failure was overwritten by the curl_exec() result and a truncated
        // request was sent.
        if ($success) {
            curl_setopt($this->connection, CURLOPT_HEADER, 1);
            curl_setopt($this->connection, CURLOPT_RETURNTRANSFER, 1);
            if ($this->timeout) {
                curl_setopt($this->connection, CURLOPT_TIMEOUT, $this->timeout);
            }
            // NOTE(review): certificate and host verification are switched off here.
            curl_setopt($this->connection, CURLOPT_SSL_VERIFYPEER, 0);
            curl_setopt($this->connection, CURLOPT_SSL_VERIFYHOST, 0);
            $request = $this->request . "\r\n" . implode("\r\n", $headers) . "\r\n\r\n" . $request_body;
            curl_setopt($this->connection, CURLOPT_CUSTOMREQUEST, $request);
            if ($this->debug) {
                $this->OutputDebug("C " . $request);
            }
            if (!($success = strlen($this->response = curl_exec($this->connection)) != 0)) {
                $error = curl_error($this->connection);
                $this->SetError("Could not execute the request" . (strlen($error) ? ": " . $error : ""));
            }
        }
    } else {
        if ($success = $this->PutLine($this->request)) {
            for ($header = 0; $header < count($headers); $header++) {
                if (!($success = $this->PutLine($headers[$header]))) {
                    break;
                }
            }
            // The empty line terminates the header section.
            if ($success && ($success = $this->PutLine(""))) {
                if (isset($arguments['StreamRequest'])) {
                    $next_state = "SendingRequestBody";
                } elseif ($body_length) {
                    if (strlen($this->request_body)) {
                        $success = $this->PutData($this->request_body);
                    } else {
                        // Multipart body: write each part, streaming upload files in blocks.
                        for ($part = 0; $part < count($post_parts); $part++) {
                            if (!($success = $this->PutData($post_parts[$part]["HEADERS"])) || !($success = $this->PutData($post_parts[$part]["DATA"]))) {
                                break;
                            }
                            if (isset($post_parts[$part]["FILENAME"])) {
                                if (!($file = @fopen($post_parts[$part]["FILENAME"], "rb"))) {
                                    $this->SetPHPError("could not open upload file " . $post_parts[$part]["FILENAME"], $php_errormsg);
                                    $success = 0;
                                    break;
                                }
                                while (!feof($file)) {
                                    if (gettype($block = @fread($file, $this->file_buffer_length)) != "string") {
                                        $this->SetPHPError("could not read upload file", $php_errormsg);
                                        $success = 0;
                                        break;
                                    }
                                    if (!($success = $this->PutData($block))) {
                                        break;
                                    }
                                }
                                fclose($file);
                                if (!$success) {
                                    break;
                                }
                            }
                            if (!($success = $this->PutLine(""))) {
                                break;
                            }
                        }
                        if ($success) {
                            $success = $this->PutLine("--" . $boundary . "--");
                        }
                    }
                    if ($success) {
                        // Was assigned to a misspelt "$sucess", which dropped flush failures.
                        $success = $this->FlushData();
                    }
                }
            }
        }
    }

    if (!$success) {
        return $this->SetError("5 could not send the HTTP request: " . $this->error);
    }
    $this->state = $next_state;

    return "";
}
/**
 * Run the crawl starting at $this->url_to_crawl.
 *
 * URLs are kept in $this->urls_to_crawl, grouped by priority level. Levels are
 * processed from $this->max_priority_level + 1 down to 0; as soon as a higher
 * level receives URLs while a lower one is being processed, processing restarts
 * from the top. Every fetched page is passed to $this->handlePageData(); a
 * negative return value from it aborts the crawl.
 *
 * When finished, $this->status_return holds "links_followed", "files_received",
 * "bytes_received", "traffic_limit_reached", "file_limit_reached", "user_abort",
 * "limit_reached", "process_runtime" and "data_throughput". If
 * $this->firstCrawl is set, the status and duration of the test row are
 * updated in the "tests" table.
 *
 * @return void
 */
function go()
{
    connectToDb($db);
    $starting_time = $this->getmicrotime();

    // Split the start URL; base host/path/domain are used throughout the class.
    $url_parts = PHPCrawlerUtils::splitURL($this->url_to_crawl);
    $this->base_path = $url_parts["path"];
    $this->base_host = $url_parts["host"];
    $this->base_domain = $url_parts["domain"];

    // A port set by the user wins; otherwise take the one from the start URL.
    if ($this->base_port == "") {
        $this->base_port = $url_parts["port"];
    }
    $url_parts["port"] = $this->base_port;

    $this->url_to_crawl = PHPCrawlerUtils::rebuildURL($url_parts);
    $this->url_to_crawl = PHPCrawlerUtils::normalizeURL($this->url_to_crawl);

    $links_followed = 0;
    $files_received = 0;

    // Seed the queue with the start URL unless a "do not follow" rule removes it.
    $tmp = array();
    $tmp[0]["url_rebuild"] = $this->url_to_crawl;
    PHPCrawlerUtils::removeMatchingLinks($tmp, $this->not_follow_matches);
    if (isset($tmp[0]["url_rebuild"]) && $tmp[0]["url_rebuild"] != "") {
        PHPCrawlerUtils::addToArray($tmp, $this->urls_to_crawl, $this->url_map, $this->store_extended_linkinfo);
    }

    // MAIN LOOP: walk the priority levels from highest to lowest.
    for ($pri_level = $this->max_priority_level + 1; $pri_level > -1; $pri_level--) {
        if (isset($this->urls_to_crawl[$pri_level]) && !isset($stop_crawling)) {
            // Initialised here so the check after the loop never sees an undefined
            // or stale value when the level turns out to be empty.
            $stop_crawling_this_level = false;

            // Replacement for the removed each(): always take the first remaining
            // key. Every path through the body either removes that key or breaks,
            // so URLs are visited in the same order and links appended to this
            // level while it is being processed are still picked up.
            while (!empty($this->urls_to_crawl[$pri_level]) && is_array($this->urls_to_crawl[$pri_level])) {
                reset($this->urls_to_crawl[$pri_level]);
                $key = key($this->urls_to_crawl[$pri_level]);

                unset($page_data);
                if (!isset($this->urls_to_crawl[$pri_level][$key]["referer_url"])) {
                    $this->urls_to_crawl[$pri_level][$key]["referer_url"] = "";
                }

                // Count the HTTP request that receivePage() is about to send.
                if ($db) {
                    incrementHttpRequests($db, $this->testId);
                }
                $page_data = $this->pageRequest->receivePage($this->urls_to_crawl[$pri_level][$key]["url_rebuild"], $this->urls_to_crawl[$pri_level][$key]["referer_url"]);

                // The request object ignored the URL: drop it and move on.
                if ($page_data == false) {
                    unset($this->urls_to_crawl[$pri_level][$key]);
                    continue;
                }
                $links_followed++;

                // A <base href> in the document replaces the page URL as the base
                // for resolving the links found.
                $base_url = PHPCrawlerUtils::getBasePathFromTag($page_data["source"]);
                if ($base_url == "") {
                    $actual_url =& $this->urls_to_crawl[$pri_level][$key]["url_rebuild"];
                } else {
                    $actual_url = $base_url;
                }

                // Remember that at least one page answered with status 200.
                if (isset($page_data["http_status_code"]) && $page_data["http_status_code"] == 200) {
                    $content_found = true;
                }

                // Treat a redirect target as one more link found on the page.
                $redirect = PHPCrawlerUtils::getRedirectLocation($page_data["header"]);
                if ($redirect && $this->follow_redirects == true) {
                    $tmp_array["link_raw"] = $redirect;
                    $tmp_array["referer_url"] = $this->urls_to_crawl[$pri_level][$key]["url_rebuild"];
                    $page_data["links_found"][] = $tmp_array;
                }

                // Count files that have been received completely.
                if ($page_data["received"] == true) {
                    $files_received++;
                }

                if ($page_data["traffic_limit_reached"] == true) {
                    $stop_crawling = true;
                }

                // The page limit counts either received files or followed links.
                if ($this->page_limit_all > 0) {
                    if ($this->page_limit_count_ct_only == true && $files_received >= $this->page_limit_all) {
                        $stop_crawling = true;
                    } elseif ($this->page_limit_count_ct_only == false && $links_followed >= $this->page_limit_all) {
                        $stop_crawling = true;
                    }
                }

                // Expose the referring link's details to handlePageData().
                $page_data["refering_linktext"] =& $this->urls_to_crawl[$pri_level][$key]["linktext"];
                $page_data["refering_link_raw"] =& $this->urls_to_crawl[$pri_level][$key]["link_raw"];
                $page_data["refering_linkcode"] =& $this->urls_to_crawl[$pri_level][$key]["linkcode"];

                // Build absolute URLs from the links found.
                $page_data["links_found"] = PHPCrawlerUtils::buildURLs($page_data["links_found"], $actual_url);

                // Keep a private copy so the user callback cannot alter the queue input.
                $links_found = $page_data["links_found"];
                $user_return = $this->handlePageData($page_data);

                // A negative return value from the callback aborts the crawl.
                if ($user_return < 0) {
                    $stop_crawling = true;
                    $page_data["user_abort"] = true;
                }

                // Attach the user-defined priority level to each link.
                if ($this->benchmark == true) {
                    $bm_start = $this->getmicrotime();
                }
                PHPCrawlerUtils::addURLPriorities($links_found, $this->link_priorities);
                if ($this->benchmark == true) {
                    echo "addUrlPriorities(): " . ($this->getmicrotime() - $bm_start) . "<br>";
                }

                // Delete the temporary file the request object may have created.
                if (file_exists($this->pageRequest->tmp_file)) {
                    @unlink($this->pageRequest->tmp_file);
                }

                // A limit was reached: leave this level. The outer loop does no
                // further work because $stop_crawling is now set.
                if (isset($stop_crawling)) {
                    break;
                }

                // Follow modes 2 and 3: drop links to other hosts.
                if ($this->general_follow_mode == 2 || $this->general_follow_mode == 3) {
                    PHPCrawlerUtils::removeURLsToOtherHosts($links_found, $this->urls_to_crawl[$pri_level][$key]["url_rebuild"]);
                }

                // Follow mode 1: drop links to other domains.
                if ($this->general_follow_mode == 1) {
                    PHPCrawlerUtils::removeURLsToOtherDomains($links_found, $this->urls_to_crawl[$pri_level][$key]["url_rebuild"]);
                }

                // Follow mode 3: drop "path up" links
                // (base www.foo.com/bar/index.htm -> do not follow www.foo.com/anotherbar/xyz).
                if ($this->general_follow_mode == 3) {
                    PHPCrawlerUtils::removePathUpLinks($links_found, $this->url_to_crawl);
                }

                // Apply the "do not follow" expressions.
                if (count($this->not_follow_matches) > 0) {
                    PHPCrawlerUtils::removeMatchingLinks($links_found, $this->not_follow_matches);
                }

                // Apply the "only follow" expressions.
                if (count($this->follow_matches) > 0) {
                    $links_found =& PHPCrawlerUtils::removeNotMatchingLinks($links_found, $this->follow_matches);
                }

                // Queue whatever survived the filters.
                if ($this->benchmark == true) {
                    $bm_start = $this->getmicrotime();
                }
                PHPCrawlerUtils::addToArray($links_found, $this->urls_to_crawl, $this->url_map, $this->store_extended_linkinfo);
                if ($this->benchmark == true) {
                    echo "addToArray(): " . ($this->getmicrotime() - $bm_start) . "<br>";
                }

                // No content (status 200) seen so far and there is a redirect:
                // follow it regardless of the follow mode.
                if (!isset($content_found) && $redirect != "" && $this->follow_redirects_till_content == true) {
                    $rd[0]["url_rebuild"] = PHPCrawlerUtils::buildURL($redirect, $actual_url);
                    $rd[0]["priority_level"] = 0;
                    PHPCrawlerUtils::addToArray($rd, $this->urls_to_crawl, $this->url_map, $this->store_extended_linkinfo);
                }

                // The current URL is done; remove it from the queue.
                unset($this->urls_to_crawl[$pri_level][$key]);

                // If a higher priority level now holds URLs, switch to it.
                for ($pri_level_check = $this->max_priority_level + 1; $pri_level_check > $pri_level; $pri_level_check--) {
                    if (isset($this->urls_to_crawl[$pri_level_check])) {
                        $stop_crawling_this_level = true;
                    }
                }
                if ($stop_crawling_this_level == true) {
                    // Restart the outer loop from the top (it decrements once first).
                    $pri_level = $this->max_priority_level + 1;
                    break;
                }
            } // end of loop over one priority level

            // A level that was processed to the end (or stopped by a limit) is dropped.
            if ($stop_crawling_this_level == false) {
                unset($this->urls_to_crawl[$pri_level]);
            }
        }
    } // end of main loop

    // Build the report array.
    $this->status_return["links_followed"] = $links_followed;
    $this->status_return["files_received"] = $files_received;
    $this->status_return["bytes_received"] = $this->pageRequest->traffic_all;

    // $page_data is undefined when nothing was requested and false when the last
    // URL was ignored, so guard every read of it.
    if (isset($page_data["traffic_limit_reached"])) {
        $this->status_return["traffic_limit_reached"] = $page_data["traffic_limit_reached"];
    } else {
        $this->status_return["traffic_limit_reached"] = false;
    }
    if (isset($page_data["file_limit_reached"])) {
        $this->status_return["file_limit_reached"] = $page_data["file_limit_reached"];
    } else {
        $this->status_return["file_limit_reached"] = false;
    }
    if (isset($page_data["user_abort"])) {
        $this->status_return["user_abort"] = $page_data["user_abort"];
    } else {
        $this->status_return["user_abort"] = false;
    }
    $this->status_return["limit_reached"] = isset($stop_crawling);

    // Process time and average throughput (guarded against a zero runtime).
    $this->status_return["process_runtime"] = $this->getMicroTime() - $starting_time;
    if ($this->status_return["process_runtime"] > 0) {
        $this->status_return["data_throughput"] = round($this->status_return["bytes_received"] / $this->status_return["process_runtime"]);
    } else {
        $this->status_return["data_throughput"] = 0;
    }

    if ($this->firstCrawl) {
        $query = "UPDATE tests SET status = 'Finished Crawling!' WHERE id = {$this->testId};";
        if (connectToDb($db)) {
            $db->query($query);
            $duration = $this->status_return["process_runtime"];
            $query = "UPDATE tests SET duration = {$duration} WHERE id = {$this->testId};";
            $db->query($query);
        }
    }
}
/**
 * Check whether the first HTTPS URL among $urlsToTest presents a certificate
 * that curl accepts against a CA bundle file.
 *
 * Steps: find the first HTTPS URL; try to refresh tests/cacert.pem from
 * curl.haxx.se; request the URL with peer and host verification enabled; when
 * the transfer fails, echo the details and store an 'sslcert' row through
 * insertTestResult() unless an identical row already exists in test_results.
 *
 * @param array|string $urlsToTest URLs to inspect (a single URL string is also accepted).
 * @param mixed        $testId     Identifier of the running test; used in SQL as an integer.
 *
 * @return void
 */
function testSslCertificate($urlsToTest, $testId)
{
    connectToDb($db);

    // The argument is iterated as a list below; interpolating it directly would
    // print "Array", so build a readable summary for the status and log lines.
    $urlList = is_array($urlsToTest) ? $urlsToTest : array($urlsToTest);
    $urlSummary = implode(', ', $urlList);

    updateStatus($db, "Testing {$urlSummary} for untrustworthy SSL certificates...", $testId);
    $log = new Logger();
    $log->lfile('logs/eventlogs');
    $log->lwrite("Starting SSL certificate verification function on {$urlSummary}");

    // Identify which URLs, if any, begin with https
    $log->lwrite("Identifying which URLs, if any, begin with HTTPS");
    updateStatus($db, "Identifying which URLs, if any, begin with HTTPS...", $testId);
    $usingHttps = false;
    $httpsUrl = '';
    foreach ($urlList as $currentUrl) {
        // Scheme comparison is case-insensitive and requires the full "https://".
        if (strncasecmp($currentUrl, 'https://', 8) === 0) {
            $usingHttps = true;
            $httpsUrl = $currentUrl;
            echo "https url = " . htmlspecialchars($currentUrl, ENT_QUOTES) . " <br>";
            $log->lwrite("Found HTTPS URL: {$currentUrl}");
            break;
        }
    }

    if (!$usingHttps) {
        return;
    }

    // Check if Mozilla's cacert.pem file is online and update our copy if needed.
    // NOTE(review): the bundle is fetched over plain HTTP, so it can be tampered
    // with in transit — consider an HTTPS source.
    $log->lwrite("Checking if cacert.pem is up to date");
    $http = new http_class();
    $http->timeout = 0;
    $http->data_timeout = 0;
    $http->user_agent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)";
    $http->follow_redirect = 1;
    $http->redirection_limit = 5;
    $cacertsUrl = "http://curl.haxx.se/ca/cacert.pem";
    $error = $http->GetRequestArguments($cacertsUrl, $arguments);
    if ($error == "") {
        // Only open the connection when the arguments could be built; the first
        // error used to be overwritten unconditionally.
        $error = $http->Open($arguments);
    }
    $log->lwrite("URL to be requested is: {$cacertsUrl}");
    if ($error == "") {
        $log->lwrite("Sending HTTP request to {$cacertsUrl}");
        $error = $http->SendRequest($arguments);
        if ($error == "") {
            $headers = array();
            $error = $http->ReadReplyHeaders($headers);
            if ($error == "") {
                $responseCode = $http->response_status; // This is a string
                $log->lwrite("Received response code: {$responseCode}");
                if (intval($responseCode) == 200) {
                    $cacerts = file_get_contents($cacertsUrl);
                    if ($cacerts === false || $cacerts === '') {
                        // A failed download must never replace the existing bundle
                        // with an empty file.
                        $log->lwrite("Problem accessing Mozilla's URL containing cacert.pem file");
                    } else {
                        $oldCacerts = is_readable('tests/cacert.pem') ? file_get_contents('tests/cacert.pem') : false;
                        if ($cacerts !== $oldCacerts) {
                            file_put_contents('tests/cacert.pem', $cacerts);
                            $log->lwrite("cacert.pem file updated");
                        } else {
                            $log->lwrite("cacert.pem is already up to date so was not updated");
                        }
                    }
                } else {
                    $log->lwrite("Problem accessing Mozilla's URL containing cacert.pem file");
                }
            }
        }
    }

    // Initialize session and set URL.
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $httpsUrl);
    // Set so curl_exec returns the result instead of outputting it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    $user_agent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)";
    curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    // Check the server's certificate against the certificates in the .pem file below.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
    // 2 = the certificate must carry a common name and it must match the host name.
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
    // NOTE(review): the bundle refreshed above is tests/cacert.pem, while this
    // reads cacert.pem from the working directory — confirm which path is intended.
    curl_setopt($ch, CURLOPT_CAINFO, getcwd() . "/cacert.pem");

    // Get the response.
    $response = curl_exec($ch);
    if ($db) {
        incrementHttpRequests($db, $testId);
    }

    // curl_exec() returns false when the transfer fails. Testing with empty()
    // also flagged sites with a valid certificate and an empty response body.
    // NOTE(review): non-SSL failures (DNS, timeout) are still reported as an
    // untrusted certificate; curl_errno() could narrow this down.
    if ($response === false) {
        // The echoes here are for testing/debugging the function on its own.
        echo '<br>SSL Certificate is not trusted!<br>Url: ' . htmlspecialchars($httpsUrl, ENT_QUOTES) . '<br>';
        echo 'Method: GET <br>';
        echo 'Error: ' . htmlspecialchars(curl_error($ch), ENT_QUOTES) . '<br>';

        if ($db) {
            // The URL comes from the site under test, so escape it before it is
            // placed in SQL. $db looks like a mysqli handle (query()/num_rows);
            // fall back to addslashes() if it does not offer real_escape_string().
            $escapedUrl = method_exists($db, 'real_escape_string') ? $db->real_escape_string($httpsUrl) : addslashes($httpsUrl);
            $safeTestId = intval($testId);

            // Only add the finding if it has not been recorded already.
            $query = "SELECT * FROM test_results WHERE test_id = {$safeTestId} AND type = 'sslcert' AND method = 'get' AND url = '{$escapedUrl}' AND attack_str = '{$escapedUrl}'";
            $result = $db->query($query);
            if (!$result) {
                $log->lwrite("Could not execute query {$query}");
            } else {
                $log->lwrite("Successfully executed query {$query}");
                $numRows = $result->num_rows;
                if ($numRows == 0) {
                    $log->lwrite("Number of rows is {$numRows} for query: {$query}");
                    insertTestResult($db, $testId, 'sslcert', 'get', $httpsUrl, $httpsUrl);
                }
            }
        } else {
            $log->lwrite("No database connection available, SSL certificate result was not stored");
        }
    }

    curl_close($ch);
}