/**
 * Parse a URL string and populate this object's URL component properties.
 *
 * On success the following properties are set: parsedURL, scheme, host,
 * port, path and query. The stored path also carries the original query
 * string and fragment when the URL has them.
 *
 * @param string $URL_string the URL to parse
 * @return bool true on success; false when the URL cannot be parsed, has no
 *              host, or the host fails the host pattern check
 */
protected function parseInternal($URL_string)
{
    // Error suppression kept from the original call.
    $URL_map = @parse_URL($URL_string);

    // empty() covers both "no host key" and "empty host" without raising an
    // undefined-index notice.
    if (!$URL_map || empty($URL_map['host'])) {
        return false;
    }

    // NOTE(review): the pattern is not anchored at the start, so in practice
    // it only requires the host to END in an ASCII letter or digit. Left
    // unchanged so that hosts accepted today are still accepted.
    if (!preg_match('/[a-zA-Z0-9._-]*[a-zA-Z0-9]$/', $URL_map['host'])) {
        return false;
    }

    $URL_map = array_map('trim', $URL_map);

    $this->parsedURL = $URL_string;
    $this->scheme = isset($URL_map['scheme']) ? $URL_map['scheme'] : 'http';
    $this->host = $URL_map['host'];

    // Explicit branches instead of an unparenthesized nested ternary: on
    // PHP < 8 the old expression was left-associative and turned every
    // explicit port into 443; on PHP 8 it does not compile at all.
    if (isset($URL_map['port'])) {
        $this->port = (int) $URL_map['port'];
    } elseif ($this->scheme == 'https') {
        $this->port = 443;
    } else {
        $this->port = 80;
    }

    $this->path = isset($URL_map['path']) ? $URL_map['path'] : '';

    if (isset($URL_map['query'])) {
        parse_str($URL_map['query'], $this->query);
    }
    if (!$this->query) {
        $this->query = array();
    }

    if ($this->path == '') {
        $this->path = '/';
    }

    // Re-attach query string and fragment to the stored path.
    if (isset($URL_map['query'])) {
        $this->path .= "?{$URL_map['query']}";
    }
    if (isset($URL_map['fragment'])) {
        $this->path .= '#' . $URL_map['fragment'];
    }

    return true;
}
/**
 * Decide whether a URL uses http/https and belongs to a trusted domain.
 *
 * The "domain" is the last two dot-separated labels of the host
 * (e.g. "www.example.com" becomes "example.com"). A host with a single
 * label (e.g. "localhost") is compared as-is. The result is looked up in
 * $this->trusted_domains.
 *
 * @param string $URL the URL to check; must include a scheme and a host
 * @return bool true when the scheme is http or https and the domain is
 *              listed in $this->trusted_domains, false otherwise
 */
function validate_URL($URL)
{
    $URL_components = parse_URL($URL);
    if (!is_array($URL_components)
        || !isset($URL_components['scheme'], $URL_components['host'])) {
        return false;
    }

    $scheme = $URL_components['scheme'];
    if ($scheme !== 'http' && $scheme !== 'https') {
        return false;
    }

    // Keep the last two labels of the host. array_slice() copes with
    // single-label hosts, where indexing count - 2 would read offset -1
    // and raise an undefined-offset notice.
    $labels = explode('.', $URL_components['host']);
    $domain = implode('.', array_slice($labels, -2));

    // Strict comparison: a trust decision must not depend on PHP's loose
    // numeric-string juggling.
    return in_array($domain, $this->trusted_domains, true);
}
/**
 * Header the browser to the current location of the most recent page
 * that occupied a given URL
 *
 * How it works:
 *
 * 1. Looks for the URL in the URL_history table (most recent entries first).
 *
 * 2. If no row leads to a live page, sends a 404 response code.
 *    Otherwise sends a 301 header and a Location header pointing to the
 *    live page that most recently inhabited that URL (with the original
 *    query string re-attached) and ends the script.
 *
 * Important: Because it may attempt to header the client to a different URL,
 * this method must be called before any output is started, or in the context
 * of output buffering.
 *
 * @param string $request_uri a URL relative to the host root (e.g. /foo/bar/)
 * @return void does not return at all when a redirect is issued (calls exit)
 *
 * @todo modify to make multidomain safe
 */
function check_URL_history($request_uri)
{
    $url_arr = parse_URL($request_uri);

    // parse_url() can fail, or return no 'path' key (e.g. "?a=b"); fall back
    // to an empty path rather than reading an undefined index.
    $path = (is_array($url_arr) && isset($url_arr['path'])) ? $url_arr['path'] : '';

    // This catches links that might not have had a trailing slash;
    // pages always have a trailing slash in the db
    $URL = '/' . trim_slashes($path) . '/';
    $URL = str_replace('//', '/', $URL);

    $query_string = !empty($url_arr['query']) ? '?' . $url_arr['query'] : '';

    $query = 'SELECT * FROM URL_history WHERE url ="' . reason_sql_string_escape($URL) . '" ORDER BY timestamp DESC';
    $results = db_query($query);

    // The fetch loop handles an empty result set by itself, so no separate
    // row count is needed.
    if ($results) {
        while ($row = mysql_fetch_array($results)) {
            $page = new entity($row['page_id']);

            if (reason_is_entity($page, 'minisite_page')
                && $page->get_value('state') == 'Live'
                && ($redir = @reason_get_page_url($page))) {
                if ($redir == $request_uri) {
                    // The live page claims the very URL that was requested,
                    // so redirecting would loop; report it and try older rows.
                    // Could potentially update rewrites here, solving most
                    // times this happens, perhaps.
                    trigger_error("A page should exist here, but apparently does not at the moment. A web administrator may need to run URL updating on this site.");
                } else {
                    header('Location: ' . $redir . $query_string, true, 301);
                    exit;
                }
            }
        }
    }

    // if we have gotten this far and not found a URL lets send a 404
    http_response_code(404);
}