/**
 * Tests {@link idna_encode()}
 *
 * Forces the global $evo_charset to UTF-8 for the duration of the check
 * (the literal below is written in this file's encoding) and puts the
 * previous value back afterwards.
 */
function test_idna_encode()
{
    global $evo_charset;

    // Remember the charset currently configured so it can be restored.
    $saved_charset = $evo_charset;

    $evo_charset = 'utf-8'; // this file
    $this->assertEqual(
        idna_encode('läu.de'),
        'xn--lu-via.de'
    );

    // Restore the charset that was active before the test ran.
    $evo_charset = $saved_charset;
}
/**
 * Get the base domain (without protocol and any subdomain) of an URL.
 *
 * Gets a max of 3 domain parts (x.y.tld). A leading "www." is removed from
 * the result. Purely numeric dotted hosts (IP addresses) and empty hosts are
 * returned as they are, without any further processing.
 *
 * @param string URL
 * @return string the base domain (may become empty, if found invalid)
 */
function get_base_domain($url)
{
    global $evo_charset;

    // Chop away the scheme and everything following the host (port, path,
    // query string, fragment). "?" has to end the host too: for an URL like
    // "http://example.com?ref=foo.bar" (query but no path) the query would
    // otherwise stay part of $domain and the end-anchored pattern below
    // would pick "foo.bar" as the base domain.
    $domain = preg_replace('~^([a-z]+://)?([^:/?#]+)(.*)$~i', '\\2', $url);

    if (empty($domain) || preg_match('~^(\\d+\\.)+\\d+$~', $domain)) { // Empty or All numeric = IP address, don't try to cut it any further
        return $domain;
    }

    // Get the base domain up to 3 levels (x.y.tld):
    // NOTE: "_" is not really valid, but for Windows it is..
    // NOTE: \w includes "_"

    // convert URL to IDN:
    $domain = idna_encode($domain);

    // Up to two "label." groups followed by a final label, anchored at the
    // end of the host (x modifier: whitespace inside the pattern is ignored).
    $domain_pattern = '~ ( \\w (\\w|-|_)* \\. ){0,2} \\w (\\w|-|_)* $~ix';
    if (!preg_match($domain_pattern, $domain, $match)) {
        return '';
    }

    // Decode the IDN form again and hand the result to convert_charset()
    // together with $evo_charset and 'UTF-8'
    // (presumably UTF-8 -> $evo_charset; verify against convert_charset()).
    $base_domain = convert_charset(idna_decode($match[0]), $evo_charset, 'UTF-8');

    // Remove any www. prefix:
    $base_domain = preg_replace('~^www\\.~i', '', $base_domain);

    return $base_domain;
}
/**
 * Check the validity of a given URL
 *
 * Checks allowed URI schemes and URL ban list.
 * URL can be empty.
 *
 * Handled forms, in this order:
 *  - '$code$' special replace codes (URL starts with "$"),
 *  - absolute URLs with a scheme: "mailto:", "clsid:" and "javascript:" get
 *    their own checks, everything else must match "scheme://host...",
 *  - relative URLs: rejected for the "commenting", "download_src" and
 *    "http-https" contexts; otherwise they must start with "/" or "#".
 *
 * Note: We have a problem when trying to "antispam" a keyword which is already blacklisted
 * If that keyword appears in the URL... then the next page has a bad referer! :/
 *
 * {@internal This function gets tested in misc.funcs.simpletest.php.}}
 *
 * @param string Url to validate
 * @param string Context ("posting", "commenting", "download_src", "http-https")
 * @param boolean also do an antispam check on the url
 * @return mixed false (which means OK) or error message
 */
function validate_url($url, $context = 'posting', $antispam_check = true)
{
    global $Debuglog, $debug;

    if (empty($url)) { // Empty URL, no problem
        return false;
    }

    // Do not give verbose info for comments, unless debug is enabled.
    $verbose = $debug || $context != 'commenting';

    $allowed_uri_schemes = get_allowed_uri_schemes($context);

    // Validate URL structure
    if ($url[0] == '$') { // This is a 'special replace code' URL (used in footers)
        if (!preg_match('~\\$([a-z_]+)\\$~', $url)) {
            return T_('Invalid URL $code$ format');
        }
    } elseif (preg_match('~^\\w+:~', $url)) { // there's a scheme and therefore an absolute URL:
        if (substr($url, 0, 7) == 'mailto:') { // mailto:link
            if (!in_array('mailto', $allowed_uri_schemes)) { // Scheme not allowed
                $scheme = 'mailto:';
                $Debuglog->add('URI scheme «' . $scheme . '» not allowed!', 'error');
                return $verbose
                    ? sprintf(T_('URI scheme "%s" not allowed.'), htmlspecialchars($scheme))
                    : T_('URI scheme not allowed.');
            }

            // $match[2] is the address part, without any "?..." parameters.
            preg_match('~^(mailto):(.*?)(\\?.*)?$~', $url, $match);
            if (!$match) {
                return $verbose
                    ? sprintf(T_('Invalid email link: %s.'), htmlspecialchars($url))
                    : T_('Invalid email link.');
            } elseif (!is_email($match[2])) {
                return $verbose
                    ? sprintf(T_('Supplied email address (%s) is invalid.'), htmlspecialchars($match[2]))
                    : T_('Invalid email address.');
            }
        } elseif (substr($url, 0, 6) == 'clsid:') { // clsid:link
            if (!in_array('clsid', $allowed_uri_schemes)) { // Scheme not allowed
                $scheme = 'clsid:';
                $Debuglog->add('URI scheme «' . $scheme . '» not allowed!', 'error');
                return $verbose
                    ? sprintf(T_('URI scheme "%s" not allowed.'), htmlspecialchars($scheme))
                    : T_('URI scheme not allowed.');
            }

            if (!preg_match('~^(clsid):([a-fA-F0-9\\-]+)$~', $url, $match)) {
                return T_('Invalid class ID format');
            }
        } elseif (substr($url, 0, 11) == 'javascript:') { // javascript:
            // Basically there could be anything here
            if (!in_array('javascript', $allowed_uri_schemes)) { // Scheme not allowed
                $scheme = 'javascript:';
                $Debuglog->add('URI scheme «' . $scheme . '» not allowed!', 'error');
                return $verbose
                    ? sprintf(T_('URI scheme "%s" not allowed.'), htmlspecialchars($scheme))
                    : T_('URI scheme not allowed.');
            }
            // No structural check beyond the scheme itself.
        } else {
            // convert URL to IDN:
            $url = idna_encode($url);

            // NOTE: the pattern uses the "x" modifier. Each "#" comment inside it
            // runs up to the next newline, so every commented fragment MUST stay
            // on its own line - on a single line everything after the first "#"
            // would be ignored and the pattern would degrade to "^".
            if (!preg_match('~^               # start
                ([a-z][a-z0-9+.\\-]*)             # scheme
                ://                              # authorize absolute URLs only ( // not present in clsid: -- problem? ; mailto: handled above)
                (\\w+(:\\w+)?@)?                   # username or username and password (optional)
                ( localhost |
                    [a-z0-9]([a-z0-9\\-])*            # Don t allow anything too funky like entities
                    \\.                               # require at least 1 dot
                    [a-z0-9]([a-z0-9.\\-])+           # Don t allow anything too funky like entities
                )
                (:[0-9]+)?                       # optional port specification
                .*                               # allow anything in the path (including spaces - used in FileManager - but no newlines).
                $
                ~ix', $url, $match)) { // Cannot validate URL structure
                $Debuglog->add('URL «' . $url . '» does not match url pattern!', 'error');
                return $verbose
                    ? sprintf(T_('Invalid URL format (%s).'), htmlspecialchars($url))
                    : T_('Invalid URL format.');
            }

            // The pattern is case-insensitive; compare the scheme in lower case.
            $scheme = strtolower($match[1]);
            if (!in_array($scheme, $allowed_uri_schemes)) { // Scheme not allowed
                $Debuglog->add('URI scheme «' . $scheme . '» not allowed!', 'error');
                return $verbose
                    ? sprintf(T_('URI scheme "%s" not allowed.'), htmlspecialchars($scheme))
                    : T_('URI scheme not allowed.');
            }
        }
    } else { // URL is relative..
        if ($context == 'commenting' || $context == 'download_src' || $context == 'http-https') { // We do not allow relative URLs in comments and download urls
            return $verbose
                ? sprintf(T_('URL "%s" must be absolute.'), htmlspecialchars($url))
                : T_('URL must be absolute.');
        }

        $char = substr($url, 0, 1);
        if ($char != '/' && $char != '#') { // must start with a slash or hash (for HTML anchors to the same page)
            return $verbose
                ? sprintf(T_('URL "%s" must be a full path starting with "/" or an anchor starting with "#".'), htmlspecialchars($url))
                : T_('URL must be a full path starting with "/" or an anchor starting with "#".');
        }
    }

    if ($antispam_check) { // Search for blocked keywords:
        if ($block = antispam_check($url)) {
            return $verbose
                ? sprintf(T_('URL "%s" not allowed: blacklisted word "%s".'), htmlspecialchars($url), $block)
                : T_('URL not allowed');
        }
    }

    return false; // OK
}