/**
 * Encode a URL with IDNA so that it comes back as Punycode.
 *
 * The URL is first passed through convert_charset() together with 'utf-8'
 * and the global $evo_charset; the result is handed to the IDNA converter
 * class that matches the running PHP version.
 *
 * @param string URL
 * @return string Encoded URL (ASCII)
 */
function idna_encode($url)
{
    global $evo_charset;

    // Charset conversion happens before the converter class gets loaded.
    $utf8_url = convert_charset($url, 'utf-8', $evo_charset);

    $has_php5 = version_compare(PHP_VERSION, '5', '>=');

    if (!$has_php5) {
        // Legacy interpreter: fall back to the PHP4 flavour of the converter.
        load_class('_ext/idna/_idna_convert.class.php4', 'Net_IDNA_php4');
        $converter = new Net_IDNA_php4();
    } else {
        load_class('_ext/idna/_idna_convert.class.php', 'idna_convert');
        $converter = new idna_convert();
    }

    return $converter->encode($utf8_url);
}
/**
 * Check the validity of a given URL
 *
 * Checks the URL structure, the allowed URI schemes and (optionally) the URL ban list.
 * URL can be empty.
 *
 * Accepted forms:
 *  - '$code$' special replace codes (used in footers)
 *  - mailto:, clsid: and javascript: links, when their scheme is allowed for the context
 *  - absolute "scheme://host..." URLs (IDNA-encoded through idna_encode() before matching)
 *  - relative URLs starting with "/" or "#" (never accepted in the 'commenting' context)
 *
 * Note: We have a problem when trying to "antispam" a keyword which is already blacklisted
 * If that keyword appears in the URL... then the next page has a bad referer! :/
 *
 * {@internal This function gets tested in misc.funcs.simpletest.php.}}
 *
 * @param string Url to validate
 * @param string Context, passed on to get_allowed_uri_schemes(). With 'commenting' (and $debug off)
 *               the returned error messages do not echo the offending value.
 * @param boolean also do an antispam check on the url
 * @return mixed false (which means OK) or error message
 */
function validate_url($url, $context = 'posting', $antispam_check = true)
{
    global $Debuglog, $debug;

    if (empty($url)) { // Empty URL, no problem
        return false;
    }

    // Detailed messages (echoing the URL/scheme) everywhere except in comments, unless debugging.
    $verbose = $debug || $context != 'commenting';

    $allowed_uri_schemes = get_allowed_uri_schemes($context);

    // Validate URL structure
    if ($url[0] == '$') { // This is a 'special replace code' URL (used in footers)
        // NOTE: "~" is used as delimiter; a non-ASCII delimiter breaks once the file is stored as UTF-8.
        if (!preg_match('~\\$([a-z_]+)\\$~', $url)) {
            return T_('Invalid URL $code$ format');
        }
    } elseif (preg_match('~^\\w+:~', $url)) { // there's a scheme and therefor an absolute URL:
        if (substr($url, 0, 7) == 'mailto:') { // mailto:link
            if (!in_array('mailto', $allowed_uri_schemes)) { // Scheme not allowed
                $scheme = 'mailto:';
                $Debuglog->add('URI scheme «' . $scheme . '» not allowed!', 'error');
                return $verbose
                    ? sprintf(T_('URI scheme "%s" not allowed.'), htmlspecialchars($scheme))
                    : T_('URI scheme not allowed.');
            }

            // $match[2] is the address part, without any "?subject=..." style query.
            preg_match('~^(mailto):(.*?)(\\?.*)?$~', $url, $match);
            if (!$match) {
                return $verbose
                    ? sprintf(T_('Invalid email link: %s.'), htmlspecialchars($url))
                    : T_('Invalid email link.');
            } elseif (!is_email($match[2])) {
                return $verbose
                    ? sprintf(T_('Supplied email address (%s) is invalid.'), htmlspecialchars($match[2]))
                    : T_('Invalid email address.');
            }
        } elseif (substr($url, 0, 6) == 'clsid:') { // clsid:link
            if (!in_array('clsid', $allowed_uri_schemes)) { // Scheme not allowed
                $scheme = 'clsid:';
                $Debuglog->add('URI scheme «' . $scheme . '» not allowed!', 'error');
                return $verbose
                    ? sprintf(T_('URI scheme "%s" not allowed.'), htmlspecialchars($scheme))
                    : T_('URI scheme not allowed.');
            }

            if (!preg_match('~^(clsid):([a-fA-F0-9\\-]+)$~', $url, $match)) {
                return T_('Invalid class ID format');
            }
        } elseif (substr($url, 0, 11) == 'javascript:') { // javascript:
            // Basically there could be anything here: only the scheme itself is checked.
            if (!in_array('javascript', $allowed_uri_schemes)) { // Scheme not allowed
                $scheme = 'javascript:';
                $Debuglog->add('URI scheme «' . $scheme . '» not allowed!', 'error');
                return $verbose
                    ? sprintf(T_('URI scheme "%s" not allowed.'), htmlspecialchars($scheme))
                    : T_('URI scheme not allowed.');
            }
        } else {
            // convert URL to IDN, using the shared helper so the right converter class gets loaded:
            $url = idna_encode($url);

            if (!preg_match('~^                      # start
                ([a-z][a-z0-9+.\\-]*)                # scheme
                ://                                  # authorize absolute URLs only ( // not present in clsid: -- problem? ; mailto: handled above)
                (\\w+(:\\w+)?@)?                     # username or username and password (optional)
                ( localhost |
                    [a-z0-9]([a-z0-9\\-])*           # Don t allow anything too funky like entities
                    \\.                              # require at least 1 dot
                    [a-z0-9]([a-z0-9.\\-])+          # Don t allow anything too funky like entities
                )
                (:[0-9]+)?                           # optional port specification
                [^ ]*                                # allow no space
                $~ix', $url, $match)) { // Cannot validate URL structure
                $Debuglog->add('URL «' . $url . '» does not match url pattern!', 'error');
                return $verbose
                    ? sprintf(T_('Invalid URL format (%s).'), htmlspecialchars($url))
                    : T_('Invalid URL format.');
            }

            $scheme = strtolower($match[1]);
            if (!in_array($scheme, $allowed_uri_schemes)) { // Scheme not allowed
                $Debuglog->add('URI scheme «' . $scheme . '» not allowed!', 'error');
                return $verbose
                    ? sprintf(T_('URI scheme "%s" not allowed.'), htmlspecialchars($scheme))
                    : T_('URI scheme not allowed.');
            }
        }
    } else { // URL is relative..
        if ($context == 'commenting') { // We do not allow relative URLs in comments
            return $verbose
                ? sprintf(T_('URL "%s" must be absolute.'), htmlspecialchars($url))
                : T_('URL must be absolute.');
        }

        $char = substr($url, 0, 1);
        if ($char != '/' && $char != '#') { // must start with a slash or hash (for HTML anchors to the same page)
            return $verbose
                ? sprintf(T_('URL "%s" must be a full path starting with "/" or an anchor starting with "#".'), htmlspecialchars($url))
                : T_('URL must be a full path starting with "/" or an anchor starting with "#".');
        }
    }

    if ($antispam_check) { // Search for blocked keywords:
        // Note: on the generic-scheme branch above, $url is the IDNA-encoded form at this point.
        if ($block = antispam_check($url)) {
            return $verbose
                ? sprintf(T_('URL "%s" not allowed: blacklisted word "%s".'), htmlspecialchars($url), $block)
                : T_('URL not allowed');
        }
    }

    return false; // OK
}