<?php
/**
 * Base application configuration.
 *
 * Registers the 'ygin' / 'ngin' path aliases, works out a human-readable host
 * name for error-report e-mail subjects and returns the configuration array.
 */

YiiBase::setPathOfAlias('ygin', realpath(dirname(__FILE__) . '/../'));
// TODO: the 'ngin' alias has to be kept for a while for backward compatibility.
YiiBase::setPathOfAlias('ngin', realpath(dirname(__FILE__) . '/../'));

// Host name used in the subject of error-report e-mails.
$errorSubjectHost = '';
if (!isset($_SERVER['HTTP_HOST'])) {
    // No HTTP host available: fall back to a filesystem path two levels up.
    $errorSubjectHost = realpath(dirname(__FILE__) . '/../../');
} elseif (strpos($_SERVER['HTTP_HOST'], 'xn--') === false) {
    // Plain host name, use it as is.
    $errorSubjectHost = $_SERVER['HTTP_HOST'];
} else {
    // The host arrived punycode-encoded ("xn--"): decode it for display.
    YiiBase::import('ygin.lib.IDNA', true);
    $idna = new Net_IDNA_php4();
    $errorSubjectHost = $idna->decode($_SERVER['HTTP_HOST']);
}

return array(
    'basePath' => dirname(__FILE__) . DIRECTORY_SEPARATOR . '../../protected',

    'aliases' => array(
        'temp' => 'webroot.temp',
        'xupload' => 'ygin.ext.xupload',
        'fileUpload' => 'ygin.components.fileUpload',
    ),

    'preload' => array('log'),

    'import' => array(
        'ygin.helpers.*',
        'ygin.components.*',
        'ygin.interface.*',
    ),

    'components' => array(
        'session' => array(
            'autoStart' => false,
            'cookieParams' => array('timeout' => '7200'),
        ),

        'db' => array(
            'nullConversion' => PDO::NULL_EMPTY_STRING,
        ),

        'log' => array(
            'class' => 'CLogRouter',
            'routes' => array(
                // E-mail route for errors and warnings; enabled only when YII_DEBUG is off.
                'email_error' => array(
                    'class' => 'ygin.components.DaEmailLogRoute',
                    'levels' => 'error, warning',
                    'filter' => array(
                        'class' => 'ygin.components.DaLogFilter',
                        'ignoreCategories' => array(
                            'exception.CHttpException.404',
                            'exception.DaHttpException.*',
                        ),
                    ),
                    'subject' => 'Ошибка на сайте (' . $errorSubjectHost . ')',
                    'enabled' => YII_DEBUG == false,
                ),

                // File route for the 'application.sendMail.error' category.
                'mailErrorLog' => array(
                    'class' => 'DaFileLogRoute',
                    'categories' => 'application.sendMail.error',
                    'logFile' => 'mail_error_log.log',
                ),

                // File route for errors and warnings, with request variables attached.
                'errorLog' => array(
                    'class' => 'CFileLogRoute',
                    'levels' => 'error, warning',
                    'logFile' => 'error_log.log',
                    'filter' => array(
                        'class' => 'ygin.components.DaLogFilter',
                        'ignoreCategories' => array(
                            'exception.CHttpException.404',
                            'exception.DaHttpException.*',
                            'application.sendMail.error',
                        ),
                        'logVars' => array('_GET', '_POST', '_FILES', '_COOKIE', '_SESSION', '_SERVER'),
                    ),
                ),

                // File route for 'application.login.*' info messages, without request variables.
                'loginLog' => array(
                    'class' => 'DaFileLogRoute',
                    'levels' => 'info',
                    'categories' => 'application.login.*',
                    'logFile' => 'login.log',
                    'filter' => array(
                        'class' => 'CLogFilter',
                        'logVars' => array(),
                    ),
                ),
            ),
        ),
    ),
);
/**
 * Check the content of a given URL (referer), if the requested URI (with different hostname variations)
 * is present.
 *
 * The referring page is fetched over a plain socket (HTTP/1.0, at most ~500kb) and searched
 * for an `href` pointing to one of our host names followed by $uri.
 *
 * @todo Use DB cache to avoid checking the same page again and again! (Plugin DB table)
 *
 * @param string URL of the referring page
 * @param string URI to append to matching pattern for hostnames
 * @return boolean true if a link to us was found in the referring page, false otherwise
 */
function is_referer_linking_us($referer, $uri)
{
    global $ReqHost;

    if (empty($referer)) {
        return false;
    }

    // Load page content (max. 500kb), using fsockopen:
    $url_parsed = parse_url($referer);
    if (empty($url_parsed['scheme'])) {
        $url_parsed = parse_url('http://' . $referer);
    }
    if (empty($url_parsed['host'])) {
        // parse_url() failed or found no host: there is nothing we could connect to.
        $this->debug_log('is_referer_linking_us(): could not determine host of «' . $referer . '»');
        return false;
    }

    $host = $url_parsed['host'];
    $port = empty($url_parsed['port']) ? 80 : $url_parsed['port'];
    $path = empty($url_parsed['path']) ? '/' : $url_parsed['path'];
    if (!empty($url_parsed['query'])) {
        $path .= '?' . $url_parsed['query'];
    }

    $fp = @fsockopen($host, $port, $errno, $errstr, 30);
    if (!$fp) {
        // could not access referring page
        $this->debug_log('is_referer_linking_us(): could not access «' . $referer . '» (host: ' . $host . '): ' . $errstr . ' (#' . $errno . ')');
        return false;
    }

    // Set timeout for data:
    if (function_exists('stream_set_timeout')) {
        stream_set_timeout($fp, 20);
    } else {
        socket_set_timeout($fp, 20); // PHP 4
    }

    // Send request:
    $out = "GET {$path} HTTP/1.0\r\n";
    $out .= "Host: {$host}:{$port}\r\n";
    $out .= "Connection: Close\r\n\r\n";
    fwrite($fp, $out);

    // Skip headers (give up after 100 lines), remembering the charset if one is announced:
    $i = 0;
    $source_charset = 'iso-8859-1'; // default
    while (($s = fgets($fp, 4096)) !== false) {
        $i++;
        if ($s == "\r\n" || $i > 100) {
            break;
        }
        if (preg_match('~^Content-Type:.*?charset=([\\w-]+)~i', $s, $match)) {
            $source_charset = $match[1];
        }
    }

    // Get the refering page's content
    $content_ref_page = '';
    $bytes_read = 0;
    while (($s = fgets($fp, 4096)) !== false) {
        $content_ref_page .= $s;
        $bytes_read += strlen($s);
        if ($bytes_read > 512000) {
            // do not pull more than 500kb of data!
            break;
        }
    }
    fclose($fp);

    if (!strlen($content_ref_page)) {
        $this->debug_log('is_referer_linking_us(): empty $content_ref_page (' . bytesreadable($bytes_read) . ' read)');
        return false;
    }

    /**
     * IDNA converter class
     */
    load_funcs('_ext/idna/_idna_convert.class.php');
    $IDNA = new Net_IDNA_php4();
    $have_idn_name = false;

    // Build the search pattern:
    // We match for basically for 'href="[SERVER][URI]', where [SERVER] is a list of possible hosts (especially IDNA)
    $search_pattern = '~\\shref=["\']?https?://(';

    $possible_hosts = array($_SERVER['HTTP_HOST']);
    if ($_SERVER['SERVER_NAME'] != $_SERVER['HTTP_HOST']) {
        $possible_hosts[] = $_SERVER['SERVER_NAME'];
    }

    $search_pattern_hosts = array();
    foreach ($possible_hosts as $l_host) {
        if (preg_match('~^([^.]+\\.)(.*?)([^.]+\\.[^.]+)$~', $l_host, $match)) {
            // we have subdomains in this hostname
            if (stristr($match[1], 'www')) {
                // search also for hostname without 'www.'
                $search_pattern_hosts[] = $match[2] . $match[3];
            }
        }
        $search_pattern_hosts[] = $l_host;
    }

    // array_unique() preserves keys, so re-index: code below must not depend on
    // the keys being a gap-free 0..n-1 sequence.
    $search_pattern_hosts = array_values(array_unique($search_pattern_hosts));

    foreach ($search_pattern_hosts as $l_host) {
        // add IDN, because this could be linked:
        $l_idn_host = $IDNA->decode($l_host); // the decoded puny-code ("xn--..") name (utf8)
        // Skip unusable results (non-string/empty): an empty alternative in the
        // pattern would match every host.
        if (is_string($l_idn_host) && $l_idn_host !== '' && $l_idn_host != $l_host) {
            $have_idn_name = true;
            $search_pattern_hosts[] = $l_idn_host;
        }
    }

    // add hosts to pattern, preg_quoted
    foreach ($search_pattern_hosts as $l_key => $l_host) {
        $search_pattern_hosts[$l_key] = preg_quote($l_host, '~');
    }
    $search_pattern .= implode('|', $search_pattern_hosts) . ')';

    if (empty($uri)) {
        // host(s) should end with "/", "'", '"', "?" or whitespace
        $search_pattern .= '[/"\'\\s?]';
    } else {
        $search_pattern .= preg_quote($uri, '~');
        // URI should end with "'", '"' or whitespace
        $search_pattern .= '["\'\\s]';
    }
    $search_pattern .= '~i';

    if ($have_idn_name) {
        // Convert charset to UTF-8, because the decoded domain name is UTF-8, too:
        if (can_convert_charsets('utf-8', $source_charset)) {
            $content_ref_page = convert_charset($content_ref_page, 'utf-8', $source_charset);
        } else {
            $this->debug_log('is_referer_linking_us(): warning: cannot convert charset of referring page');
        }
    }

    if (preg_match($search_pattern, $content_ref_page)) {
        $this->debug_log('is_referer_linking_us(): found current URL in page (' . bytesreadable($bytes_read) . ' read)');
        return true;
    }

    if (strpos($referer, $ReqHost) === 0 && !empty($uri)) {
        // Referer is the same host.. just search for $uri
        if (strpos($content_ref_page, $uri) !== false) {
            $this->debug_log('is_referer_linking_us(): found current URI in page (' . bytesreadable($bytes_read) . ' read)');
            return true;
        }
    }

    $this->debug_log('is_referer_linking_us(): ' . sprintf('did not find «%s» in «%s» (%s bytes read).', $search_pattern, $referer, bytesreadable($bytes_read)));
    return false;
}
/**
 * Convert an IDNA puny-code name ("xn--..") into its UTF-8 form.
 *
 * The converter class is chosen by the running PHP version: `idna_convert`
 * on PHP 5 and later, `Net_IDNA_php4` otherwise.
 *
 * @param string
 * @return string The decoded puny-code ("xn--..") (UTF8!)
 */
function idna_decode($url)
{
    $running_php5_or_later = version_compare(PHP_VERSION, '5', '>=');

    if (!$running_php5_or_later) {
        // Legacy PHP 4 implementation
        load_class('_ext/idna/_idna_convert.class.php4', 'Net_IDNA_php4');
        $converter = new Net_IDNA_php4();

        return $converter->decode($url);
    }

    load_class('_ext/idna/_idna_convert.class.php', 'idna_convert');
    $converter = new idna_convert();

    return $converter->decode($url);
}
/**
 * Check the validity of a given URL
 *
 * Checks allowed URI schemes and URL ban list.
 * URL can be empty.
 *
 * Note: We have a problem when trying to "antispam" a keyword which is already blacklisted
 * If that keyword appears in the URL... then the next page has a bad referer! :/
 *
 * {@internal This function gets tested in misc.funcs.simpletest.php.}}
 *
 * @param string Url to validate
 * @param string Context the URL is used in; selects the allowed URI schemes, and
 *               'commenting' additionally forbids relative URLs and shortens error messages
 * @param boolean also do an antispam check on the url
 * @return mixed false (which means OK) or error message
 */
function validate_url($url, $context = 'posting', $antispam_check = true)
{
    global $Debuglog, $debug;

    if (empty($url)) {
        // Empty URL, no problem
        return false;
    }

    // Detailed messages (echoing the offending value) are given unless we are
    // validating a comment without debugging enabled.
    $verbose = $debug || $context != 'commenting';

    $allowed_uri_schemes = get_allowed_uri_schemes($context);

    // Validate URL structure
    if ($url[0] == '$') {
        // This is a 'special replace code' URL (used in footers)
        // NOTE: "~" is used as delimiter; a multi-byte character cannot serve as PCRE delimiter in a UTF-8 source.
        if (!preg_match('~\\$([a-z_]+)\\$~', $url)) {
            return T_('Invalid URL $code$ format');
        }
    } elseif (preg_match('~^\\w+:~', $url)) {
        // there's a scheme and therefor an absolute URL:
        if (substr($url, 0, 7) == 'mailto:') {
            // mailto:link
            if (!in_array('mailto', $allowed_uri_schemes)) {
                // Scheme not allowed
                $scheme = 'mailto:';
                $Debuglog->add('URI scheme «' . $scheme . '» not allowed!', 'error');
                return $verbose
                    ? sprintf(T_('URI scheme "%s" not allowed.'), htmlspecialchars($scheme))
                    : T_('URI scheme not allowed.');
            }

            preg_match('~^(mailto):(.*?)(\\?.*)?$~', $url, $match);
            if (!$match) {
                return $verbose
                    ? sprintf(T_('Invalid email link: %s.'), htmlspecialchars($url))
                    : T_('Invalid email link.');
            } elseif (!is_email($match[2])) {
                return $verbose
                    ? sprintf(T_('Supplied email address (%s) is invalid.'), htmlspecialchars($match[2]))
                    : T_('Invalid email address.');
            }
        } elseif (substr($url, 0, 6) == 'clsid:') {
            // clsid:link
            if (!in_array('clsid', $allowed_uri_schemes)) {
                // Scheme not allowed
                $scheme = 'clsid:';
                $Debuglog->add('URI scheme «' . $scheme . '» not allowed!', 'error');
                return $verbose
                    ? sprintf(T_('URI scheme "%s" not allowed.'), htmlspecialchars($scheme))
                    : T_('URI scheme not allowed.');
            }

            if (!preg_match('~^(clsid):([a-fA-F0-9\\-]+)$~', $url, $match)) {
                return T_('Invalid class ID format');
            }
        } elseif (substr($url, 0, 11) == 'javascript:') {
            // javascript:
            // Basically there could be anything here, so only the scheme itself is checked.
            if (!in_array('javascript', $allowed_uri_schemes)) {
                // Scheme not allowed
                $scheme = 'javascript:';
                $Debuglog->add('URI scheme «' . $scheme . '» not allowed!', 'error');
                return $verbose
                    ? sprintf(T_('URI scheme "%s" not allowed.'), htmlspecialchars($scheme))
                    : T_('URI scheme not allowed.');
            }
        } else {
            // convert URL to IDN:
            load_funcs('_ext/idna/_idna_convert.class.php');
            $IDNA = new Net_IDNA_php4();

            global $evo_charset;
            $url = $IDNA->encode(convert_charset($url, 'utf-8', $evo_charset));

            // Extended ("x") mode: every "#" comment below must end at a line break.
            if (!preg_match('~^                      # start
                ([a-z][a-z0-9+.\\-]*)                # scheme
                ://                                  # authorize absolute URLs only ( // not present in clsid: -- problem? ; mailto: handled above)
                (\\w+(:\\w+)?@)?                     # username or username and password (optional)
                ( localhost |
                    [a-z0-9]([a-z0-9\\-])*           # Don t allow anything too funky like entities
                    \\.                              # require at least 1 dot
                    [a-z0-9]([a-z0-9.\\-])+          # Don t allow anything too funky like entities
                )
                (:[0-9]+)?                           # optional port specification
                [^ ]*                                # allow no space
                $~ix', $url, $match)) {
                // Cannot validate URL structure
                $Debuglog->add('URL «' . $url . '» does not match url pattern!', 'error');
                return $verbose
                    ? sprintf(T_('Invalid URL format (%s).'), htmlspecialchars($url))
                    : T_('Invalid URL format.');
            }

            $scheme = strtolower($match[1]);
            if (!in_array($scheme, $allowed_uri_schemes)) {
                // Scheme not allowed
                $Debuglog->add('URI scheme «' . $scheme . '» not allowed!', 'error');
                return $verbose
                    ? sprintf(T_('URI scheme "%s" not allowed.'), htmlspecialchars($scheme))
                    : T_('URI scheme not allowed.');
            }
        }
    } else {
        // URL is relative..
        if ($context == 'commenting') {
            // We do not allow relative URLs in comments
            return $verbose
                ? sprintf(T_('URL "%s" must be absolute.'), htmlspecialchars($url))
                : T_('URL must be absolute.');
        }

        $char = substr($url, 0, 1);
        if ($char != '/' && $char != '#') {
            // must start with a slash or hash (for HTML anchors to the same page)
            return $verbose
                ? sprintf(T_('URL "%s" must be a full path starting with "/" or an anchor starting with "#".'), htmlspecialchars($url))
                : T_('URL must be a full path starting with "/" or an anchor starting with "#".');
        }
    }

    if ($antispam_check) {
        // Search for blocked keywords:
        if ($block = antispam_check($url)) {
            return $verbose
                ? sprintf(T_('URL "%s" not allowed: blacklisted word "%s".'), htmlspecialchars($url), $block)
                : T_('URL not allowed');
        }
    }

    return false; // OK
}