/**
 * Validates an array of text values and stores the sanitized list.
 *
 * Every entry is run through a TextRule configured with this rule's options.
 * When the allowed-values option is set, each sanitized entry must also be
 * strictly contained in the allowed values. Validation stops at the first
 * failing entry.
 *
 * @param mixed $values expected to be an array of values
 * @param EntityInterface $entity not used by this rule
 *
 * @return bool true when all entries are valid; false otherwise (errors are raised via throwError)
 */
protected function execute($values, EntityInterface $entity = null)
{
    if (!is_array($values)) {
        $this->throwError('non_array_value', [], IncidentInterface::CRITICAL);
        return false;
    }

    // only consult the allowed values when the option is actually configured
    $restrict_values = $this->hasOption(self::OPTION_ALLOWED_VALUES);
    $allowed_values = $restrict_values ? $this->getAllowedValues() : [];

    $sanitized_items = [];
    $item_rule = new TextRule('text', $this->getOptions());

    foreach ($values as $item) {
        if (!$item_rule->apply($item)) {
            // surface the incidents of the nested text rule as errors of this rule
            foreach ($item_rule->getIncidents() as $incident) {
                $this->throwError(
                    $incident->getName(),
                    $incident->getParameters(),
                    $incident->getSeverity()
                );
            }
            return false;
        }

        $text = $item_rule->getSanitizedValue();

        if ($restrict_values && !in_array($text, $allowed_values, true)) {
            $this->throwError(
                self::OPTION_ALLOWED_VALUES,
                [self::OPTION_ALLOWED_VALUES => $allowed_values, 'value' => $text]
            );
            return false;
        }

        $sanitized_items[] = $text;
    }

    $this->setSanitizedValue($sanitized_items);

    return true;
}
/**
 * Validates that the given string is a URL and stores a rebuilt URL as sanitized value.
 *
 * Steps, in order:
 *  - non-strings are rejected; the configured null value is accepted as-is when
 *    the value is not mandatory
 *  - the string is validated/sanitized via a TextRule using this rule's options
 *  - when neither host nor scheme can be parsed, the default scheme and the
 *    scheme separator are prepended and the value is parsed again
 *  - default parts are added, forced parts are overwritten and required parts
 *    are checked according to the options
 *  - the host is optionally converted to punycode and spoofchecked
 *  - the rebuilt URL is verified via filter_var(FILTER_VALIDATE_URL)
 *
 * @param mixed $value value to validate; must be a string
 * @param EntityInterface $entity not used by this rule
 *
 * @return bool true when a sanitized URL was set; false otherwise (errors are raised via throwError)
 *
 * @throws RuntimeException when IDN/punycode handling or spoofchecking is requested by the
 *      options, but idn_to_ascii() or the Spoofchecker class are not available
 */
protected function execute($value, EntityInterface $entity = null)
{
    if (!is_string($value)) {
        $this->throwError('non_string_value', ['value' => $value], IncidentInterface::CRITICAL);
        return false;
    }

    $null_value = $this->getOption(AttributeInterface::OPTION_NULL_VALUE, '');
    $mandatory = $this->getOption(self::OPTION_MANDATORY, false);
    if (!$mandatory && $value === $null_value) {
        // parse_url with empty string doesn't return false but 'path' being an empty string
        $this->setSanitizedValue($null_value);
        return true;
    }

    $text_rule = new TextRule('text', $this->getOptions());
    if (!$text_rule->apply($value)) {
        foreach ($text_rule->getIncidents() as $incident) {
            $this->throwError($incident->getName(), $incident->getParameters(), $incident->getSeverity());
        }
        return false;
    }

    // we now have a valid string, that might be some kind of URL
    $val = $text_rule->getSanitizedValue();

    // default scheme to add if it's missing
    $default_scheme = $this->getOption(self::OPTION_DEFAULT_SCHEME, '');

    // try to parse the string as URL
    $raw_parts = parse_url($val);
    if ($raw_parts === false) {
        $this->throwError('parse_error', ['value' => $val]);
        return false;
    }

    // parse_url returns [ 'path' => 'localhost' ] for 'localhost' or '123.123.123.123'
    // scheme and host are missing, might be a string like: 'test.de/foo/bar' or 'localhost' or '194.123.45.167'…
    if (!array_key_exists('host', $raw_parts) && !array_key_exists('scheme', $raw_parts)) {
        $val = $default_scheme . $this->getOption(self::OPTION_SCHEME_SEPARATOR, '://') . $val;

        // reevaluate the new url value and hope it's now a valid url and the addition didn't do too much harm
        $raw_parts = parse_url($val);
        if ($raw_parts === false) {
            $this->throwError('parse_error', ['value' => $val]);
            return false;
        }
    }

    // validate mandatory host part
    if (!array_key_exists('host', $raw_parts)) {
        $this->throwError('host_missing');
        return false;
    }

    $url_parts = $raw_parts;
    if (!array_key_exists('scheme', $url_parts)) {
        $url_parts['scheme'] = $default_scheme;
    }

    // the scheme key always exists at this point, so only the whitelist needs checking
    $allowed_schemes = $this->getOption(self::OPTION_ALLOWED_SCHEMES, []);
    if (!empty($allowed_schemes) && !in_array($url_parts['scheme'], $allowed_schemes, true)) {
        $this->throwError(
            'scheme_not_allowed',
            ['value' => $val, 'scheme' => $url_parts['scheme'], 'allowed_schemes' => $allowed_schemes]
        );
        return false;
    }

    // add default values for parts when they're missing
    $default_part_options = [
        'user' => self::OPTION_DEFAULT_USER,
        'pass' => self::OPTION_DEFAULT_PASS,
        'port' => self::OPTION_DEFAULT_PORT,
        'path' => self::OPTION_DEFAULT_PATH,
        'query' => self::OPTION_DEFAULT_QUERY,
        'fragment' => self::OPTION_DEFAULT_FRAGMENT
    ];
    foreach ($default_part_options as $part => $option) {
        if ($this->hasOption($option) && !array_key_exists($part, $url_parts)) {
            $url_parts[$part] = $this->getOption($option);
        }
    }

    // force certain values for parts
    $forced_part_options = [
        'user' => self::OPTION_FORCE_USER,
        'pass' => self::OPTION_FORCE_PASS,
        'host' => self::OPTION_FORCE_HOST,
        'port' => self::OPTION_FORCE_PORT,
        'path' => self::OPTION_FORCE_PATH,
        'query' => self::OPTION_FORCE_QUERY,
        'fragment' => self::OPTION_FORCE_FRAGMENT
    ];
    foreach ($forced_part_options as $part => $option) {
        if ($this->hasOption($option)) {
            $url_parts[$part] = $this->getOption($option);
        }
    }

    // check for required parts according to existing options; the first missing part
    // is reported as '<part>_part_missing'
    $required_part_options = [
        'user' => self::OPTION_REQUIRE_USER,
        'pass' => self::OPTION_REQUIRE_PASS,
        'port' => self::OPTION_REQUIRE_PORT,
        'path' => self::OPTION_REQUIRE_PATH,
        'query' => self::OPTION_REQUIRE_QUERY,
        'fragment' => self::OPTION_REQUIRE_FRAGMENT
    ];
    foreach ($required_part_options as $part => $option) {
        if ($this->getOption($option, false) && !array_key_exists($part, $url_parts)) {
            $this->throwError($part . '_part_missing', ['value' => $val]);
            return false;
        }
    }

    $use_idn = $this->getOption(self::OPTION_USE_IDN, true);
    $convert_host_to_punycode = $this->getOption(self::OPTION_CONVERT_HOST_TO_PUNYCODE, false);

    $idn_available = function_exists('idn_to_ascii');
    if (!$idn_available && $use_idn) {
        throw new RuntimeException(
            'The INTL extension needs to be installed to check international domain names of URLs.'
        );
    }
    if (!$idn_available && $convert_host_to_punycode) {
        throw new RuntimeException(
            'The INTL extension needs to be installed to convert domains names to punycode.'
        );
    }

    // A host forced to an empty string can never be valid; reject it here as the
    // bracket check below reads the first character of the host.
    $host = $url_parts['host'];
    if ($host === '') {
        $this->throwError('host_missing');
        return false;
    }

    // test url parts are the ones used to generate a complete URL ALWAYS containing a scheme
    $test_url_parts = $url_parts;

    // punycode url parts are the ones used to generate a punycode URL ALWAYS containing a scheme
    $punycode_url_parts = $test_url_parts;

    // strip surrounding brackets (if any) before testing the host for being an IPv6 address
    $ip_candidate = $host;
    if ($host[0] === '[' && mb_substr($host, -1) === ']') {
        $ip_candidate = mb_substr($host, 1, -1);
    }
    $is_ipv6_host = filter_var($ip_candidate, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6) !== false;
    if ($is_ipv6_host) {
        $test_url_parts['host'] = 'ipv6domain.de'; // just for filter_var test as it doesn't understand ipv6
    }

    // check host for being convertible to punycode
    if ($use_idn) {
        $idn_host = idn_to_ascii($host); // @TODO options, variants, idna_info
        if ($idn_host === false) {
            $this->throwError('invalid_idn_host', ['value' => $val]);
            return false;
        }
        $punycode_url_parts['host'] = $idn_host;
    }

    /*
     * Check for suspicious letters in the domain name (confusable chars from other charsets, e.g. cyrillic)
     *
     * @see http://en.wikipedia.org/wiki/IDN_homograph_attack
     * @see http://kb.mozillazine.org/Network.IDN.blacklist_chars
     * @see http://stackoverflow.com/questions/17458876/php-spoofchecker-class
     * @see http://www.unicode.org/Public/security/revision-06/confusables.txt
     * @see http://icu-project.org/apiref/icu4j50m1/com/ibm/icu/text/SpoofChecker.html for docs on constants
     */
    $accept_suspicious_host = $this->getOption(self::OPTION_ACCEPT_SUSPICIOUS_HOST, true);
    $convert_suspicious_host = $this->getOption(self::OPTION_CONVERT_SUSPICIOUS_HOST, true);

    $spoofchecker_available = extension_loaded('intl') && class_exists("Spoofchecker");
    if (!$spoofchecker_available && ($convert_suspicious_host || !$accept_suspicious_host)) {
        throw new RuntimeException(
            'The INTL extension needs to be installed to spoofcheck for suspicious domains.'
        );
    }

    $is_suspicious = false;
    if ($spoofchecker_available) {
        $spoofchecker = new Spoofchecker();

        /*
         * Check whether two strings are visually confusable and:
         * - SINGLE_SCRIPT_CONFUSABLE: all of the characters from the two strings are from a single script
         * - MIXED_SCRIPT_CONFUSABLE: at least one string contains characters from more than one script
         * - WHOLE_SCRIPT_CONFUSABLE: each strings is of a single script, but they're from different scripts
         * - ANY_CASE: check case-sensitive confusability (even though domains are not)
         * - INVISIBLE: do not allow invisible characters like non-spacing marks
         */
        $checks = (int) $this->getOption(
            self::OPTION_DOMAIN_SPOOFCHECKER_CHECKS,
            Spoofchecker::SINGLE_SCRIPT_CONFUSABLE |
            Spoofchecker::MIXED_SCRIPT_CONFUSABLE |
            Spoofchecker::WHOLE_SCRIPT_CONFUSABLE |
            Spoofchecker::ANY_CASE |
            Spoofchecker::INVISIBLE
        );
        $spoofchecker->setChecks($checks);

        $error = null; // filled by reference with details of the failed checks
        $is_suspicious = $spoofchecker->isSuspicious($host, $error);
        if ($is_suspicious && !$accept_suspicious_host) {
            $this->throwError('suspicious_domain', ['value' => $val, 'error' => $error]);
            return false;
        }

        // TODO spoofcheck other parts of the url?
    }

    // generate URLs for filter_var test and setting as sanitized value
    $url = $this->getUrlFromArray($url_parts);
    $test_url = $this->getUrlFromArray($test_url_parts);
    $punycode_url = $this->getUrlFromArray($punycode_url_parts);

    $filter_flags = 0;
    if ($this->getOption(self::OPTION_REQUIRE_PATH, false)) {
        $filter_flags |= FILTER_FLAG_PATH_REQUIRED;
    }
    if ($this->getOption(self::OPTION_REQUIRE_QUERY, false)) {
        $filter_flags |= FILTER_FLAG_QUERY_REQUIRED;
    }

    // IPv6 hosts are tested with the placeholder domain, everything else with the punycode variant
    $url_to_test = $is_ipv6_host ? $test_url : $punycode_url;
    if (filter_var($url_to_test, FILTER_VALIDATE_URL, $filter_flags) === false) {
        $this->throwError('invalid_format', ['url' => $url, 'punycode_url' => $punycode_url, 'value' => $val]);
        return false;
    }

    // the punycode URL is used when conversion is requested or a suspicious host should be converted
    $use_punycode_url = ($use_idn && $convert_host_to_punycode) || ($is_suspicious && $convert_suspicious_host);
    $this->setSanitizedValue($use_punycode_url ? $punycode_url : $url);

    return true;
}
/**
 * Validates a scalar value by delegating to the rule matching its PHP type.
 *
 * Null is treated as an empty string. Integers are checked by an IntegerRule,
 * floats by a FloatRule, booleans by a BooleanRule and strings by a TextRule.
 * The sanitized value of the delegate rule becomes this rule's sanitized value.
 *
 * @param mixed $value value to validate
 * @param EntityInterface $entity not used by this rule
 *
 * @return bool true when the value is valid; false otherwise (errors are raised on this rule)
 */
protected function execute($value, EntityInterface $entity = null)
{
    if ($value === null) {
        $value = '';
    }

    if (!is_scalar($value)) {
        $this->throwError('invalid_type', ['value' => $value]);
        return false;
    }

    // pick the delegate rule according to the value's type
    if (is_int($value)) {
        $delegate = new IntegerRule('integer', $this->getIntegerOptions());
    } elseif (is_float($value)) {
        $delegate = new FloatRule('float', $this->getFloatOptions());
    } elseif (is_bool($value)) {
        $delegate = new BooleanRule('boolean', $this->getOptions());
    } elseif (is_string($value)) {
        $delegate = new TextRule('text', $this->getOptions());
    } else {
        $this->throwError('invalid_type', ['value' => $value]);
        return false;
    }

    // validate value to be a valid string, integer, float or boolean
    if (!$delegate->apply($value)) {
        $this->throwIncidentsAsErrors($delegate);
        return false;
    }

    $this->setSanitizedValue($delegate->getSanitizedValue());

    return true;
}
#!/usr/bin/env php
<?php

use Trellis\Runtime\Validator\Rule\Type\TextRule;

require_once dirname(__DIR__) . '/vendor/autoload.php';

// Micro benchmark: applies a TextRule with empty options to a 20x repeated test
// string and prints the duration of each run in milliseconds plus the average.

$rule = new TextRule('text', []);

$num = 10;          // number of timed runs
$repeats = 100000;  // rule applications per run

$string = ' this is a test string w/o very special characters… ';
$string_x20 = str_repeat($string, 20);

$times = [];
foreach (range(0, $num - 1) as $run) {
    $start = microtime(true);
    for ($i = 0; $i < $repeats; $i++) {
        $rule->apply($string_x20);
    }
    $elapsed_ms = round((microtime(true) - $start) * 1000, 3);

    $times[$run] = $elapsed_ms;
    echo $elapsed_ms . 'ms ';
}

$average = round(array_sum($times) / $num, 3);

echo PHP_EOL . 'Average time for ' . $repeats . ' text rule validations: ' . $average . PHP_EOL;

// String: ' this is a test string w/o very special characters… ' & the same x20
// is_string only in TextRule: avg=183ms => w/ 20x longer text: avg=183ms
//
// preg_match => 10049.92 – preg_replace => 11683.078 – replace ".*?" with "[^\pC\pZ]*" => 5148ms (tests fail)
//
// default options on trimmable string: avg=3267ms 3205ms => w/ 20x longer text: avg=10290ms
// default options on trimmable string w/o toBoolean: avg=2660ms, 2690ms => w/ 20x longer text: avg=9777ms, 9885ms