Esempio n. 1
0
 /**
  * Checks that a SpoofcheckerRule named 'url' (created with an empty options
  * array) rejects the given value and exposes no sanitized value afterwards.
  *
  * @dataProvider provideInvalidUrls
  *
  * @param mixed  $invalid        value the rule is expected to reject
  * @param string $assert_message prefix prepended to both assertion failure messages
  */
 public function testInvalidUrl($invalid, $assert_message = '')
 {
     $spoofchecker = new SpoofcheckerRule('url', []);

     // the rule must refuse the value ...
     $was_accepted = $spoofchecker->apply($invalid);
     $this->assertFalse($was_accepted, $assert_message . ' should be invalid');

     // ... and must not have stored anything as its sanitized result
     $sanitized = $spoofchecker->getSanitizedValue();
     $this->assertNull($sanitized, $assert_message . ' should not be set as sanitized value');
 }
Esempio n. 2
0
 /**
  * Sanitizes a string for use as a filename and validates its length.
  *
  * Steps, in order:
  *  1. reject non-string input
  *  2. remove NUL bytes, ZERO WIDTH SPACE and the RTL/LTR override characters
  *  3. drop ill-formed UTF-8 byte sequences
  *  4. trim leading/trailing separator (\pZ) and control/format (\pC) characters
  *  5. remove one trailing zero-width non-joiner / zero-width joiner
  *  6. remove the ASCII control characters 0x01-0x1F and 0x7F
  *  7. collapse 'folder/../', '/./', '//' and runs of dots
  *  8. optionally replace special characters (OPTION_REPLACE_SPECIAL_CHARS, default true)
  *     with OPTION_REPLACE_WITH (default '-')
  *  9. trim leading/trailing '.' and '-'
  * 10. enforce OPTION_MIN_LENGTH / OPTION_MAX_LENGTH when those options are set
  * 11. optionally lowercase (OPTION_LOWERCASE, default false)
  * 12. optionally run a SpoofcheckerRule on the result (OPTION_SPOOFCHECK_RESULT, default false)
  *
  * All invisible characters are written as explicit byte escapes instead of
  * raw literals so that they cannot get lost when the file is edited, copied
  * or re-encoded (an empty needle is silently ignored by str_replace()).
  *
  * @param mixed $value value to sanitize; anything but a string is rejected
  * @param EntityInterface $entity not used by this method
  *
  * @return bool true when the sanitized value was stored via setSanitizedValue(),
  *              false after throwError() was called for a failed check
  *
  * @throws InvalidConfigException when the min/max length option is not an integer
  */
 protected function execute($value, EntityInterface $entity = null)
 {
     if (!is_string($value)) {
         $this->throwError('non_string_value', ['value' => $value], IncidentInterface::CRITICAL);
         return false;
     }

     // needles are processed in array order, i.e. exactly like consecutive str_replace() calls
     $value = str_replace(
         [
             // @see http://hakipedia.com/index.php/Poison_Null_Byte
             "\x00",
             // U+200B ZERO WIDTH SPACE
             "\xE2\x80\x8B",
             // U+202E RIGHT-TO-LEFT OVERRIDE and U+202D LEFT-TO-RIGHT OVERRIDE
             // which can be used to turn 'image[RTLO]gpj.exe' into 'imageexe.jpg'
             "\xE2\x80\xAE",
             "\xE2\x80\xAD",
         ],
         '',
         $value
     );

     /**
      * Some links for illformed byte sequences etc.:
      *
      * @see http://php.net/manual/de/function.mb-check-encoding.php
      * @see http://www.w3.org/International/questions/qa-forms-utf-8.en.php
      * @see http://unicode.org/reports/tr36/#Ill-Formed_Subsequences
      * @see http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
      */
     // strip invalid utf8 characters
     // use mbstring here instead of iconv with '//ignore' – https://bugs.php.net/bug.php?id=61484
     $prev = ini_set('mbstring.substitute_character', 'none');
     $value = mb_convert_encoding($value, 'UTF-8', 'UTF-8');
     if ($prev !== false) {
         // ini_set() returns false on failure; never write that back as the setting
         ini_set('mbstring.substitute_character', $prev);
     }

     // trim the input string
     // note: '/(*UTF8)[[:alnum:]]/' matches 'é' while '/[[:alnum:]]/' does not
     // "*+" is a possessive quantifier
     $pattern = '/(*UTF8)^[\\pZ\\pC]*+(?P<trimmed>.*?)[\\pZ\\pC]*+$/usDS';
     if (preg_match($pattern, $value, $matches)) {
         $value = $matches['trimmed'];
     }

     // trim zero-width non-joiner and zero-width joiner (at the end of the text)
     // https://en.wikipedia.org/wiki/Zero-width_non-joiner
     $value = preg_replace("/\xE2\x80\x8C\$/", '', $value); // U+200C ZERO WIDTH NON-JOINER
     $value = preg_replace("/\xE2\x80\x8D\$/", '', $value); // U+200D ZERO WIDTH JOINER

     // æ is a ligature in english but a distinct letter in icelandic and other languages

     // remove non-printable control characters including TAB, LINE FEED, CARRIAGE RETURN:
     // every byte from 0x01 to 0x1F plus 0x7F (DEL). The list is generated so that no
     // raw control byte has to live inside this source file.
     $remove_chars = array_map('chr', array_merge(range(0x01, 0x1F), [0x7F]));
     $value = str_replace($remove_chars, '', $value);
     if (!is_string($value)) {
         $this->throwError('control_character_stripping_failed', [], IncidentInterface::CRITICAL);
         return false;
     }

     // TODO urldecode or similar to replace common %20 etc. patterns from URL downloading with characters?
     // solve relative paths like 'folder/../file.ext' – as we probably replace the '/' with '-'
     // anyways later on this might seem unnecessary, but leads to nicer filenames with less '-'
     do {
         // repeat until stable: removing one 'x/../' may expose another one
         $value = preg_replace('#[^/\\.]+/\\.\\./#', '', $value, -1, $count);
     } while ($count);
     $value = str_replace(['/./', '//'], '/', $value);

     // replace multiple occurrences of '.' with one '.'
     $value = preg_replace('/\\.{2,}/', '.', $value);

     $replace_special_chars = $this->getOption(self::OPTION_REPLACE_SPECIAL_CHARS, true);
     if ($replace_special_chars) {
         $replace_chars = ['#', '<', '$', '+', '%', '>', '!', '`', '&', '*', '‘', '|', '{', '?', '“', '=', '}', '/', ':', '\\', ' ', '@'];
         $replace_with = $this->getOption(self::OPTION_REPLACE_WITH, '-');
         $value = str_replace($replace_chars, $replace_with, $value);
         if (!is_string($value)) {
             $this->throwError('character_replacing_failed', [], IncidentInterface::CRITICAL);
             return false;
         }
     }

     // trim '.' and '-' (so regardless of LTR or RTL script the filename doesn't start
     // with a dot to prevent generating a hidden dotfile filename
     $value = trim($value, '.-');

     // check minimum string length
     // (the value is UTF-8 at this point, so count characters as UTF-8 regardless of
     // the configured mbstring internal encoding)
     if ($this->hasOption(self::OPTION_MIN_LENGTH)) {
         $min = filter_var($this->getOption(self::OPTION_MIN_LENGTH, -PHP_INT_MAX - 1), FILTER_VALIDATE_INT);
         if ($min === false) {
             throw new InvalidConfigException('Minimum string length specified is not interpretable as integer.');
         }
         if (mb_strlen($value, 'UTF-8') < $min) {
             $this->throwError(self::OPTION_MIN_LENGTH, [self::OPTION_MIN_LENGTH => $min, 'value' => $value]);
             return false;
         }
     }

     // check maximum string length
     if ($this->hasOption(self::OPTION_MAX_LENGTH)) {
         $max = filter_var($this->getOption(self::OPTION_MAX_LENGTH, PHP_INT_MAX), FILTER_VALIDATE_INT);
         if ($max === false) {
             throw new InvalidConfigException('Maximum string length specified is not interpretable as integer.');
         }
         if (mb_strlen($value, 'UTF-8') > $max) {
             $this->throwError(self::OPTION_MAX_LENGTH, [self::OPTION_MAX_LENGTH => $max, 'value' => $value]);
             return false;
         }
     }

     $lowercase = $this->getOption(self::OPTION_LOWERCASE, false);
     if ($lowercase) {
         $value = mb_strtolower($value, 'UTF-8');
         // TODO it's probably advisable to manually lowercase some more variants as mentioned
         // in this comment: http://php.net/manual/de/function.mb-strtolower.php#105753
     }

     $spoofcheck_resulting_value = $this->getOption(self::OPTION_SPOOFCHECK_RESULT, false);
     if ($spoofcheck_resulting_value) {
         $rule = new SpoofcheckerRule('spoofcheck-resulting-text', $this->getOptions());
         if (!$rule->apply($value)) {
             // pass every incident of the nested rule on under this rule
             foreach ($rule->getIncidents() as $incident) {
                 $this->throwError($incident->getName(), $incident->getParameters(), $incident->getSeverity());
             }
             return false;
         }
         $value = $rule->getSanitizedValue();
     }

     $this->setSanitizedValue($value);
     return true;
 }
Esempio n. 3
0
 /**
  * Sanitizes and validates a text value according to the configured options.
  *
  * Steps, in order (option default in parentheses):
  *  1. reject non-string input
  *  2. run a SpoofcheckerRule on the incoming value (OPTION_SPOOFCHECK_INCOMING, false)
  *  3. remove NUL bytes (OPTION_STRIP_NULL_BYTES, true)
  *  4. remove ZERO WIDTH SPACE (OPTION_STRIP_ZERO_WIDTH_SPACE, false)
  *  5. remove the RTL/LTR override characters (OPTION_STRIP_DIRECTION_OVERRIDES, false)
  *  6. reject ill-formed UTF-8 (OPTION_REJECT_INVALID_UTF8, true)
  *  7. drop ill-formed UTF-8 byte sequences (OPTION_STRIP_INVALID_UTF8, true)
  *  8. trim separator (\pZ) and control/format (\pC) characters (OPTION_TRIM, true)
  *  9. remove ASCII control characters 0x01-0x1F and 0x7F
  *     (OPTION_STRIP_CONTROL_CHARACTERS, true), keeping TAB when OPTION_ALLOW_TAB
  *     (true) and LF/CR when OPTION_ALLOW_CRLF (false) is set
  * 10. convert CRLF and CR to LF (OPTION_NORMALIZE_NEWLINES, false)
  * 11. enforce OPTION_MIN_LENGTH / OPTION_MAX_LENGTH when those options are set
  * 12. run a SpoofcheckerRule on the result (OPTION_SPOOFCHECK_RESULT, false)
  *
  * All invisible characters are written as explicit byte escapes instead of
  * raw literals so that they cannot get lost when the file is edited, copied
  * or re-encoded (an empty needle is silently ignored by str_replace()).
  *
  * @param mixed $value value to sanitize; anything but a string is rejected
  * @param EntityInterface $entity not used by this method
  *
  * @return bool true when the sanitized value was stored via setSanitizedValue(),
  *              false after throwError() was called for a failed check
  *
  * @throws InvalidConfigException when the min/max length option is not an integer
  */
 protected function execute($value, EntityInterface $entity = null)
 {
     if (!is_string($value)) {
         $this->throwError('non_string_value', ['value' => $value], IncidentInterface::CRITICAL);
         return false;
     }

     $spoofcheck_incoming_value = $this->getOption(self::OPTION_SPOOFCHECK_INCOMING, false);
     if ($spoofcheck_incoming_value) {
         $rule = new SpoofcheckerRule('spoofcheck-incoming-text', $this->getOptions());
         if (!$rule->apply($value)) {
             // pass every incident of the nested rule on under this rule
             foreach ($rule->getIncidents() as $incident) {
                 $this->throwError($incident->getName(), $incident->getParameters(), $incident->getSeverity());
             }
             return false;
         }
         $value = $rule->getSanitizedValue();
     }

     // @see http://hakipedia.com/index.php/Poison_Null_Byte
     $strip_null_bytes = $this->getOption(self::OPTION_STRIP_NULL_BYTES, true);
     if ($strip_null_bytes) {
         $value = str_replace("\x00", '', $value);
     }

     // remove zero-width space character from text
     $strip_zero_width_space = $this->getOption(self::OPTION_STRIP_ZERO_WIDTH_SPACE, false);
     if ($strip_zero_width_space) {
         $value = str_replace("\xE2\x80\x8B", '', $value); // U+200B ZERO WIDTH SPACE
     }

     // strip unicode characters 'RIGHT-TO-LEFT OVERRIDE' and 'LEFT-TO-RIGHT OVERRIDE' if necessary
     $strip_direction_overrides = $this->getOption(self::OPTION_STRIP_DIRECTION_OVERRIDES, false);
     if ($strip_direction_overrides) {
         $value = str_replace("\xE2\x80\xAE", '', $value); // U+202E RIGHT-TO-LEFT OVERRIDE
         $value = str_replace("\xE2\x80\xAD", '', $value); // U+202D LEFT-TO-RIGHT OVERRIDE
     }

     // TODO should one allow trimming of zero-width non-joiner (only at the end of text)?

     /**
      * Some links for illformed byte sequences etc.:
      *
      * @see http://php.net/manual/de/function.mb-check-encoding.php
      * @see http://www.w3.org/International/questions/qa-forms-utf-8.en.php
      * @see http://unicode.org/reports/tr36/#Ill-Formed_Subsequences
      * @see http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
      */
     // check for a valid utf8 string without certain byte sequences
     $reject_invalid_utf8 = $this->getOption(self::OPTION_REJECT_INVALID_UTF8, true);
     if ($reject_invalid_utf8 && !mb_check_encoding($value, 'UTF-8')) {
         $this->throwError(
             'invalid_utf8',
             ['value' => $value, 'converted_value' => mb_convert_encoding($value, 'UTF-8', 'UTF-8')],
             IncidentInterface::CRITICAL
         );
         return false;
     }

     // strip invalid utf8 characters
     // the stripping might not work as good as expected depending on php bugs etc.
     $strip_invalid_utf8 = $this->getOption(self::OPTION_STRIP_INVALID_UTF8, true);
     if ($strip_invalid_utf8) {
         // use mbstring here instead of iconv with '//ignore' – https://bugs.php.net/bug.php?id=61484
         // might be relevant as well: https://bugs.php.net/bug.php?id=65045
         $prev = ini_set('mbstring.substitute_character', 'none');
         $value = mb_convert_encoding($value, 'UTF-8', 'UTF-8');
         if ($prev !== false) {
             // ini_set() returns false on failure; never write that back as the setting
             ini_set('mbstring.substitute_character', $prev);
         }
     }

     // trim the input string if necessary
     // this might actually not trim a lot when invalid utf8 is left from prior steps
     if ($this->getOption(self::OPTION_TRIM, true)) {
         // note: '/(*UTF8)[[:alnum:]]/' matches 'é' while '/[[:alnum:]]/' does not
         // \p{Z}: any kind of whitespace or invisible separator
         // \p{C}: invisible control characters and unused code points
         // "*+" is not a mistake, but a possessive quantifier
         // @see http://www.regular-expressions.info/unicode.html
         $pattern = '/(*UTF8)^[\\pZ\\pC]*+(?P<trimmed>.*?)[\\pZ\\pC]*+$/usDS';
         if (preg_match($pattern, $value, $matches)) {
             $value = $matches['trimmed'];
         }
     }

     $sanitized_value = $value;

     // additionally remove some control characters
     $strip_ctrl_chars = $this->getOption(self::OPTION_STRIP_CONTROL_CHARACTERS, true);
     if ($strip_ctrl_chars) {
         // remove non-printable control characters, but MAYBE allow TAB, LINE FEED, CARRIAGE RETURN:
         // every byte from 0x01 to 0x1F plus 0x7F (DEL). The list is generated so that no
         // raw control byte has to live inside this source file.
         $remove_chars = array_map('chr', array_merge(range(0x01, 0x1F), [0x7F]));

         // characters that are exempt from stripping
         $keep_chars = [];
         $allow_tab = $this->getOption(self::OPTION_ALLOW_TAB, true);
         if ($allow_tab) {
             $keep_chars[] = "\t"; // "\x09"
         }
         $allow_crlf = $this->getOption(self::OPTION_ALLOW_CRLF, false);
         if ($allow_crlf) {
             $keep_chars[] = "\n"; // "\x0A"
             $keep_chars[] = "\r"; // "\x0D"
         }
         $remove_chars = array_diff($remove_chars, $keep_chars);

         $sanitized_value = str_replace($remove_chars, '', $value);
         if (!is_string($sanitized_value)) {
             $this->throwError('control_character_stripping_failed', [], IncidentInterface::CRITICAL);
             return false;
         }
     }

     $normalize_newlines = $this->getOption(self::OPTION_NORMALIZE_NEWLINES, false);
     if ($normalize_newlines) {
         // "\r\n" must come first so that it becomes one "\n" instead of two
         $sanitized_value = str_replace(["\r\n", "\r"], "\n", $sanitized_value);
         if (!is_string($sanitized_value)) {
             $this->throwError('normalizing_newlines_failed', [], IncidentInterface::CRITICAL);
             return false;
         }
     }

     // check minimum string length
     // (count characters as UTF-8 regardless of the configured mbstring internal encoding)
     if ($this->hasOption(self::OPTION_MIN_LENGTH)) {
         $min = filter_var($this->getOption(self::OPTION_MIN_LENGTH, -PHP_INT_MAX - 1), FILTER_VALIDATE_INT);
         if ($min === false) {
             throw new InvalidConfigException('Minimum string length specified is not interpretable as integer.');
         }
         if (mb_strlen($sanitized_value, 'UTF-8') < $min) {
             $this->throwError(self::OPTION_MIN_LENGTH, [self::OPTION_MIN_LENGTH => $min, 'value' => $sanitized_value]);
             return false;
         }
     }

     // check maximum string length
     if ($this->hasOption(self::OPTION_MAX_LENGTH)) {
         $max = filter_var($this->getOption(self::OPTION_MAX_LENGTH, PHP_INT_MAX), FILTER_VALIDATE_INT);
         if ($max === false) {
             throw new InvalidConfigException('Maximum string length specified is not interpretable as integer.');
         }
         if (mb_strlen($sanitized_value, 'UTF-8') > $max) {
             $this->throwError(self::OPTION_MAX_LENGTH, [self::OPTION_MAX_LENGTH => $max, 'value' => $sanitized_value]);
             return false;
         }
     }

     $spoofcheck_resulting_value = $this->getOption(self::OPTION_SPOOFCHECK_RESULT, false);
     if ($spoofcheck_resulting_value) {
         $rule = new SpoofcheckerRule('spoofcheck-resulting-text', $this->getOptions());
         if (!$rule->apply($sanitized_value)) {
             // pass every incident of the nested rule on under this rule
             foreach ($rule->getIncidents() as $incident) {
                 $this->throwError($incident->getName(), $incident->getParameters(), $incident->getSeverity());
             }
             return false;
         }
         $sanitized_value = $rule->getSanitizedValue();
     }

     $this->setSanitizedValue($sanitized_value);
     return true;
 }