/**
 * Runs a string, or a collection of strings, through this filter and returns the resulting value(s) converted
 * to the expected output type(s).
 *
 * @param  mixed $inputStringOrDecodedCollection The string to be filtered, or the array or map holding the
 * strings to be filtered. When the expected type is an array or a map and a string is passed, the string is
 * treated as JSON and decoded before its elements are filtered.
 * @param  reference $success **OUTPUT.** Set to `true` when the filtering succeeded and to `false` otherwise.
 *
 * @return mixed The filtered value, or the collection of filtered values, of the expected type(s). When the
 * filtering fails, the filter's default value is returned instead.
 */
public function filter($inputStringOrDecodedCollection, &$success)
{
    assert('is_cstring($inputStringOrDecodedCollection) || is_collection($inputStringOrDecodedCollection)',
        vs(isset($this), get_defined_vars()));

    $success = true;
    $type = $this->m_expectedType;

    // ---- Collection output (array or map) ----
    if ($type == self::CARRAY || $type == self::CMAP) {
        $wantArray = ($type == self::CARRAY);

        // Only a (JSON) string or a collection of the matching kind is acceptable as input.
        $isAcceptable = is_cstring($inputStringOrDecodedCollection) ||
            ($wantArray
                ? is_carray($inputStringOrDecodedCollection)
                : is_cmap($inputStringOrDecodedCollection));
        if (!$isAcceptable) {
            $success = false;
            return oop_x($this->m_defaultValue);
        }

        if (is_cstring($inputStringOrDecodedCollection)) {
            // Assume JSON format for the input string.
            $decoder = new CJson($inputStringOrDecodedCollection, $this->m_jsonStrictness);
            $collection = $decoder->decode($success);
            if (!$success) {
                return oop_x($this->m_defaultValue);
            }

            // The decoded value must be of the same kind of collection as the expected type.
            $isRightKind = $wantArray ? is_carray($collection) : is_cmap($collection);
            if (!$isRightKind) {
                $success = false;
                return oop_x($this->m_defaultValue);
            }
        } else {
            $collection = $inputStringOrDecodedCollection;
        }

        $collection = self::recurseCollectionFiltering($collection, $this->m_collectionInputFilters, $success, 0);
        if (!$success) {
            return oop_x($this->m_defaultValue);
        }
        return $collection;
    }

    // ---- Scalar output: the input value must be of string type ----
    if (!is_cstring($inputStringOrDecodedCollection)) {
        $success = false;
        return oop_x($this->m_defaultValue);
    }
    $text = $inputStringOrDecodedCollection;

    $isNumeric = ($type == self::INT || $type == self::FLOAT);
    $needsSideTrimming = $isNumeric ||
        $type == self::BOOL ||
        $type == self::EMAIL ||
        $type == self::URL ||
        $type == self::IP;

    if ($needsSideTrimming) {
        // Strip both ends of the string using the trimming subject pattern defined by CUString.
        $sideSpaceRe = CUString::TRIMMING_AND_SPACING_NORM_SUBJECT_RE;
        $text = CRegex::remove($text, "/^({$sideSpaceRe})+|({$sideSpaceRe})+\\z/u");
    }

    // Pre-processing that applies to integer and floating-point inputs only.
    $parseAsHex = false;
    if ($isNumeric) {
        if (CString::startsWith($text, "+")) {
            // Drop the explicit plus sign.
            $text = CString::substr($text, 1);
        }

        $looksLikeHex = CRegex::find($text, "/^-?0x/i");
        $parseAsHex = ($type == self::INT && $this->m_allowHex && $looksLikeHex);

        if ($this->m_allowLeadingZeros && !$parseAsHex) {
            // Strip leading zeros, except from an integer that is going to be read as hexadecimal.
            $text = CRegex::replace($text, "/^(\\D*)0*(?!\\b)/", "\$1");
        }

        if ($this->m_allowComma) {
            // Drop commas that are followed by a group of exactly three digits.
            $text = CRegex::remove($text, "/,(?=\\d{3}\\b)/");
        }
    }

    // Validate and sanitize the value according to its expected type.
    switch ($type) {
        case self::BOOL:
            if (!CRegex::find($text, "/^(1|true|yes|on|0|false|no|off)\\z/i")) {
                $success = false;
                return $this->m_defaultValue;
            }
            return CString::equals($text, "1") ||
                CString::equalsCi($text, "true") ||
                CString::equalsCi($text, "yes") ||
                CString::equalsCi($text, "on");

        case self::INT:
            if ($parseAsHex) {
                // Hexadecimal notation.
                if (!CRegex::find($text, "/^-?0x[0-9A-F]+\\z/i")) {
                    $success = false;
                    return $this->m_defaultValue;
                }
                $number = CString::toIntFromHex($text);
            } else {
                // Regular decimal notation.
                if (!CRegex::find($text, "/^-?(?!0(?!\\b))\\d+\\z/")) {
                    $success = false;
                    return $this->m_defaultValue;
                }
                $number = CString::toInt($text);
            }

            // A value outside of the valid range is a failure ...
            if (isset($this->m_intValidMin) && $number < $this->m_intValidMin) {
                $success = false;
                return $this->m_defaultValue;
            }
            if (isset($this->m_intValidMax) && $number > $this->m_intValidMax) {
                $success = false;
                return $this->m_defaultValue;
            }

            // ... whereas a value outside of the clamping range is pulled back to the nearest bound.
            if (isset($this->m_intClampingMin) && $number < $this->m_intClampingMin) {
                $number = $this->m_intClampingMin;
            }
            if (isset($this->m_intClampingMax) && $number > $this->m_intClampingMax) {
                $number = $this->m_intClampingMax;
            }
            return $number;

        case self::FLOAT:
            if (!CRegex::find($text, "/^-?(?!0(?!\\b))\\d*\\.?\\d+(e[\\-+]?\\d+)?\\z/i")) {
                $success = false;
                return $this->m_defaultValue;
            }
            $number = CString::toFloat($text);

            if (isset($this->m_floatValidMin) && $number < $this->m_floatValidMin) {
                $success = false;
                return $this->m_defaultValue;
            }
            if (isset($this->m_floatValidMax) && $number > $this->m_floatValidMax) {
                $success = false;
                return $this->m_defaultValue;
            }

            if (isset($this->m_floatClampingMin) && $number < $this->m_floatClampingMin) {
                $number = $this->m_floatClampingMin;
            }
            if (isset($this->m_floatClampingMax) && $number > $this->m_floatClampingMax) {
                $number = $this->m_floatClampingMax;
            }
            return $number;

        case self::CSTRING:
            if (!CString::isValid($text)) {
                $success = false;
                return $this->m_defaultValue;
            }
            if (!$this->m_keepAbnormalNewlines) {
                $text = CString::normNewlines($text);
            }

            // Pick the set of bytes to strip from the combination of the two "keep" options; when both are
            // on, nothing is stripped.
            $unwantedRe = null;
            if ($this->m_keepNonPrintable) {
                if (!$this->m_keepTabsAndNewlines) {
                    $unwantedRe = "/[\\x09\\x0A\\x0D]/";
                }
            } elseif ($this->m_keepTabsAndNewlines) {
                $unwantedRe = "/[\\x00-\\x1F\\x7F-\\xFF](?<![\\x09\\x0A\\x0D])/";
            } else {
                $unwantedRe = "/[\\x00-\\x1F\\x7F-\\xFF]/";
            }
            if ($unwantedRe !== null) {
                $text = CRegex::remove($text, $unwantedRe);
            }

            if (!$this->m_keepSideSpacing) {
                $text = CString::trim($text);
            }
            if (!$this->m_keepExtraSpacing) {
                $text = CString::normSpacing($text);
            }
            return $text;

        case self::CUSTRING:
            if (!CUString::isValid($text)) {
                $success = false;
                return $this->m_defaultValue;
            }
            if (!$this->m_keepAbnormalNewlines) {
                $text = CUString::normNewlines($text);
            }

            // Same selection as for byte strings, but expressed with Unicode character properties.
            $unwantedRe = null;
            if ($this->m_keepNonPrintable) {
                if (!$this->m_keepTabsAndNewlines) {
                    $unwantedRe = "/\\x{0009}|\\x{000A}|\\x{000D}|\\p{Zl}|\\p{Zp}/u";
                }
            } elseif ($this->m_keepTabsAndNewlines) {
                $unwantedRe = "/\\p{C}(?<!\\x{0009}|\\x{000A}|\\x{000D})/u";
            } else {
                $unwantedRe = "/\\p{C}|\\p{Zl}|\\p{Zp}/u";
            }
            if ($unwantedRe !== null) {
                $text = CRegex::remove($text, $unwantedRe);
            }

            if (!$this->m_keepSideSpacing) {
                $text = CUString::trim($text);
            }
            if (!$this->m_keepExtraSpacing) {
                $text = CUString::normSpacing($text);
            }
            return $text;

        case self::EMAIL:
            $email = filter_var($text, FILTER_VALIDATE_EMAIL);
            if (!is_cstring($email)) {
                $success = false;
                return $this->m_defaultValue;
            }
            return $email;

        case self::URL:
            if (!CUrl::isValid($text, $this->m_ignoreProtocolAbsence)) {
                $success = false;
                return $this->m_defaultValue;
            }
            if ($this->m_ignoreProtocolAbsence) {
                $text = CUrl::ensureProtocol($text);
            }
            return $text;

        case self::IP:
            $ipOptions = CBitField::ALL_UNSET;
            if (!$this->m_allowPrivateRange) {
                $ipOptions |= CIp::DISALLOW_PRIVATE_RANGE;
            }
            if (!$this->m_allowReservedRange) {
                $ipOptions |= CIp::DISALLOW_RESERVED_RANGE;
            }

            if ($this->m_ipV4OrV6) {
                $isValidIp = CIp::isValidV4($text, $ipOptions) || CIp::isValidV6($text, $ipOptions);
            } elseif ($this->m_ipV6) {
                $isValidIp = CIp::isValidV6($text, $ipOptions);
            } else {
                $isValidIp = CIp::isValidV4($text, $ipOptions);
            }

            if (!$isValidIp) {
                $success = false;
                return $this->m_defaultValue;
            }
            return $text;
    }
}
/**
 * Checks this string with `CUString::isValid`.
 *
 * @return bool `true` if the string is a valid Unicode string encoded in UTF-8, `false` otherwise.
 */
public function isValid()
{
    $isWellFormed = CUString::isValid($this);
    return $isWellFormed;
}
/**
 * Decodes the JSON-encoded string provided earlier to the decoder and returns the result.
 *
 * Depending on the decoding strictness, the source is pre-processed before being handed to `json_decode`:
 * with `STRICT_WITH_COMMENTS` and `LENIENT`, "//..." and "/*...*‍/" comments are removed; with `LENIENT`, the
 * character encoding is additionally repaired if invalid, single-quoted strings and unquoted or single-quoted
 * property names are converted to double-quoted ones, trailing commas are removed, raw BS/FF/LF/CR/HT
 * characters inside string values are escaped, and a JSONP wrapper is unwrapped if plain decoding fails.
 *
 * A source that cannot be decoded is reported as a failure in every strictness mode. Because `json_decode`
 * returns `null` for undecodable input, a source consisting of a bare `null` is reported as a failure as well.
 * With `STRICT` and `STRICT_WITH_COMMENTS`, a top-level value other than an object or an array is also a
 * failure.
 *
 * @param  reference $success **OPTIONAL. OUTPUT.** After the method is called with this parameter provided, the
 * parameter's value tells whether the decoding was successful.
 *
 * @return mixed The decoded value after conversion by `recurseValueAfterDecoding` (a map or an array for
 * object and array sources), or `null` if the decoding failed.
 */
public function decode(&$success = null)
{
    assert('is_cstring($this->m_source)', vs(isset($this), get_defined_vars()));

    $success = true;
    $source = $this->m_source;

    $isStrict = ($this->m_decodingStrictness == self::STRICT ||
        $this->m_decodingStrictness == self::STRICT_WITH_COMMENTS);
    $isLenient = ($this->m_decodingStrictness == self::LENIENT);

    // Alternative that consumes a complete double-quoted string and then forces the match to fail from that
    // point on, so that the alternative following it is never applied inside of a string value.
    $skipQuotedRe = "/(?<!\\\\)\"(?:[^\\\\\"]++|\\\\{2}|\\\\\\C)*\"(*SKIP)(*FAIL)|";

    if ($isLenient && !CUString::isValid($source)) {
        // Change the character encoding or try fixing it.
        if (CEString::looksLikeLatin1($source)) {
            $source = CEString::convertLatin1ToUtf8($source);
        } else {
            $source = CEString::fixUtf8($source);
        }
    }

    if ($this->m_decodingStrictness == self::STRICT_WITH_COMMENTS || $isLenient) {
        if (CRegex::find($source, "/\\/\\/|\\/\\*/u")) {
            // Remove "//..." and "/*...*/" comments.
            $source = CRegex::remove($source, $skipQuotedRe . "\\/\\/.*|\\/\\*\\C*?\\*\\//u");
        }
    }

    if ($isLenient) {
        if (CRegex::find($source, "/[:\\[,]\\s*'([^\\\\']++|\\\\{2}|\\\\\\C)*'(?=\\s*[,}\\]])/u")) {
            // Convert single-quoted string values into double-quoted, taking care of double quotes within such
            // strings before and single quotes after. This needs to go in front of the rest of the leniency fixes.

            // Step 1: escape double quotes found inside single-quoted values, one per pass, until the source
            // stops changing. A `null` result means that the replacement itself failed.
            do {
                $prevSource = $source;
                $source = CRegex::replace($source, $skipQuotedRe .
                    "([:\\[,]\\s*'(?:[^\\\\'\"]++|\\\\{2}|\\\\\\C)*)\"((?:[^\\\\']++|\\\\{2}|\\\\\\C)*')/u",
                    "\$1\\\"\$2");
            } while (!is_null($source) && !CString::equals($source, $prevSource));
            if (is_null($source)) {
                $source = "";
            }

            // Step 2: swap the enclosing single quotes for double quotes.
            $source = CRegex::replace($source, $skipQuotedRe .
                "([:\\[,]\\s*)'((?:[^\\\\']++|\\\\{2}|\\\\\\C)*)'(?=\\s*[,}\\]])/u", "\$1\"\$2\"");

            // Step 3: unescape single quotes inside the now double-quoted values, one per pass.
            do {
                $prevSource = $source;
                $source = CRegex::replace($source,
                    "/([:\\[,]\\s*\"(?:[^\\\\\"]++|\\\\{2}|\\\\[^'])*)\\\\'((?:[^\\\\\"]++|\\\\{2}|\\\\\\C)*\")" .
                    "(?=\\s*[,}\\]])/u", "\$1'\$2");
            } while (!is_null($source) && !CString::equals($source, $prevSource));
            if (is_null($source)) {
                $source = "";
            }
        }

        if (CRegex::find($source, "/[{,]\\s*[\\w\\-.]+\\s*:/u")) {
            // Put property names in double quotes.
            $source = CRegex::replace($source, $skipQuotedRe .
                "([{,]\\s*)([\\w\\-.]+)(\\s*:)/u", "\$1\"\$2\"\$3");
        }

        if (CRegex::find($source, "/[{,]\\s*'[\\w\\-.]+'\\s*:/u")) {
            // Put property names that are in single quotes in double quotes.
            $source = CRegex::replace($source, $skipQuotedRe .
                "([{,]\\s*)'([\\w\\-.]+)'(\\s*:)/u", "\$1\"\$2\"\$3");
        }

        if (CRegex::find($source, "/,\\s*[}\\]]/u")) {
            // Remove trailing commas.
            $source = CRegex::remove($source, $skipQuotedRe . ",(?=\\s*[}\\]])/u");
        }

        // Within string values, convert byte values for BS, FF, LF, CR, and HT, which are prohibited in JSON,
        // to their escaped equivalents. Each character gets its own pass over the string values, in this order.
        $stringValueSubjectRe = "/(?<!\\\\)\"(?:[^\\\\\"]++|\\\\{2}|\\\\\\C)*\"/u";
        $controlCharEscapes = array(
            "/\\x{0008}/u" => "\\b",
            "/\\x{000C}/u" => "\\f",
            "/\\x{000A}/u" => "\\n",
            "/\\x{000D}/u" => "\\r",
            "/\\x{0009}/u" => "\\t",
        );
        foreach ($controlCharEscapes as $controlCharRe => $escapeSequence) {
            $source = CRegex::replaceWithCallback($source, $stringValueSubjectRe,
                function ($matches) use ($controlCharRe, $escapeSequence) {
                    return CRegex::replace($matches[0], $controlCharRe, $escapeSequence);
                });
        }
    }

    $decodedValue = @json_decode($source, false, self::$ms_maxRecursionDepth);
    if (is_null($decodedValue)) {
        if ($isStrict) {
            $success = false;
        } elseif (CRegex::find($source, "/^\\s*[\\w.]+\\s*\\(/u")) {
            // The source string appears to be a JSONP. Extract the function's argument and try decoding again.
            $source = CRegex::replace($source, "/^\\s*[\\w.]+\\s*\\((\\C+)\\)/u", "\$1");
            $decodedValue = @json_decode($source, false, self::$ms_maxRecursionDepth);
            if (is_null($decodedValue)) {
                $success = false;
            }
        } else {
            // Neither JSON nor JSONP: there is nothing left to try, so the decoding has failed. Without this
            // branch, undecodable input would be reported as successfully decoded.
            $success = false;
        }
    }
    if (!$success) {
        return null;
    }

    if ($isStrict && !is_object($decodedValue) && !is_array($decodedValue)) {
        // In the strict modes, the top-level value is required to be an object or an array.
        $success = false;
        return null;
    }

    // Recursively convert any object into a CMapObject/CMap and any PHP array into a CArrayObject/CArray.
    return self::recurseValueAfterDecoding($decodedValue, 0);
}