/**
 * Exercises `CRegex::find` against ASCII and Unicode subjects, both with and without the output parameter that
 * receives the matched substring.
 */
public function testFind()
{
    // ASCII: "!" is the only character in the subject that is neither a word character nor a space.
    $asciiSubject = "Hello there!";

    $this->assertTrue(CRegex::find($asciiSubject, "/[^\\w ]/"));

    $wasFound = CRegex::find($asciiSubject, "/[^\\w ]/", $match);
    $this->assertTrue($wasFound && CString::equals($match, "!"));

    // The subject contains no digits, so nothing is expected to be found.
    $this->assertFalse(CRegex::find($asciiSubject, "/\\d/", $match));

    // Unicode: the leading "¡" is the first character that is neither a letter nor a space.
    $this->assertTrue(CRegex::find("¡Hello there!", "/[^\\p{L} ]/u"));

    $wasFound = CRegex::find("¡Hello There!", "/[^\\p{L} ]/u", $match);
    $this->assertTrue($wasFound && CUString::equals($match, "¡"));

    $this->assertFalse(CRegex::find("¡Hello there!", "/\\d/u", $match));
}
/**
 * Extracts the trailing extension from the path to a file.
 *
 * Only the part after the final dot is reported, e.g. "gz" for "/path/to/file.tar.gz".
 *
 * @param  string $path The path to the file (can be absolute or relative). Must not be empty.
 *
 * @return CUStringObject The last extension of the file, or an empty string if the pattern finds none.
 */
public static function lastExtension($path)
{
    assert('is_cstring($path)', vs(isset($this), get_defined_vars()));
    assert('!CString::isEmpty($path)', vs(isset($this), get_defined_vars()));

    // Look for a run of characters other than "/" and "." that is preceded by a dot and reaches the end of the path.
    $hasExtension = CRegex::find($path, "/(?<=\\.)[^\\/.]+\\z/", $extension);
    return ( $hasExtension ) ? $extension : "";
}
/**
 * Reports which version of the HTTP protocol the current request was made with.
 *
 * The version is taken to be everything that follows a slash in `$_SERVER["SERVER_PROTOCOL"]`.
 *
 * @return CUStringObject The version of the HTTP protocol used by the request.
 */
public static function protocolVersion()
{
    $httpAndVer = $_SERVER["SERVER_PROTOCOL"];

    // The local names are kept as they are because the assertion below refers to `$res` by name and reports all
    // of the defined variables.
    $res = CRegex::find($httpAndVer, "/(?<=\\/).*\\z/", $foundString);
    assert('$res', vs(isset($this), get_defined_vars()));

    return $foundString;
}
/**
 * Reads the original value of a storage-related PHP configuration option and converts it into a floating-point
 * number expressed in the requested storage unit.
 *
 * @param  string $optionName The name of the option.
 * @param  enum $inUnit **OPTIONAL. Default is** `CUUnit::BYTE`. The storage unit for the output value.
 *
 * @return float The value of the option.
 */
public static function configOptionOrigStorageFloat($optionName, $inUnit = CUUnit::BYTE)
{
    assert('is_cstring($optionName) && is_enum($inUnit)', vs(isset($this), get_defined_vars()));

    $optionValue = self::configOptionOrig($optionName);
    $srcUnit = self::storageUnitFromOptionValue($optionValue);

    // The numeric part is the run of digits at the very beginning of the option's value. The local names stay
    // untouched since the assertion refers to `$found` by name.
    $found = CRegex::find($optionValue, "/^\\d+/", $strValue);
    assert('$found', vs(isset($this), get_defined_vars()));

    return CUUnit::convertStoragef(CString::toFloat($strValue), $srcUnit, $inUnit);
}
/**
 * Checks whether a string holds a valid URL.
 *
 * @param  string $url The URL string to be looked into.
 * @param  bool $ignoreProtocolAbsence **OPTIONAL. Default is** `false`. Tells whether the URL may still be
 * considered valid when it does not indicate any protocol; in that case the check is run on the URL prefixed
 * with the default protocol.
 *
 * @return bool `true` if the URL in the string is valid, `false` otherwise.
 */
public static function isValid($url, $ignoreProtocolAbsence = false)
{
    assert('is_cstring($url) && is_bool($ignoreProtocolAbsence)', vs(isset($this), get_defined_vars()));

    $urlParts = parse_url($url);
    if ( !is_cmap($urlParts) )
    {
        return false;
    }

    if ( $ignoreProtocolAbsence && !CMap::hasKey($urlParts, "scheme") )
    {
        // No protocol seems to be specified, so retry the parsing with the default one in front.
        $url = self::DEFAULT_PROTOCOL . "://{$url}";
        $urlParts = parse_url($url);
        if ( !is_cmap($urlParts) || !CMap::hasKey($urlParts, "scheme") )
        {
            return false;
        }
    }

    if ( is_cstring(filter_var($url, FILTER_VALIDATE_URL)) )
    {
        return true;
    }

    // `filter_var` could fail to recognize an IPv6 address (enclosed in square brackets) as the URL's host. If the
    // host is a valid IPv6 address, substitute an IPv4 address for it and give the URL another try.
    if ( !CMap::hasKey($urlParts, "host") )
    {
        return false;
    }
    $bracketedHost = $urlParts["host"];
    if ( !CRegex::find($bracketedHost, "/^\\[.*\\]\\z/") )
    {
        return false;
    }
    $ipV6 = CString::substr($bracketedHost, 1, CString::length($bracketedHost) - 2);
    if ( !CIp::isValidV6($ipV6) )
    {
        return false;
    }

    // Replacing the bracketed address anywhere else in the string should not influence the validity.
    $urlWithIpV4 = CString::replace($url, "[{$ipV6}]", "127.0.0.1");
    return ( is_cstring(filter_var($urlWithIpV4, FILTER_VALIDATE_URL)) && is_cmap(parse_url($urlWithIpV4)) );
}
/**
 * Runs a command pipeline, such as "command1 | command2", and returns its output, reporting an error with a
 * specified message if the pipeline does not succeed.
 *
 * As opposed to `execCommandM` method, a failure of any command in the pipeline makes the entire pipeline fail.
 *
 * @param  string $commands The command pipeline to be executed.
 * @param  mixed $message The error message to be issued if the pipeline fails, or the number of the line at
 * which the error occurred (obtained using `__LINE__` magic constant), in which case the error message is
 * generated automatically.
 * @param  bool $exitOnFail **OPTIONAL. Default is** `true`. Tells whether to exit the script if the pipeline did
 * not succeed.
 *
 * @return CUStringObject The output of the command pipeline.
 */
public static function execCommandPipeM($commands, $message, $exitOnFail = true)
{
    assert('is_cstring($commands) && (is_cstring($message) || is_int($message)) && is_bool($exitOnFail)',
        vs(isset($this), get_defined_vars()));

    // A line number may arrive as an integer; from here on the message is always handled as a string.
    settype($message, "string");

    $output = self::execCommandPipe($commands, $pipeSucceeded);
    if ( $pipeSucceeded )
    {
        return $output;
    }

    if ( CRegex::find($message, "/^\\d+\\z/") )
    {
        // The message consists of digits only, so it is a line number to be reported.
        $message = "The script encountered an error while executing a command pipe:" .
            "\n{$commands}\nReported line: {$message}.";
    }
    self::onError($exitOnFail, $message);

    return $output;
}
/**
 * Tells whether a string contains any characters from the Chinese, Japanese, or Korean scripts.
 *
 * A character counts as CJK here if its code point lies within U+2E80..U+9FFF or U+F900..U+FAFF.
 *
 * NOTE(review): Hangul syllables (U+AC00..U+D7AF) lie outside both ranges, so a string made up only of them is
 * not reported; confirm whether this is intended.
 *
 * @param  string $string The string to be looked into.
 *
 * @return bool `true` if the string contains at least one CJK character, `false` otherwise.
 */
public static function hasCjkChar($string)
{
    // U+2E80-U+9FFF, U+F900-U+FAFF
    $cjkRangesRe = "/[\\x{2E80}-\\x{9FFF}\\x{F900}-\\x{FAFF}]/u";
    return CRegex::find($string, $cjkRangesRe);
}
/**
 * Removes the request headers that go by a specified name.
 *
 * A header is matched case-insensitively on the part that precedes the colon, with any horizontal whitespace
 * around the name being ignored.
 *
 * @param  string $headerName The name of the header(s) to be removed.
 *
 * @return void
 */
protected function removeHeader($headerName)
{
    $headerName = CString::trim($headerName);

    // Presumably the first argument is a stored header line and the second one is the name being looked for;
    // the name is passed through `CRegex::enterTd` before being put into the pattern.
    $startsWithName = function ($candidate, $name)
        {
            return CRegex::find($candidate, "/^\\h*" . CRegex::enterTd($name) . "\\h*:/i");
        };

    CArray::removeByValue($this->m_requestHeaders, $headerName, $startsWithName);
}
/**
 * Recursively collects the paths to the subdirectories of a directory whose names match a regular expression
 * pattern.
 *
 * The pattern is applied to the name of every subdirectory only, not to its entire path, and the subdirectories
 * of the subdirectories are looked into as well. The returned paths are always absolute.
 *
 * @param  string $inDirectoryPath The path to the directory to be looked into (not required to end with "/").
 * @param  string $regexPattern The regular expression pattern to be used for searching.
 * @param  bool $sort **OPTIONAL. Default is** `false`. Tells whether the returned paths should be sorted, in the
 * ascending order.
 *
 * @return CArrayObject The paths to the subdirectories found by the regular expression pattern specified,
 * including ones found in the subdirectories of the specified directory and so on.
 */
public static function reFindDirectoriesOnNameRecursive($inDirectoryPath, $regexPattern, $sort = false)
{
    assert('is_cstring($inDirectoryPath) && is_cstring($regexPattern) && is_bool($sort)',
        vs(isset($this), get_defined_vars()));

    $inDirectoryPath = CFilePath::frameworkPath($inDirectoryPath);

    $directoryPaths = self::listDirectoriesRecursive($inDirectoryPath, $sort);
    $nameMatches = function ($path) use ($regexPattern)
        {
            return CRegex::find(CFilePath::name($path), $regexPattern);
        };

    return oop_a(CArray::filter($directoryPaths, $nameMatches));
}
/**
 * Tells whether the string contains a match for a regular expression pattern, optionally reporting the substring
 * that matched.
 *
 * The pattern is passed through `ensureUModifier` before being used.
 *
 * @param  string $findPattern The searched pattern.
 * @param  reference $foundString **OPTIONAL. OUTPUT.** If the pattern has been found after the method was called
 * with this parameter provided, the parameter's value, which is of type `CUStringObject`, is the first substring
 * that matched the pattern.
 *
 * @return bool `true` if the pattern was found in the string, `false` otherwise.
 */
public function reFind($findPattern, &$foundString = null)
{
    $pattern = self::ensureUModifier($findPattern);

    $wasFound = CRegex::find($this, $pattern, $foundString);

    // Hand the output value over in its OOP form.
    $foundString = to_oop($foundString);

    return $wasFound;
}
/**
 * Puts a string or a collection of strings through the filter according to the expected output type(s) and
 * returns the resulting value(s).
 *
 * When the filtering fails, the default value of the filter is returned and `$success` is set to `false`.
 *
 * @param  mixed $inputStringOrDecodedCollection The string to be filtered or the array or map containing the
 * strings to be filtered. If the expected type is an array or map and the parameter's value is a string, the
 * string is assumed to be JSON-encoded and the output value is going to be either an array or map.
 * @param  reference $success **OUTPUT.** After the method is called, the value of this parameter tells whether
 * the filtering was successful.
 *
 * @return mixed The output value or a collection of values of the expected type(s) after having been put through
 * the filter.
 */
public function filter($inputStringOrDecodedCollection, &$success)
{
    assert('is_cstring($inputStringOrDecodedCollection) || is_collection($inputStringOrDecodedCollection)',
        vs(isset($this), get_defined_vars()));

    $success = true;

    $input = $inputStringOrDecodedCollection;
    $type = $this->m_expectedType;

    if ( $type == self::CARRAY || $type == self::CMAP )
    {
        // The expected output type is a collection; arrays and maps only differ in the type check.
        $wantArray = ( $type == self::CARRAY );

        if ( is_cstring($input) )
        {
            // Assume JSON format for the input string.
            $json = new CJson($input, $this->m_jsonStrictness);
            $collection = $json->decode($success);
            if ( !$success )
            {
                return oop_x($this->m_defaultValue);
            }
            $isOfExpectedKind = ( $wantArray ) ? is_carray($collection) : is_cmap($collection);
            if ( !$isOfExpectedKind )
            {
                $success = false;
                return oop_x($this->m_defaultValue);
            }
        }
        else
        {
            $isOfExpectedKind = ( $wantArray ) ? is_carray($input) : is_cmap($input);
            if ( !$isOfExpectedKind )
            {
                $success = false;
                return oop_x($this->m_defaultValue);
            }
            $collection = $input;
        }

        $collection = self::recurseCollectionFiltering($collection, $this->m_collectionInputFilters, $success, 0);
        if ( !$success )
        {
            return oop_x($this->m_defaultValue);
        }
        return $collection;
    }

    // The expected output type is not a collection; the input value must be of string type.
    if ( !is_cstring($input) )
    {
        $success = false;
        return oop_x($this->m_defaultValue);
    }
    $text = $input;

    $isNumberType = ( $type == self::INT || $type == self::FLOAT );

    if ( $type == self::BOOL || $isNumberType || $type == self::EMAIL || $type == self::URL ||
         $type == self::IP )
    {
        // Trim the input string on both sides from whitespace, including Unicode whitespace and control
        // characters.
        $trimRe = CUString::TRIMMING_AND_SPACING_NORM_SUBJECT_RE;
        $text = CRegex::remove($text, "/^({$trimRe})+|({$trimRe})+\\z/u");
    }

    if ( $isNumberType )
    {
        // Pre-process the string for integer and floating-point types.
        if ( CString::startsWith($text, "+") )
        {
            // Remove the plus sign.
            $text = CString::substr($text, 1);
        }

        $looksLikeHex = CRegex::find($text, "/^-?0x/i");

        if ( $this->m_allowLeadingZeros &&
             !($type == self::INT && $this->m_allowHex && $looksLikeHex) )
        {
            // Remove any leading zeros (except for special cases).
            $text = CRegex::replace($text, "/^(\\D*)0*(?!\\b)/", "\$1");
        }

        if ( $this->m_allowComma )
        {
            // Remove the commas that are followed by a group of exactly three digits.
            $text = CRegex::remove($text, "/,(?=\\d{3}\\b)/");
        }
    }

    // Validate and sanitize the value according to its expected type.
    switch ( $type )
    {
        case self::BOOL:
            if ( !CRegex::find($text, "/^(1|true|yes|on|0|false|no|off)\\z/i") )
            {
                $success = false;
                return $this->m_defaultValue;
            }
            return (
                CString::equals($text, "1") ||
                CString::equalsCi($text, "true") ||
                CString::equalsCi($text, "yes") ||
                CString::equalsCi($text, "on") );

        case self::INT:
            if ( $this->m_allowHex && $looksLikeHex )
            {
                // Hex.
                if ( !CRegex::find($text, "/^-?0x[0-9A-F]+\\z/i") )
                {
                    $success = false;
                    return $this->m_defaultValue;
                }
                $number = CString::toIntFromHex($text);
            }
            else
            {
                // Regular.
                if ( !CRegex::find($text, "/^-?(?!0(?!\\b))\\d+\\z/") )
                {
                    $success = false;
                    return $this->m_defaultValue;
                }
                $number = CString::toInt($text);
            }

            if ( (isset($this->m_intValidMin) && $number < $this->m_intValidMin) ||
                 (isset($this->m_intValidMax) && $number > $this->m_intValidMax) )
            {
                // Outside of the valid range.
                $success = false;
                return $this->m_defaultValue;
            }

            if ( isset($this->m_intClampingMin) && $number < $this->m_intClampingMin )
            {
                $number = $this->m_intClampingMin;
            }
            if ( isset($this->m_intClampingMax) && $number > $this->m_intClampingMax )
            {
                $number = $this->m_intClampingMax;
            }
            return $number;

        case self::FLOAT:
            if ( !CRegex::find($text, "/^-?(?!0(?!\\b))\\d*\\.?\\d+(e[\\-+]?\\d+)?\\z/i") )
            {
                $success = false;
                return $this->m_defaultValue;
            }
            $number = CString::toFloat($text);

            if ( (isset($this->m_floatValidMin) && $number < $this->m_floatValidMin) ||
                 (isset($this->m_floatValidMax) && $number > $this->m_floatValidMax) )
            {
                // Outside of the valid range.
                $success = false;
                return $this->m_defaultValue;
            }

            if ( isset($this->m_floatClampingMin) && $number < $this->m_floatClampingMin )
            {
                $number = $this->m_floatClampingMin;
            }
            if ( isset($this->m_floatClampingMax) && $number > $this->m_floatClampingMax )
            {
                $number = $this->m_floatClampingMax;
            }
            return $number;

        case self::CSTRING:
            $string = $text;
            if ( !CString::isValid($string) )
            {
                $success = false;
                return $this->m_defaultValue;
            }

            if ( !$this->m_keepAbnormalNewlines )
            {
                $string = CString::normNewlines($string);
            }

            if ( !$this->m_keepNonPrintable )
            {
                // Remove the non-printable bytes, sparing HT, LF, and CR if tabs and newlines are to be kept.
                $removalRe = ( $this->m_keepTabsAndNewlines ) ?
                    "/[\\x00-\\x1F\\x7F-\\xFF](?<![\\x09\\x0A\\x0D])/" :
                    "/[\\x00-\\x1F\\x7F-\\xFF]/";
                $string = CRegex::remove($string, $removalRe);
            }
            else if ( !$this->m_keepTabsAndNewlines )
            {
                $string = CRegex::remove($string, "/[\\x09\\x0A\\x0D]/");
            }

            if ( !$this->m_keepSideSpacing )
            {
                $string = CString::trim($string);
            }
            if ( !$this->m_keepExtraSpacing )
            {
                $string = CString::normSpacing($string);
            }
            return $string;

        case self::CUSTRING:
            $string = $text;
            if ( !CUString::isValid($string) )
            {
                $success = false;
                return $this->m_defaultValue;
            }

            if ( !$this->m_keepAbnormalNewlines )
            {
                $string = CUString::normNewlines($string);
            }

            if ( !$this->m_keepNonPrintable )
            {
                // Remove the characters of the "Other" category, sparing HT, LF, and CR if tabs and newlines are
                // to be kept; otherwise, the line and paragraph separators are removed too.
                $removalRe = ( $this->m_keepTabsAndNewlines ) ?
                    "/\\p{C}(?<!\\x{0009}|\\x{000A}|\\x{000D})/u" :
                    "/\\p{C}|\\p{Zl}|\\p{Zp}/u";
                $string = CRegex::remove($string, $removalRe);
            }
            else if ( !$this->m_keepTabsAndNewlines )
            {
                $string = CRegex::remove($string, "/\\x{0009}|\\x{000A}|\\x{000D}|\\p{Zl}|\\p{Zp}/u");
            }

            if ( !$this->m_keepSideSpacing )
            {
                $string = CUString::trim($string);
            }
            if ( !$this->m_keepExtraSpacing )
            {
                $string = CUString::normSpacing($string);
            }
            return $string;

        case self::EMAIL:
            $email = filter_var($text, FILTER_VALIDATE_EMAIL);
            if ( !is_cstring($email) )
            {
                $success = false;
                return $this->m_defaultValue;
            }
            return $email;

        case self::URL:
            $url = $text;
            if ( !CUrl::isValid($url, $this->m_ignoreProtocolAbsence) )
            {
                $success = false;
                return $this->m_defaultValue;
            }
            if ( $this->m_ignoreProtocolAbsence )
            {
                $url = CUrl::ensureProtocol($url);
            }
            return $url;

        case self::IP:
            $ip = $text;

            $options = CBitField::ALL_UNSET;
            if ( !$this->m_allowPrivateRange )
            {
                $options |= CIp::DISALLOW_PRIVATE_RANGE;
            }
            if ( !$this->m_allowReservedRange )
            {
                $options |= CIp::DISALLOW_RESERVED_RANGE;
            }

            if ( $this->m_ipV4OrV6 )
            {
                $ipIsValid = ( CIp::isValidV4($ip, $options) || CIp::isValidV6($ip, $options) );
            }
            else if ( $this->m_ipV6 )
            {
                $ipIsValid = CIp::isValidV6($ip, $options);
            }
            else
            {
                $ipIsValid = CIp::isValidV4($ip, $options);
            }

            if ( !$ipIsValid )
            {
                $success = false;
                return $this->m_defaultValue;
            }
            return $ip;
    }
}
/**
 * Decodes the JSON-encoded string provided earlier to the decoder and returns the result.
 *
 * Depending on the decoding strictness, the source string is pre-processed before being decoded:
 *
 * - with `STRICT_WITH_COMMENTS` and `LENIENT`, line and block comments are removed;
 * - with `LENIENT`, the character encoding is fixed if the string is not valid, single-quoted string values and
 *   property names are converted into double-quoted ones, bare property names are put in double quotes, trailing
 *   commas are removed, and BS, FF, LF, CR, and HT bytes inside of string values are escaped. If the decoding
 *   still fails and the string looks like a JSONP, the function's argument is extracted and decoded instead.
 *
 * With `STRICT` and `STRICT_WITH_COMMENTS`, only an object or an array is accepted as the decoded value.
 *
 * @param  reference $success **OPTIONAL. OUTPUT.** After the method is called with this parameter provided, the
 * parameter's value tells whether the decoding was successful.
 *
 * @return mixed The decoded value of type `CMapObject` or `CArrayObject`. Nothing is returned if the decoding
 * was not successful.
 */
public function decode(&$success = null)
{
    assert('is_cstring($this->m_source)', vs(isset($this), get_defined_vars()));

    $success = true;

    $source = $this->m_source;

    $isLenient = ( $this->m_decodingStrictness == self::LENIENT );
    $isStrict = ( $this->m_decodingStrictness == self::STRICT ||
        $this->m_decodingStrictness == self::STRICT_WITH_COMMENTS );

    // An alternative that consumes a double-quoted string value and makes the regex engine skip over it, so that
    // the alternatives that follow it in a pattern never match inside of string values.
    $skipStringsRe = "/(?<!\\\\)\"(?:[^\\\\\"]++|\\\\{2}|\\\\\\C)*\"(*SKIP)(*FAIL)|";

    if ( $isLenient && !CUString::isValid($source) )
    {
        // Change the character encoding or try fixing it.
        if ( CEString::looksLikeLatin1($source) )
        {
            $source = CEString::convertLatin1ToUtf8($source);
        }
        else
        {
            $source = CEString::fixUtf8($source);
        }
    }

    if ( $this->m_decodingStrictness == self::STRICT_WITH_COMMENTS || $isLenient )
    {
        if ( CRegex::find($source, "/\\/\\/|\\/\\*/u") )
        {
            // Remove "//..." and "/*...*/" comments.
            $source = CRegex::remove($source, $skipStringsRe .
                "\\/\\/.*|\\/\\*\\C*?\\*\\//u");
        }
    }

    if ( $isLenient )
    {
        if ( CRegex::find($source, "/[:\\[,]\\s*'([^\\\\']++|\\\\{2}|\\\\\\C)*'(?=\\s*[,}\\]])/u") )
        {
            // Convert single-quoted string values into double-quoted, taking care of double quotes within such
            // strings before and single quotes after. This needs to go in front of the rest of the leniency fixes.

            // Escape the double quotes found within single-quoted string values, one at a time, until the source
            // stops changing. The null check goes first so that the result of a failed replacement never reaches
            // the string comparison.
            do
            {
                $prevSource = $source;
                $source = CRegex::replace($source, $skipStringsRe .
                    "([:\\[,]\\s*'(?:[^\\\\'\"]++|\\\\{2}|\\\\\\C)*)\"((?:[^\\\\']++|\\\\{2}|\\\\\\C)*')/u",
                    "\$1\\\"\$2");
            } while ( !is_null($source) && !CString::equals($source, $prevSource) );
            if ( is_null($source) )
            {
                $source = "";
            }

            // Swap the enclosing single quotes for double quotes.
            $source = CRegex::replace($source, $skipStringsRe .
                "([:\\[,]\\s*)'((?:[^\\\\']++|\\\\{2}|\\\\\\C)*)'(?=\\s*[,}\\]])/u", "\$1\"\$2\"");

            // Unescape the single quotes that were escaped within the formerly single-quoted string values, again
            // one at a time until the source stops changing.
            do
            {
                $prevSource = $source;
                $source = CRegex::replace($source,
                    "/([:\\[,]\\s*\"(?:[^\\\\\"]++|\\\\{2}|\\\\[^'])*)\\\\'((?:[^\\\\\"]++|\\\\{2}|\\\\\\C)*\")" .
                    "(?=\\s*[,}\\]])/u", "\$1'\$2");
            } while ( !is_null($source) && !CString::equals($source, $prevSource) );
            if ( is_null($source) )
            {
                $source = "";
            }
        }

        if ( CRegex::find($source, "/[{,]\\s*[\\w\\-.]+\\s*:/u") )
        {
            // Put property names in double quotes.
            $source = CRegex::replace($source, $skipStringsRe .
                "([{,]\\s*)([\\w\\-.]+)(\\s*:)/u", "\$1\"\$2\"\$3");
        }

        if ( CRegex::find($source, "/[{,]\\s*'[\\w\\-.]+'\\s*:/u") )
        {
            // Put property names that are in single quotes in double quotes.
            $source = CRegex::replace($source, $skipStringsRe .
                "([{,]\\s*)'([\\w\\-.]+)'(\\s*:)/u", "\$1\"\$2\"\$3");
        }

        if ( CRegex::find($source, "/,\\s*[}\\]]/u") )
        {
            // Remove trailing commas.
            $source = CRegex::remove($source, $skipStringsRe .
                ",(?=\\s*[}\\]])/u");
        }

        // Within string values, convert byte values for BS, FF, LF, CR, and HT, which are prohibited in JSON,
        // to their escaped equivalents.
        $stringValueSubjectRe = "/(?<!\\\\)\"(?:[^\\\\\"]++|\\\\{2}|\\\\\\C)*\"/u";
        $source = CRegex::replaceWithCallback($source, $stringValueSubjectRe, function ($matches)
            {
                return CRegex::replace($matches[0], "/\\x{0008}/u", "\\b");
            });
        $source = CRegex::replaceWithCallback($source, $stringValueSubjectRe, function ($matches)
            {
                return CRegex::replace($matches[0], "/\\x{000C}/u", "\\f");
            });
        $source = CRegex::replaceWithCallback($source, $stringValueSubjectRe, function ($matches)
            {
                return CRegex::replace($matches[0], "/\\x{000A}/u", "\\n");
            });
        $source = CRegex::replaceWithCallback($source, $stringValueSubjectRe, function ($matches)
            {
                return CRegex::replace($matches[0], "/\\x{000D}/u", "\\r");
            });
        $source = CRegex::replaceWithCallback($source, $stringValueSubjectRe, function ($matches)
            {
                return CRegex::replace($matches[0], "/\\x{0009}/u", "\\t");
            });
    }

    $decodedValue = @json_decode($source, false, self::$ms_maxRecursionDepth);
    if ( is_null($decodedValue) )
    {
        if ( $isStrict )
        {
            $success = false;
        }
        else
        {
            // `json_decode` returns `null` both for a failure and for the JSON literal `null`, so remember
            // whether the decoder actually reported an error before anything else gets decoded.
            $decodingFailed = ( json_last_error() !== JSON_ERROR_NONE );

            if ( CRegex::find($source, "/^\\s*[\\w.]+\\s*\\(/u") )
            {
                // The source string appears to be a JSONP. Extract the function's argument and try decoding
                // again.
                $source = CRegex::replace($source, "/^\\s*[\\w.]+\\s*\\((\\C+)\\)/u", "\$1");
                $decodedValue = @json_decode($source, false, self::$ms_maxRecursionDepth);
                if ( is_null($decodedValue) )
                {
                    $success = false;
                }
            }
            else if ( $decodingFailed )
            {
                // Not a JSONP and not decodable either; do not report undecodable input as a success.
                $success = false;
            }
        }
    }
    if ( !$success )
    {
        return;
    }

    if ( $isStrict && !is_object($decodedValue) && !is_array($decodedValue) )
    {
        // In the strict modes, only an object or an array is accepted as the top-level value.
        $success = false;
        return;
    }

    // Recursively convert any object into a CMapObject/CMap and any PHP array into a CArrayObject/CArray.
    $decodedValue = self::recurseValueAfterDecoding($decodedValue, 0);

    return $decodedValue;
}