/**
 * Defined by Zend\Filter\Filter
 *
 * Turns a separator-delimited string into CamelCase: every letter that
 * directly follows the configured separator is upper-cased (the separator
 * itself is dropped), and afterwards a leading letter is upper-cased too.
 *
 * @param  string $value
 * @return string
 */
public function filter($value)
{
    $quotedSeparator = preg_quote($this->separator, '#');
    $unicode         = StringUtils::hasPcreUnicodeSupport();

    if ($unicode) {
        $afterSeparator = '#(' . $quotedSeparator . ')(\\p{L}{1})#u';
        $leadingLetter  = '#(^\\p{Ll}{1})#u';
    } else {
        $afterSeparator = '#(' . $quotedSeparator . ')([A-Za-z]{1})#';
        $leadingLetter  = '#(^[A-Za-z]{1})#';
    }

    // mb_strtoupper() is only used when both PCRE Unicode support and the
    // mbstring extension are available; otherwise plain strtoupper().
    if ($unicode && extension_loaded('mbstring')) {
        $toUpper = function ($text) {
            return mb_strtoupper($text, 'UTF-8');
        };
    } else {
        $toUpper = 'strtoupper';
    }

    // Builds a callback that upper-cases one capture group of a match.
    $upperCaseGroup = function ($group) use ($toUpper) {
        return function ($matches) use ($toUpper, $group) {
            return $toUpper($matches[$group]);
        };
    };

    // Group 2 is the letter after the separator; group 1 is the leading letter.
    $result = preg_replace_callback($afterSeparator, $upperCaseGroup(2), $value);
    $result = preg_replace_callback($leadingLetter, $upperCaseGroup(1), $result);

    return $result;
}
/**
 * Forces StringUtils to report "no PCRE Unicode support" for the tests.
 *
 * The test is skipped when PCRE genuinely lacks Unicode support. Otherwise
 * the static StringUtils::$hasPcreUnicodeSupport property is overwritten
 * with false through reflection; the ReflectionProperty is kept on
 * $this->reflection.
 */
public function setUp()
{
    if (!StringUtils::hasPcreUnicodeSupport()) {
        return $this->markTestSkipped('PCRE is not compiled with Unicode support');
    }

    $this->reflection = new ReflectionProperty('Zend\\Stdlib\\StringUtils', 'hasPcreUnicodeSupport');
    $this->reflection->setAccessible(true);

    // Static property: pass null as the object. The one-argument form of
    // setValue() is deprecated as of PHP 8.3; the two-argument form works
    // on every PHP version.
    $this->reflection->setValue(null, false);
}
/**
 * Defined by Zend\Filter\Filter
 *
 * Inserts the configured separator at camelCase word boundaries:
 *  - before an upper-case letter that follows a lower-case letter or digit;
 *  - inside a run of upper-case letters, before the upper-case letter that
 *    starts a new word (i.e. is followed by a lower-case letter).
 *
 * @param  string $value
 * @return string
 */
public function filter($value)
{
    if (StringUtils::hasPcreUnicodeSupport()) {
        // The "u" modifier is required here: without it PCRE evaluates
        // \p{..} byte by byte, so the bytes of multi-byte UTF-8 letters are
        // misclassified and separators end up in the wrong place.
        $pattern     = array('#(?<=(?:\\p{Lu}))(\\p{Lu}\\p{Ll})#u', '#(?<=(?:\\p{Ll}|\\p{Nd}))(\\p{Lu})#u');
        $replacement = array($this->separator . '\\1', $this->separator . '\\1');

        $filtered = preg_replace($pattern, $replacement, $value);
        if (null !== $filtered) {
            return $filtered;
        }
        // preg_replace() returned null (e.g. $value is not valid UTF-8):
        // fall through to the ASCII-only patterns instead of returning null.
    }

    $pattern     = array('#(?<=(?:[A-Z]))([A-Z]+)([A-Z][a-z])#', '#(?<=(?:[a-z0-9]))([A-Z])#');
    $replacement = array('\\1' . $this->separator . '\\2', $this->separator . '\\1');

    return preg_replace($pattern, $replacement, $value);
}
/**
 * Defined by Zend\Filter\FilterInterface
 *
 * Casts $value to string and strips every character that the selected
 * pattern does not treat as a digit.
 *
 * @param  string $value
 * @return string
 */
public function filter($value)
{
    $unicode = StringUtils::hasPcreUnicodeSupport();

    if ($unicode && extension_loaded('mbstring')) {
        // PCRE Unicode support together with mbstring
        $nonDigits = '/[^[:digit:]]/';
    } elseif ($unicode) {
        // PCRE Unicode support, but no mbstring
        $nonDigits = '/[\\p{^N}]/';
    } else {
        // No PCRE Unicode support: plain 0-9 match
        $nonDigits = '/[^0-9]/';
    }

    $subject = (string) $value;

    return preg_replace($nonDigits, '', $subject);
}
/**
 * Defined by Zend\Filter\Filter
 *
 * Runs the parent filter and then lower-cases the first character of the
 * result (of every element when $value is an array). Values that are neither
 * scalar nor array are returned untouched.
 *
 * @param  string|array $value
 * @return string|array
 */
public function filter($value)
{
    if (!is_scalar($value) && !is_array($value)) {
        return $value;
    }

    $value = parent::filter($value);

    $lowerCaseFirst = 'lcfirst';
    if (StringUtils::hasPcreUnicodeSupport() && extension_loaded('mbstring')) {
        // The encoding is passed explicitly instead of relying on
        // mb_internal_encoding(): when that setting is not UTF-8 the first
        // "character" would be a single byte and multi-byte letters would be
        // corrupted.
        $lowerCaseFirst = function ($value) {
            $length = mb_strlen($value, 'UTF-8');
            if (0 === $length) {
                return $value;
            }

            return mb_strtolower(mb_substr($value, 0, 1, 'UTF-8'), 'UTF-8')
                . mb_substr($value, 1, $length, 'UTF-8');
        };
    }

    return is_array($value) ? array_map($lowerCaseFirst, $value) : call_user_func($lowerCaseFirst, $value);
}
/**
 * Defined by Zend\Filter\FilterInterface
 *
 * Casts $value to string and strips every character that the selected
 * pattern does not treat as a digit.
 *
 * null is returned as null. Any other non-scalar value is returned
 * unfiltered after an E_USER_WARNING has been raised.
 *
 * @param  string $value
 * @return string|mixed
 */
public function filter($value)
{
    if (null === $value) {
        return null;
    }

    if (!is_scalar($value)) {
        $givenType = is_object($value) ? get_class($value) : gettype($value);
        trigger_error(
            sprintf('%s expects parameter to be scalar, "%s" given; cannot filter', __METHOD__, $givenType),
            E_USER_WARNING
        );

        return $value;
    }

    $unicode = StringUtils::hasPcreUnicodeSupport();

    if ($unicode && extension_loaded('mbstring')) {
        // PCRE Unicode support together with mbstring
        $nonDigits = '/[^[:digit:]]/';
    } elseif ($unicode) {
        // PCRE Unicode support, but no mbstring
        $nonDigits = '/[\\p{^N}]/';
    } else {
        // No PCRE Unicode support: plain 0-9 match
        $nonDigits = '/[^0-9]/';
    }

    $subject = (string) $value;

    return preg_replace($nonDigits, '', $subject);
}
/**
 * Splits a string into lower-cased keywords.
 *
 * The input is split on single spaces. Each piece is trimmed, lower-cased
 * and stripped of leading/trailing non-keyword characters; empty pieces are
 * skipped. When a piece still contains non-keyword characters in the middle,
 * both the whole piece and its sub-parts are added to the result.
 *
 * @param  string $string
 * @return array
 */
protected function getKeywords($string)
{
    // The "u" modifier may only be used when PCRE has Unicode support;
    // adding it unconditionally would make the fallback pattern unusable on
    // exactly the builds it exists for.
    if (StringUtils::hasPcreUnicodeSupport()) {
        $innerPattern = '[^\\p{L}]';
        $flags        = 'isu';
    } else {
        $innerPattern = '[^a-z0-9ßäöü ]';
        $flags        = 'is';
    }

    $pattern      = '~' . $innerPattern . '~' . $flags;
    $stripPattern = '~^' . $innerPattern . '+|' . $innerPattern . '+$~' . $flags;

    $parts = array();
    foreach (explode(' ', $string) as $part) {
        $part = strtolower(trim($part));
        $part = preg_replace($stripPattern, '', $part);

        // Loose comparison on purpose: also skips null from a failed preg_replace().
        if ('' == $part) {
            continue;
        }

        $parts[] = $part;

        // Break the piece apart at inner non-keyword characters.
        $tmpPart = preg_replace($pattern, ' ', $part);
        if ($part != $tmpPart) {
            $tmpParts = array_filter(explode(' ', $tmpPart));
            $parts    = array_merge($parts, $tmpParts);
        }
    }

    return $parts;
}
/**
 * Returns true if and only if $value is a number correctly expressed with the scientific notation
 *
 * Non-scalar and boolean input is rejected with INVALID_INPUT. The value
 * must contain the exponent symbol (NOT_SCIENTIFIC otherwise) and must pass
 * the IsFloat validator for the same locale (NOT_NUMBER otherwise).
 *
 * @param  mixed $value
 * @return bool
 */
public function isValid($value)
{
    if (!is_scalar($value) || is_bool($value)) {
        $this->error(self::INVALID_INPUT);
        return false;
    }

    $formatter = new \NumberFormatter($this->getLocale(), \NumberFormatter::SCIENTIFIC);

    $flags     = 'i';
    $expSymbol = 'E';
    if (StringUtils::hasPcreUnicodeSupport()) {
        // Quote for the "/" delimiter used below, not just for regex syntax.
        $expSymbol = preg_quote($formatter->getSymbol(\NumberFormatter::EXPONENTIAL_SYMBOL), '/');
        $flags    .= 'u';
    }

    // LEFT-TO-RIGHT MARK (U+200E), spelled as explicit UTF-8 bytes so the
    // invisible character cannot get lost when the file is edited or
    // re-encoded. It is removed from both the pattern and the value.
    $leftToRightMark = "\xE2\x80\x8E";

    // Check that exponentiation symbol is present
    $search = str_replace($leftToRightMark, '', sprintf('/%s/%s', $expSymbol, $flags));
    $value  = str_replace($leftToRightMark, '', $value);
    if (!preg_match($search, $value)) {
        $this->error(self::NOT_SCIENTIFIC);
        return false;
    }

    // Check that the number expressed in scientific notation is a valid number
    $float = new IsFloat(['locale' => $this->getLocale()]);
    if (!$float->isValid($value)) {
        $this->error(self::NOT_NUMBER);
        return false;
    }

    return true;
}
/**
 * Get all the regex components
 *
 * The components are built on first use and stored on the instance:
 * the number class and the pattern flags, both depending on whether PCRE
 * has Unicode support.
 *
 * @return array
 */
public function getRegexComponents()
{
    if ($this->regexComponents != null) {
        return $this->regexComponents;
    }

    $unicode = StringUtils::hasPcreUnicodeSupport();

    $this->regexComponents[self::REGEX_NUMBERS] = $unicode ? '\\p{N}' : '0-9';
    $this->regexComponents[self::REGEX_FLAGS]   = $unicode ? 'u' : '';

    return $this->regexComponents;
}
/**
 * StringUtils::hasPcreUnicodeSupport() must agree with a direct probe:
 * the PREG_BAD_UTF8_OFFSET_ERROR constant exists and a \pL pattern with the
 * "u" modifier matches a plain letter.
 */
public function testHasPcreUnicodeSupport()
{
    ErrorHandler::start();

    $expected = false;
    if (defined('PREG_BAD_UTF8_OFFSET_ERROR')) {
        $expected = (preg_match('/\pL/u', 'a') == 1);
    }

    ErrorHandler::stop();

    $actual = StringUtils::hasPcreUnicodeSupport();
    $this->assertSame($expected, $actual);
}
/**
 * Reports whether PCRE Unicode support is available.
 *
 * This method only forwards to StringUtils::hasPcreUnicodeSupport() and
 * returns its result unchanged.
 *
 * @return bool
 * @deprecated Since 2.1.0 (presumably in favour of calling
 *             StringUtils::hasPcreUnicodeSupport() directly - confirm)
 */
public static function hasPcreUnicodeSupport()
{
    return StringUtils::hasPcreUnicodeSupport();
}
/**
 * Returns true if and only if $value is a floating-point value. Uses the formal definition of a float as described
 * in the PHP manual: {@link http://www.php.net/float}
 *
 * Native ints and floats are accepted as-is. Strings are matched against
 * locale-aware patterns built from the NumberFormatter symbols of the
 * configured locale (scientific formatter when an exponent symbol is
 * present, decimal formatter otherwise).
 *
 * @param  string $value
 * @return bool
 * @throws Exception\InvalidArgumentException when the NumberFormatter reports a failure
 */
public function isValid($value)
{
    if (!is_scalar($value) || is_bool($value)) {
        $this->error(self::INVALID);
        return false;
    }

    $this->setValue($value);

    if (is_float($value) || is_int($value)) {
        return true;
    }

    // Need to check if this is scientific formatted string. If not, switch to decimal.
    $formatter = new NumberFormatter($this->getLocale(), NumberFormatter::SCIENTIFIC);

    if (intl_is_failure($formatter->getErrorCode())) {
        throw new Exception\InvalidArgumentException($formatter->getErrorMessage());
    }

    if (StringUtils::hasPcreUnicodeSupport()) {
        // The locale's exponent symbol is quoted so that it cannot break out
        // of the character class or clash with the "/" delimiter.
        $exponentialSymbols = '[Ee'
            . preg_quote($formatter->getSymbol(NumberFormatter::EXPONENTIAL_SYMBOL), '/')
            . ']+';
        $search = '/' . $exponentialSymbols . '/u';
    } else {
        $exponentialSymbols = '[Ee]';
        $search = '/' . $exponentialSymbols . '/';
    }

    if (!preg_match($search, $value)) {
        $formatter = new NumberFormatter($this->getLocale(), NumberFormatter::DECIMAL);
    }

    /**
     * @desc There are separator "look-alikes" for decimal and group separators that are more commonly used than the
     *       official unicode character. We need to replace those with the real thing - or remove it.
     */
    $groupSeparator = $formatter->getSymbol(NumberFormatter::GROUPING_SEPARATOR_SYMBOL);
    $decSeparator   = $formatter->getSymbol(NumberFormatter::DECIMAL_SEPARATOR_SYMBOL);

    // Special characters are written as explicit UTF-8 byte sequences so
    // they stay unambiguous regardless of editor or file encoding.
    $noBreakSpace        = "\xC2\xA0";     // U+00A0 NO-BREAK SPACE
    $arabicThousandsSep  = "\xD9\xAC";     // U+066C ARABIC THOUSANDS SEPARATOR
    $arabicDecimalSep    = "\xD9\xAB";     // U+066B ARABIC DECIMAL SEPARATOR
    $leftToRightMark     = "\xE2\x80\x8E"; // U+200E LEFT-TO-RIGHT MARK

    //NO-BREAK SPACE and ARABIC THOUSANDS SEPARATOR
    if ($groupSeparator === $noBreakSpace) {
        // A plain space typed as group separator stands in for the NO-BREAK SPACE
        $value = str_replace(' ', $groupSeparator, $value);
    } elseif ($groupSeparator === $arabicThousandsSep) {
        //NumberFormatter doesn't have grouping at all for Arabic-Indic
        $value = str_replace(array('\'', $groupSeparator), '', $value);
    }

    //ARABIC DECIMAL SEPARATOR
    if ($decSeparator === $arabicDecimalSep) {
        $value = str_replace(',', $decSeparator, $value);
    }

    $groupSeparatorPosition = $this->wrapper->strpos($value, $groupSeparator);
    $decSeparatorPosition   = $this->wrapper->strpos($value, $decSeparator);

    //We have separators, and they are flipped. i.e. 2.000,000 for en-US
    // Compare against false explicitly: a separator at position 0 is a
    // real hit and must not be mistaken for "not found".
    if (false !== $groupSeparatorPosition
        && false !== $decSeparatorPosition
        && $groupSeparatorPosition > $decSeparatorPosition
    ) {
        return false;
    }

    //If we have Unicode support, we can use the real graphemes, otherwise, just the ASCII characters
    $decimal     = '[' . preg_quote($decSeparator, '/') . ']';
    $prefix      = '[+-]';
    $exp         = $exponentialSymbols;
    $numberRange = '0-9';
    $useUnicode  = '';
    $suffix      = '';

    if (StringUtils::hasPcreUnicodeSupport()) {
        $prefix = '['
            . preg_quote(
                $formatter->getTextAttribute(NumberFormatter::POSITIVE_PREFIX)
                . $formatter->getTextAttribute(NumberFormatter::NEGATIVE_PREFIX)
                . $formatter->getSymbol(NumberFormatter::PLUS_SIGN_SYMBOL)
                . $formatter->getSymbol(NumberFormatter::MINUS_SIGN_SYMBOL),
                '/'
            )
            . ']{0,3}';
        $suffix = $formatter->getTextAttribute(NumberFormatter::NEGATIVE_SUFFIX)
            ? '['
                . preg_quote(
                    $formatter->getTextAttribute(NumberFormatter::POSITIVE_SUFFIX)
                    . $formatter->getTextAttribute(NumberFormatter::NEGATIVE_SUFFIX)
                    . $formatter->getSymbol(NumberFormatter::PLUS_SIGN_SYMBOL)
                    . $formatter->getSymbol(NumberFormatter::MINUS_SIGN_SYMBOL),
                    '/'
                )
                . ']{0,3}'
            : '';
        $numberRange = '\\p{N}';
        $useUnicode  = 'u';
    }

    /**
     * @desc Match against the formal definition of a float. The
     *       exponential number check is modified for RTL non-Latin number
     *       systems (Arabic-Indic numbering). I'm also switching out the period
     *       for the decimal separator. The formal definition leaves out +- from
     *       the integer and decimal notations so add that. This also checks
     *       that a grouping separator is not in the last GROUPING_SIZE graphemes
     *       of the string - i.e. 10,6 is not valid for en-US.
     * @see http://www.php.net/float
     */
    $lnum    = '[' . $numberRange . ']+';
    $dnum    = '(([' . $numberRange . ']*' . $decimal . $lnum . ')|(' . $lnum . $decimal . '[' . $numberRange . ']*))';
    $expDnum = '((' . $prefix . '((' . $lnum . '|' . $dnum . ')' . $exp . $prefix . $lnum . ')' . $suffix . ')|' . '(' . $suffix . '(' . $lnum . $prefix . $exp . '(' . $dnum . '|' . $lnum . '))' . $prefix . '))';

    // LEFT-TO-RIGHT MARK (U+200E) is messing up everything for the handful
    // of locales that have it
    $lnumSearch    = str_replace($leftToRightMark, '', '/^' . $prefix . $lnum . $suffix . '$/' . $useUnicode);
    $dnumSearch    = str_replace($leftToRightMark, '', '/^' . $prefix . $dnum . $suffix . '$/' . $useUnicode);
    $expDnumSearch = str_replace($leftToRightMark, '', '/^' . $expDnum . '$/' . $useUnicode);
    $value         = str_replace($leftToRightMark, '', $value);

    $unGroupedValue = str_replace($groupSeparator, '', $value);

    // No strrpos() in wrappers yet. ICU 4.x doesn't have grouping size for
    // everything. ICU 52 has 3 for ALL locales.
    $groupSize       = $formatter->getAttribute(NumberFormatter::GROUPING_SIZE) ?: 3;
    $lastStringGroup = $this->wrapper->substr($value, -$groupSize);

    $matchesNumber = preg_match($lnumSearch, $unGroupedValue)
        || preg_match($dnumSearch, $unGroupedValue)
        || preg_match($expDnumSearch, $unGroupedValue);

    // A group separator inside the last group (e.g. "10,6" for en-US) is invalid.
    if ($matchesNumber && false === $this->wrapper->strpos($lastStringGroup, $groupSeparator)) {
        return true;
    }

    return false;
}
/**
 * _inflectName
 *
 * Converts a camelCase name to lower-case, dash-separated form: a dash is
 * inserted at every camelCase word boundary and the result is passed
 * through strtolower().
 *
 * @param  string $name
 * @return string
 */
private function _inflectName($name)
{
    $inflected = null;

    if (StringUtils::hasPcreUnicodeSupport()) {
        // The "u" modifier is required here: without it PCRE evaluates
        // \p{..} byte by byte, so the bytes of multi-byte UTF-8 letters are
        // misclassified and dashes end up in the wrong place.
        $pattern     = ['#(?<=(?:\\p{Lu}))(\\p{Lu}\\p{Ll})#u', '#(?<=(?:\\p{Ll}|\\p{Nd}))(\\p{Lu})#u'];
        $replacement = ['-\\1', '-\\1'];

        $inflected = preg_replace($pattern, $replacement, $name);
    }

    // No Unicode support, or preg_replace() returned null (e.g. $name is not
    // valid UTF-8): use the ASCII-only patterns.
    if (null === $inflected) {
        $pattern     = ['#(?<=(?:[A-Z]))([A-Z]+)([A-Z][a-z])#', '#(?<=(?:[a-z0-9]))([A-Z])#'];
        $replacement = ['\\1-\\2', '-\\1'];

        $inflected = preg_replace($pattern, $replacement, $name);
    }

    return strtolower($inflected);
}
/**
 * Inserts an underscore at every camelCase word boundary.
 *
 * Accepts a scalar or an array of subjects; any other value is returned
 * untouched. Letter case is not changed.
 *
 * @param  mixed $value
 * @return mixed
 */
protected function camelCaseToUnderscore($value)
{
    if (!is_scalar($value) && !is_array($value)) {
        return $value;
    }

    if (StringUtils::hasPcreUnicodeSupport()) {
        // The "u" modifier is required here: without it PCRE evaluates
        // \p{..} byte by byte, so the bytes of multi-byte UTF-8 letters are
        // misclassified and underscores end up in the wrong place.
        $pattern     = ['#(?<=(?:\\p{Lu}))(\\p{Lu}\\p{Ll})#u', '#(?<=(?:\\p{Ll}|\\p{Nd}))(\\p{Lu})#u'];
        $replacement = ['_\\1', '_\\1'];

        $converted = preg_replace($pattern, $replacement, $value);

        // preg_replace() signals failure (e.g. a subject that is not valid
        // UTF-8) with null; for array input a failed entry is expected to be
        // missing from the result instead, hence the count comparison.
        $complete = is_array($value)
            ? (is_array($converted) && count($converted) === count($value))
            : (null !== $converted);

        if ($complete) {
            return $converted;
        }
    }

    // No Unicode support, or the Unicode patterns could not be applied:
    // use the ASCII-only patterns.
    $pattern     = ['#(?<=(?:[A-Z]))([A-Z]+)([A-Z][a-z])#', '#(?<=(?:[a-z0-9]))([A-Z])#'];
    $replacement = ['\\1_\\2', '_\\1'];

    return preg_replace($pattern, $replacement, $value);
}