/**
 * Convert a PCRE regexp into a JavaScript regexp literal
 *
 * The regexp is parsed with RegexpParser, then rebuilt piece by piece: the
 * text found between tokens goes through convertUnicodeCharacters() and every
 * token is re-emitted in a form JavaScript understands.
 *
 * @param  string $regexp   Original regexp, with delimiters and modifiers
 * @param  bool   $isGlobal Whether to append the "g" flag to the result
 * @return string           Regexp literal of the form /.../flags
 *
 * @throws RuntimeException if the regexp contains an option other than J,
 *                          subpattern options, or an unsupported token type
 */
public static function toJS($regexp, $isGlobal = \false)
 {
     $regexpInfo = RegexpParser::parse($regexp);
     $source     = $regexpInfo['regexp'];
     $dotAll     = \strpos($regexpInfo['modifiers'], 's') !== \false;

     // The JavaScript source is accumulated here while $offset tracks how much
     // of the original source has been consumed so far
     $js     = '';
     $offset = 0;
     foreach ($regexpInfo['tokens'] as $token) {
         // Convert the plain text located between the previous token and this one
         $literal = \substr($source, $offset, $token['pos'] - $offset);
         $js     .= self::convertUnicodeCharacters($literal, \false, $dotAll);

         switch ($token['type']) {
             case 'option':
                 // The J option is silently dropped, anything else is rejected
                 if ($token['options'] !== 'J') {
                     throw new RuntimeException('Regexp options are not supported');
                 }
                 break;
             case 'capturingSubpatternStart':
                 $js .= '(';
                 break;
             case 'nonCapturingSubpatternStart':
                 if (!empty($token['options'])) {
                     throw new RuntimeException('Subpattern options are not supported');
                 }
                 $js .= '(?:';
                 break;
             case 'capturingSubpatternEnd':
             case 'nonCapturingSubpatternEnd':
                 // Only the first character of the quantifiers is kept
                 $js .= ')' . \substr($token['quantifiers'], 0, 1);
                 break;
             case 'characterClass':
                 // The content is converted in character class mode, with the dotAll flag off
                 $js .= '[' . self::convertUnicodeCharacters($token['content'], \true, \false) . ']'
                      . \substr($token['quantifiers'], 0, 1);
                 break;
             case 'lookaheadAssertionStart':
                 $js .= '(?=';
                 break;
             case 'negativeLookaheadAssertionStart':
                 $js .= '(?!';
                 break;
             case 'lookaheadAssertionEnd':
             case 'negativeLookaheadAssertionEnd':
                 $js .= ')';
                 break;
             default:
                 throw new RuntimeException("Unsupported token type '" . $token['type'] . "'");
         }

         // Resume right after the token we've just handled
         $offset = $token['pos'] + $token['len'];
     }

     // Convert whatever follows the last token
     $js .= self::convertUnicodeCharacters(\substr($source, $offset), \false, $dotAll);

     // If the original delimiter wasn't a slash, unescaped slashes (those preceded by an
     // even number of backslashes) must be escaped before being enclosed in slashes
     if ($regexpInfo['delimiter'] !== '/') {
         $js = \preg_replace('#(?<!\\\\)((?:\\\\\\\\)*+)/#', '$1\\/', $js);
     }

     // Only the i and m modifiers are carried over, g is added on request
     $flags = \preg_replace('#[^im]#', '', $regexpInfo['modifiers']);

     return '/' . self::escapeLineTerminators($js) . '/' . $flags . ($isGlobal ? 'g' : '');
 }
Example #2
0
 /**
  * Test whether the regexp stored in $this->vars['regexp'] rejects every character disallowed in CSS
  *
  * @return bool TRUE if no disallowed character matches the allowed-character regexp,
  *              FALSE otherwise or if an exception is raised during the check
  */
 public function isSafeInCSS()
 {
     try {
         $allowedChars = RegexpParser::getAllowedCharacterRegexp($this->vars['regexp']);
         $isSafe       = \true;
         foreach (ContextSafeness::getDisallowedCharactersInCSS() as $disallowed) {
             // One disallowed character getting through is enough to fail the check
             if (\preg_match($allowedChars, $disallowed)) {
                 $isSafe = \false;
                 break;
             }
         }
         return $isSafe;
     } catch (Exception $e) {
         // Any failure is treated as unsafe
         return \false;
     }
 }
Example #3
0
 /**
  * Build a standalone, fully anchored regexp for each named capture
  *
  * Each expression is enclosed in the original delimiter, anchored with ^ and $,
  * and given the original modifiers plus D if it was not already present.
  *
  * @return array Array of [capture name => regexp]
  */
 public function getNamedCaptures()
 {
     $info      = RegexpParser::parse($this->regexp);
     $delimiter = $info['delimiter'];

     // Reuse the original modifiers and make sure D is among them
     $modifiers = $info['modifiers'];
     if (strpos($modifiers, 'D') === false) {
         $modifiers .= 'D';
     }

     $prefix = $delimiter . '^';
     $suffix = '$' . $delimiter . $modifiers;

     $captures = [];
     foreach ($this->getNamedCapturesExpressions($info['tokens']) as $name => $expr) {
         $captures[$name] = $prefix . $expr . $suffix;
     }
     return $captures;
 }
Example #4
0
 /**
  * {@inheritdoc}
  */
 public function isSafeInCSS()
 {
     try {
         // Build a regexp from $this->vars['regexp'] and test it against every
         // character that is disallowed in CSS
         $charRegexp = RegexpParser::getAllowedCharacterRegexp($this->vars['regexp']);
         $forbidden  = ContextSafeness::getDisallowedCharactersInCSS();
         foreach ($forbidden as $char) {
             if (preg_match($charRegexp, $char)) {
                 return false;
             }
         }
     } catch (Exception $e) {
         // Anything unexpected means this filter cannot be considered safe
         return false;
     }

     // None of the disallowed characters matched
     return true;
 }
Example #5
0
 /**
  * Validate a regexp then return its parsed representation
  *
  * The regexp is first run against an empty string to make sure PCRE accepts it.
  *
  * @param  string $regexp
  * @return array          Whatever RegexpParser::parse() returns for this regexp
  *
  * @throws InvalidArgumentException if the regexp is rejected by preg_match_all()
  */
 protected function getRegexpInfo($regexp)
 {
     try {
         // Warnings are silenced; a FALSE return value is what signals an invalid regexp
         $result = @preg_match_all($regexp, '', $matches);
     } catch (Exception $e) {
         // An exception raised during the check counts as a failure too
         $result = false;
     }

     if ($result === false) {
         throw new InvalidArgumentException('Invalid regexp');
     }

     return RegexpParser::parse($regexp);
 }
Example #6
0
 /**
  * Make an entire regexp optional through the use of the ? quantifier
  *
  * A few shortcuts produce a shorter equivalent where one exists. Otherwise the
  * expression gets a ? appended, after being wrapped in a group if it is not
  * already a single atom.
  *
  * Every anchored shortcut uses the D modifier so that an expression ending with
  * a literal newline is never mistaken for the shorter expression preceding it.
  *
  * @param  string $regexp Regexp fragment, without delimiters
  * @return string
  */
 protected static function makeRegexpOptional($regexp)
 {
     // .+ and .+? become .* and .*?
     if (preg_match('#^\\.\\+\\??$#D', $regexp)) {
         return str_replace('+', '*', $regexp);
     }

     // Special case: xx? becomes x?x?, \w\w? becomes \w?\w?
     // It covers only the most common case of repetition, it's not a panacea
     if (preg_match('#^(\\\\?.)((?:\\1\\?)+)$#Du', $regexp, $m)) {
         return $m[1] . '?' . $m[2];
     }

     // Optional assertions are a no-op
     if (preg_match('#^(?:[$^]|\\\\[bBAZzGQEK])$#D', $regexp)) {
         return '';
     }

     $isAtomic = false;
     if (preg_match('#^\\\\?.$#Dus', $regexp)) {
         // One single character, optionally escaped
         $isAtomic = true;
     } elseif (!preg_match('#^[^[(].#s', $regexp)) {
         // The expression starts with a character class or a subpattern (or is too short
         // to tell), so it has to be parsed to know whether it is made of one single atom
         $def        = RegexpParser::parse('#' . $regexp . '#');
         $tokens     = $def['tokens'];
         $tokenCount = count($tokens);
         $fullLength = strlen($regexp);

         if ($tokenCount === 1) {
             // One token: atomic only if it spans the entire expression
             $startPos = $tokens[0]['pos'];
             $len      = $tokens[0]['len'];
             $isAtomic = ($startPos === 0 && $len === $fullLength);

             // If the regexp is [..]+ it becomes [..]* (to which a ? will be appended)
             if ($isAtomic && $tokens[0]['type'] === 'characterClass') {
                 $regexp = rtrim($regexp, '+*?');
                 if (!empty($tokens[0]['quantifiers']) && $tokens[0]['quantifiers'] !== '?') {
                     $regexp .= '*';
                 }
             }
         } elseif ($tokenCount === 2
             && $tokens[0]['type'] === 'nonCapturingSubpatternStart'
             && $tokens[1]['type'] === 'nonCapturingSubpatternEnd'
         ) {
             // One non-capturing subpattern: atomic only if it covers the entire regexp
             $startPos = $tokens[0]['pos'];
             $endPos   = $tokens[1]['pos'] + $tokens[1]['len'];
             $isAtomic = ($startPos === 0 && $endPos === $fullLength);
         }
         // Any other combination of tokens is not atomic
     }

     // Anything that isn't a single atom must be grouped before being quantified
     if (!$isAtomic) {
         $open   = self::canUseAtomicGrouping($regexp) ? '(?>' : '(?:';
         $regexp = $open . $regexp . ')';
     }

     return $regexp . '?';
 }
 /**
  * Compare the output of RegexpParser::getCaptureNames() to the expected list
  *
  * @testdox getCaptureNames() works
  * @dataProvider getGetCaptureNamesTests
  */
 public function testGetCaptureNamesTests($regexp, array $expected)
 {
     $actual = RegexpParser::getCaptureNames($regexp);

     $this->assertSame($expected, $actual);
 }