/**
* Convert a PCRE regexp into a JavaScript regexp literal
*
* The regexp is parsed with RegexpParser::parse(). Text found between tokens is passed through
* self::convertUnicodeCharacters() and each token is re-emitted in its JavaScript form.
* Only the "i" and "m" modifiers are carried over to the result.
*
* @param  string $regexp   PCRE regexp, with delimiters and modifiers
* @param  bool   $isGlobal Whether to append the "g" flag to the result
* @return string           JavaScript regexp literal, e.g. /foo/i
*
* @throws RuntimeException if the regexp uses options other than J, subpattern options,
*                          or a token type that is not handled here
*/
public static function toJS($regexp, $isGlobal = \false)
{
    $parsed = RegexpParser::parse($regexp);
    $source = $parsed['regexp'];
    $dotAll = (\strpos($parsed['modifiers'], 's') !== \false);

    $js     = '';
    $cursor = 0;
    foreach ($parsed['tokens'] as $token) {
        // Copy whatever sits between the previous token and this one
        $between = \substr($source, $cursor, $token['pos'] - $cursor);
        $js     .= self::convertUnicodeCharacters($between, \false, $dotAll);

        switch ($token['type']) {
            case 'option':
                // The J option is tolerated and produces no output
                if ($token['options'] !== 'J') {
                    throw new RuntimeException('Regexp options are not supported');
                }
                break;

            case 'capturingSubpatternStart':
                $js .= '(';
                break;

            case 'nonCapturingSubpatternStart':
                if (!empty($token['options'])) {
                    throw new RuntimeException('Subpattern options are not supported');
                }
                $js .= '(?:';
                break;

            case 'capturingSubpatternEnd':
            case 'nonCapturingSubpatternEnd':
                // Only the first character of the quantifiers is kept
                $quantifier = \substr($token['quantifiers'], 0, 1);
                $js        .= ')' . $quantifier;
                break;

            case 'characterClass':
                $classContent = self::convertUnicodeCharacters($token['content'], \true, \false);
                $quantifier   = \substr($token['quantifiers'], 0, 1);
                $js          .= '[' . $classContent . ']' . $quantifier;
                break;

            case 'lookaheadAssertionStart':
                $js .= '(?=';
                break;

            case 'negativeLookaheadAssertionStart':
                $js .= '(?!';
                break;

            case 'lookaheadAssertionEnd':
            case 'negativeLookaheadAssertionEnd':
                $js .= ')';
                break;

            default:
                throw new RuntimeException("Unsupported token type '" . $token['type'] . "'");
        }

        $cursor = $token['pos'] + $token['len'];
    }

    // Copy the remainder of the regexp that follows the last token
    $js .= self::convertUnicodeCharacters(\substr($source, $cursor), \false, $dotAll);

    // With a delimiter other than "/", slashes that are not already escaped (i.e. preceded by
    // an even number of backslashes) have to be escaped for the JavaScript literal
    if ($parsed['delimiter'] !== '/') {
        $js = \preg_replace('#(?<!\\\\)((?:\\\\\\\\)*+)/#', '$1\\/', $js);
    }

    // Drop every modifier except "i" and "m"
    $flags = \preg_replace('#[^im]#', '', $parsed['modifiers']);
    if ($isGlobal) {
        $flags .= 'g';
    }

    return '/' . self::escapeLineTerminators($js) . '/' . $flags;
}
/**
* Test whether values accepted by this filter's regexp are safe to be used in CSS
*
* @return bool FALSE if the regexp could allow a character disallowed in CSS, or if
*              anything throws while analysing it
*/
public function isSafeInCSS()
{
    $isSafe = \true;
    try {
        $allowedChars = RegexpParser::getAllowedCharacterRegexp($this->vars['regexp']);
        foreach (ContextSafeness::getDisallowedCharactersInCSS() as $unsafeChar) {
            if (\preg_match($allowedChars, $unsafeChar)) {
                // One disallowed character getting through is enough
                $isSafe = \false;
                break;
            }
        }
    } catch (Exception $e) {
        // Any failure during the analysis is treated as unsafe
        $isSafe = \false;
    }

    return $isSafe;
}
/**
* Return all the named captures with a standalone regexp that matches them
*
* Each expression is anchored with ^ and $, wrapped in the original regexp's delimiter and
* followed by the original modifiers, with the D modifier added if it was not already there.
*
* @return array Array of [capture name => regexp]
*/
public function getNamedCaptures()
{
    $info      = RegexpParser::parse($this->regexp);
    $delimiter = $info['delimiter'];

    // Make sure the D modifier is part of the modifiers
    $modifiers = $info['modifiers'];
    if (strpos($modifiers, 'D') === false) {
        $modifiers .= 'D';
    }

    $prefix = $delimiter . '^';
    $suffix = '$' . $delimiter . $modifiers;

    $captures = [];
    foreach ($this->getNamedCapturesExpressions($info['tokens']) as $captureName => $expression) {
        $captures[$captureName] = $prefix . $expression . $suffix;
    }

    return $captures;
}
/**
* {@inheritdoc}
*/
public function isSafeInCSS()
{
    try {
        // Regexp matching the characters that this filter's regexp could let through
        $allowedCharsRegexp = RegexpParser::getAllowedCharacterRegexp($this->vars['regexp']);
        $disallowedChars    = ContextSafeness::getDisallowedCharactersInCSS();

        // The filter is unsafe as soon as one character disallowed in CSS can get through
        foreach ($disallowedChars as $disallowedChar) {
            $isAllowed = preg_match($allowedCharsRegexp, $disallowedChar);
            if ($isAllowed) {
                return false;
            }
        }
    } catch (Exception $e) {
        // If anything unexpected happens, the filter is considered unsafe
        return false;
    }

    return true;
}
/**
* Parse a regexp and return its info
*
* The regexp is first run against an empty string, with warnings suppressed, to make sure
* that PCRE accepts it before it is handed over to RegexpParser::parse().
*
* @param  string $regexp
* @return array
*
* @throws InvalidArgumentException if the trial match reports a failure
*/
protected function getRegexpInfo($regexp)
{
    // Stays FALSE if preg_match_all() throws before returning
    $result = false;
    try {
        $result = @preg_match_all($regexp, '', $matches);
    } catch (Exception $e) {
        // Nothing to do here, $result is still FALSE
    }

    if ($result !== false) {
        return RegexpParser::parse($regexp);
    }

    throw new InvalidArgumentException('Invalid regexp');
}
/**
* Make an entire regexp optional through the use of the ? quantifier
*
* A few simple shapes are rewritten directly. Otherwise the expression gets a ? appended,
* after being wrapped in a group unless it is recognised as a single atom (one character,
* one token spanning the whole expression, or one non-capturing subpattern spanning it.)
*
* @param  string $regexp Regexp fragment (it is wrapped in # delimiters before being parsed)
* @return string
*/
protected static function makeRegexpOptional($regexp)
{
    // .+ and .+? become .* and .*?
    if (preg_match('#^\\.\\+\\??$#', $regexp)) {
        return str_replace('+', '*', $regexp);
    }

    // Special case: xx? becomes x?x?, \w\w? becomes \w?\w?
    // Only the most common kind of repetition is covered
    if (preg_match('#^(\\\\?.)((?:\\1\\?)+)$#Du', $regexp, $matches)) {
        return $matches[1] . '?' . $matches[2];
    }

    // An optional assertion is a no-op
    if (preg_match('#^(?:[$^]|\\\\[bBAZzGQEK])$#', $regexp)) {
        return '';
    }

    if (preg_match('#^\\\\?.$#Dus', $regexp)) {
        // One single character, optionally escaped
        $isAtomic = true;
    } elseif (preg_match('#^[^[(].#s', $regexp)) {
        // Two or more characters and it doesn't start with a bracket or a parenthesis
        $isAtomic = false;
    } else {
        $parsed     = RegexpParser::parse('#' . $regexp . '#');
        $tokens     = $parsed['tokens'];
        $tokenCount = count($tokens);

        // Anything that isn't one of the two shapes below is not atomic
        $isAtomic = false;

        if ($tokenCount === 1) {
            // One token (typically a character class): atomic if it spans the whole regexp
            $token    = $tokens[0];
            $isAtomic = ($token['pos'] === 0 && $token['len'] === strlen($regexp));

            // If the regexp is [..]+ it becomes [..]* (to which a ? will be appended)
            if ($isAtomic && $token['type'] === 'characterClass') {
                $regexp = rtrim($regexp, '+*?');
                if (!empty($token['quantifiers']) && $token['quantifiers'] !== '?') {
                    $regexp .= '*';
                }
            }
        } elseif ($tokenCount === 2
            && $tokens[0]['type'] === 'nonCapturingSubpatternStart'
            && $tokens[1]['type'] === 'nonCapturingSubpatternEnd'
        ) {
            // One non-capturing subpattern: atomic if it covers the entire regexp
            $endPos   = $tokens[1]['pos'] + $tokens[1]['len'];
            $isAtomic = ($tokens[0]['pos'] === 0 && $endPos === strlen($regexp));
        }
    }

    if ($isAtomic) {
        return $regexp . '?';
    }

    // Non-atomic expressions need to be grouped before they can be quantified
    $groupStart = self::canUseAtomicGrouping($regexp) ? '(?>' : '(?:';

    return $groupStart . $regexp . ')' . '?';
}
/**
* @testdox getCaptureNames() works
* @dataProvider getGetCaptureNamesTests
*/
public function testGetCaptureNamesTests($regexp, array $expected)
{
    // The names returned for the regexp must be identical to the expected list
    $actual = RegexpParser::getCaptureNames($regexp);

    $this->assertSame($expected, $actual);
}