/**
 * Replace the content of this map
 *
 * Keys that share the same replacement are folded into one anchored regexp, and the
 * resulting list of [regexp, replacement] pairs is stored in this filter's "map" variable.
 *
 * @param  array $map           Associative array in the form [word => replacement]
 * @param  bool  $caseSensitive Whether the keys are matched case-sensitively
 * @param  bool  $strict        Whether a catch-all entry that yields FALSE is appended
 * @return void
 *
 * @throws InvalidArgumentException if $caseSensitive or $strict is not a boolean
 */
public function setMap(array $map, $caseSensitive = \false, $strict = \false)
{
    // Both flags must be genuine booleans; argument 2 is checked (and reported) first
    foreach (array(2 => $caseSensitive, 3 => $strict) as $argNum => $flag) {
        if (!\is_bool($flag)) {
            throw new InvalidArgumentException('Argument ' . $argNum . ' passed to ' . __METHOD__ . ' must be a boolean');
        }
    }

    // Safeness marks are reset for the new map, and only re-assessed for strict maps
    $this->resetSafeness();
    if ($strict) {
        $this->assessSafeness($map);
    }

    // Invert the map: one list of keys per distinct replacement value
    $keysByValue = array();
    foreach ($map as $key => $value) {
        $keysByValue[$value][] = $key;
    }

    $builderOptions = array('delimiter' => '/', 'caseInsensitive' => !$caseSensitive);
    $modifiers      = $caseSensitive ? 'D' : 'Di';

    $entries = array();
    foreach ($keysByValue as $value => $keys) {
        $pattern = '/^' . RegexpBuilder::fromList($keys, $builderOptions) . '$/' . $modifiers;

        // Anything outside of ASCII requires the Unicode modifier
        if (!\preg_match('#^[[:ascii:]]*$#D', $pattern)) {
            $pattern .= 'u';
        }

        $entries[] = array(new Regexp($pattern), $value);
    }

    // A strict map ends with an entry whose empty pattern is paired with FALSE
    if ($strict) {
        $entries[] = array('//', \false);
    }

    $this->vars['map'] = $entries;
}
/**
 * Create a BBCode config based on its reference usage
 *
 * @param  string $usage BBCode usage, e.g. [B]{TEXT}[/b]
 * @return array         Array with the keys "tag", "bbcode", "passthroughToken", "bbcodeName"
 *                       and "tokens"
 *
 * @throws InvalidArgumentException if the usage or one of its attribute definitions cannot
 *                                  be interpreted
 */
protected function parse($usage)
{
    $tag    = new Tag();
    $bbcode = new BBCode();
    $config = array('tag' => $tag, 'bbcode' => $bbcode, 'passthroughToken' => \null);

    // Maps and regexps are base64-encoded first so that their special characters cannot
    // interfere with the parsing of the definitions. They are decoded further down
    $encode = function ($m) {
        return $m[1] . \base64_encode($m[2]);
    };
    $usage = \preg_replace_callback('#(\\{(?>HASH)?MAP=)([^:]+:[^,;}]+(?>,[^:]+:[^,;}]+)*)(?=[;}])#', $encode, $usage);
    $usage = \preg_replace_callback('#(\\{(?:PARSE|REGEXP)=)(' . self::REGEXP . '(?:,' . self::REGEXP . ')*)#', $encode, $usage);

    $regexp = '(^' . '\\[(?<bbcodeName>\\S+?)' . '(?<defaultAttribute>=\\S+?)?' . '(?<attributes>(?:\\s+[^=]+=\\S+?)*?)?' . '\\s*(?:/?\\]|\\]\\s*(?<content>.*?)\\s*(?<endTag>\\[/\\1]))$)i';
    if (!\preg_match($regexp, \trim($usage), $m)) {
        throw new InvalidArgumentException('Cannot interpret the BBCode definition');
    }

    $config['bbcodeName'] = BBCode::normalizeName($m['bbcodeName']);

    // Whitespace-separated definitions such as "foo={BAR}"
    $definitions = \preg_split('#\\s+#', \trim($m['attributes']), -1, \PREG_SPLIT_NO_EMPTY);

    // The default attribute is prepended under the BBCode's own name
    if (!empty($m['defaultAttribute'])) {
        \array_unshift($definitions, $m['bbcodeName'] . $m['defaultAttribute']);
    }

    // The content is either a passthrough token (an unfiltered token with an optional
    // numeric suffix) or an attribute definition named "content"
    if (!empty($m['content'])) {
        $regexp = '#^\\{' . RegexpBuilder::fromList($this->unfilteredTokens) . '[0-9]*\\}$#D';
        if (\preg_match($regexp, $m['content'])) {
            $config['passthroughToken'] = \substr($m['content'], 1, -1);
        } else {
            $definitions[] = 'content=' . $m['content'];
            $bbcode->contentAttributes[] = 'content';
        }
    }

    $decode = function ($m) {
        return $m[1] . \base64_decode($m[2]);
    };

    $attributeDefinitions = array();
    foreach ($definitions as $definition) {
        // The name part of the usage regexp may span whitespace, so the split above can
        // produce a bare word ("foo") or an item with an empty name ("=bar"). Neither can
        // be interpreted, so they are rejected instead of being silently mangled
        $pos = \strpos($definition, '=');
        if ($pos === \false || $pos === 0) {
            throw new InvalidArgumentException("Cannot interpret the attribute definition '" . $definition . "'");
        }

        $name  = \substr($definition, 0, $pos);
        $value = \substr($definition, 1 + $pos);

        // Restore the tokens that were base64-encoded at the top of this method
        $value = \preg_replace_callback('#(\\{(?>HASHMAP|MAP|PARSE|REGEXP)=)([A-Za-z0-9+/]+=*)#', $decode, $value);

        if ($name[0] === '$') {
            // "$option=value" sets a property of the BBCode
            $optionName = \substr($name, 1);
            $bbcode->{$optionName} = $this->convertValue($value);
        } elseif ($name[0] === '#') {
            // "#rule=a,b" calls the rule once per comma-separated target
            $ruleName = \substr($name, 1);
            foreach (\explode(',', $value) as $ruleTarget) {
                $tag->rules->{$ruleName}($this->convertValue($ruleTarget));
            }
        } else {
            $attrName = \strtolower(\trim($name));
            $attributeDefinitions[] = array($attrName, $value);
        }
    }

    $tokens = $this->addAttributes($attributeDefinitions, $bbcode, $tag);

    // A passthrough token that is also present in the token table is dropped
    if (isset($tokens[$config['passthroughToken']])) {
        $config['passthroughToken'] = \null;
    }

    // Only tokens with a non-empty value are kept
    $config['tokens'] = \array_filter($tokens);

    return $config;
}
/**
 * Return a regexp that matches the list of hostnames
 *
 * Each item is normalized with normalizeHostmask() before the list is handed to the regexp
 * builder, with "*", "^" and "$" declared as special characters.
 *
 * @return string
 */
public function getRegexp()
{
    $hostmasks = [];
    foreach ($this->items as $item) {
        $hostmasks[] = $this->normalizeHostmask($item);
    }

    // "*" expands to ".*" while "^" and "$" are kept as anchors
    $builderOptions = ['specialChars' => ['*' => '.*', '^' => '^', '$' => '$']];
    $alternation    = RegexpBuilder::fromList($hostmasks, $builderOptions);

    return '/' . $alternation . '/DSis';
}
/**
 * Generate this plugin's config
 *
 * @return array Array with the keys "attrName", "regexp" and "tagName", plus "quickMatch"
 *               when matching of "www." is disabled
 */
public function asConfig()
{
    $schemes    = $this->configurator->urlConfig->getAllowedSchemes();
    $schemeExpr = RegexpBuilder::fromList($schemes) . '://';

    // With matchWww, a URL may start with either an allowed scheme or a bare "www."
    $anchor = ($this->matchWww) ? '(?:' . $schemeExpr . '|www\\.)' : $schemeExpr;

    $config = array(
        'attrName' => $this->attrName,
        'regexp'   => '#' . $anchor . '\\S(?>[^\\s\\[\\]]*(?>\\[\\w*\\])?)++#iS',
        'tagName'  => $this->tagName
    );

    // "://" can only serve as a quick match if every match contains it
    if (!$this->matchWww) {
        $config['quickMatch'] = '://';
    }

    return $config;
}
/**
 * Generate this plugin's config
 *
 * @return array|null Array with the keys "quickMatch" and "regexp", or NULL if URLs are not
 *                    captured or the collection is empty
 */
public function asConfig()
{
    if (!$this->captureURLs) {
        return;
    }
    if (!\count($this->collection)) {
        return;
    }

    $schemes = $this->getSchemes();
    if (empty($schemes)) {
        // Only http and https URLs: they all contain "://"
        $quickMatch = '://';
        $prefix     = 'https?:\\/\\/';
    } else {
        // Extra schemes are only followed by a colon
        $quickMatch = ':';
        $prefix     = '(?>' . RegexpBuilder::fromList($schemes) . ':|' . 'https?:\\/\\/' . ')';
    }

    return array(
        'quickMatch' => $quickMatch,
        'regexp'     => '/\\b' . $prefix . '[^["\'\\s]+/S'
    );
}
/**
 * Generate this plugin's config
 *
 * @return array Array with the keys "attrName" and "tagName". If aliases are set, "aliases"
 *               and "aliasesRegexp" are added, as well as "aliasesQuickMatch" if one could
 *               be generated
 */
public function asConfig()
{
    $config = array('attrName' => $this->attrName, 'tagName' => $this->tagName);

    // Nothing else to add without aliases
    if (empty($this->aliases)) {
        return $config;
    }

    $aliasNames = \array_keys($this->aliases);
    $pattern    = '/' . RegexpBuilder::fromList($aliasNames) . '/';

    $config['aliases']       = $this->aliases;
    $config['aliasesRegexp'] = new Regexp($pattern, \true);

    // generateQuickMatchFromList() returns FALSE when it has nothing to offer
    $quickMatch = ConfigHelper::generateQuickMatchFromList($aliasNames);
    if ($quickMatch !== \false) {
        $config['aliasesQuickMatch'] = $quickMatch;
    }

    return $config;
}
/**
 * Set the list of allowed values
 *
 * @param  array $values        List of allowed values
 * @param  bool  $caseSensitive Whether the values are matched case-sensitively
 * @return void
 *
 * @throws InvalidArgumentException if $caseSensitive is not a boolean
 */
public function setValues(array $values, $caseSensitive = \false)
{
    if (!\is_bool($caseSensitive)) {
        throw new InvalidArgumentException('Argument 2 passed to ' . __METHOD__ . ' must be a boolean');
    }

    // Anchored on both sides, case-insensitive unless requested otherwise
    $modifiers = 'D';
    if (!$caseSensitive) {
        $modifiers .= 'i';
    }
    $pattern = '/^' . RegexpBuilder::fromList($values, array('delimiter' => '/')) . '$/' . $modifiers;

    // The Unicode modifier is only added if the regexp isn't purely ASCII
    $isAscii = (bool) \preg_match('#^[[:ascii:]]*$#D', $pattern);

    $this->setRegexp($isAscii ? $pattern : $pattern . 'u');
}
/**
 * Set the list of allowed values
 *
 * @param  array $values        List of allowed values
 * @param  bool  $caseSensitive Whether the choice is case-sensitive
 * @return void
 *
 * @throws InvalidArgumentException if $caseSensitive is not a boolean
 */
public function setValues(array $values, $caseSensitive = false)
{
    if (!is_bool($caseSensitive)) {
        throw new InvalidArgumentException('Argument 2 passed to ' . __METHOD__ . ' must be a boolean');
    }

    // One anchored regexp for the whole list, case-insensitive where applicable
    $alternation = RegexpBuilder::fromList($values, ['delimiter' => '/']);
    $pattern     = '/^' . $alternation . '$/D' . ($caseSensitive ? '' : 'i');

    // Non-ASCII content requires the Unicode flag
    if (!preg_match('#^[[:ascii:]]*$#D', $pattern)) {
        $pattern .= 'u';
    }

    // The regexp is what this filter actually works with
    $this->setRegexp($pattern);
}
/**
 * Set the content of this map
 *
 * @param  array $map           Associative array in the form [word => replacement]
 * @param  bool  $caseSensitive Whether this map is case-sensitive
 * @param  bool  $strict        Whether this map is strict (values with no match are invalid)
 * @return void
 *
 * @throws InvalidArgumentException if $caseSensitive or $strict is not a boolean
 */
public function setMap(array $map, $caseSensitive = false, $strict = false)
{
    // Validate the flags in positional order so that argument 2 is reported before argument 3
    foreach ([2 => $caseSensitive, 3 => $strict] as $argNum => $flag) {
        if (!is_bool($flag)) {
            throw new InvalidArgumentException('Argument ' . $argNum . ' passed to ' . __METHOD__ . ' must be a boolean');
        }
    }

    // The safeness marks belong to the previous map. Only a strict map gets assessed
    $this->resetSafeness();
    if ($strict) {
        $this->assessSafeness($map);
    }

    // Collect the keys that lead to each replacement
    $keysByValue = [];
    foreach ($map as $key => $value) {
        $keysByValue[$value][] = $key;
    }

    // Every replacement gets one [regexp, value] pair that covers all of its keys
    $builderOptions = ['delimiter' => '/', 'caseInsensitive' => !$caseSensitive];
    $modifiers      = $caseSensitive ? 'D' : 'Di';

    $pairs = [];
    foreach ($keysByValue as $value => $keys) {
        $pattern = '/^' . RegexpBuilder::fromList($keys, $builderOptions) . '$/' . $modifiers;

        // Switch to Unicode mode if the regexp isn't purely ASCII
        if (!preg_match('#^[[:ascii:]]*$#D', $pattern)) {
            $pattern .= 'u';
        }

        $pairs[] = [new Regexp($pattern), $value];
    }

    // In strict mode, a catch-all regexp that replaces the value with FALSE closes the list
    if ($strict) {
        $pairs[] = ['//', false];
    }

    // Record the map in this filter's variables
    $this->vars['map'] = $pairs;
}
/**
 * Generate this plugin's config
 *
 * @return array|null Array with the keys "quickMatch", "regexp" and "tagName" (plus
 *                    "notAfter" when a notAfter expression is set), or NULL if the
 *                    collection is empty
 */
public function asConfig()
{
    if (!\count($this->collection)) {
        return;
    }

    // The codes are the keys of the collection
    $codes = \array_keys(\iterator_to_array($this->collection));

    // Optional assertions surrounding the list of codes
    $lookbehind = ($this->notAfter !== '') ? '(?<!' . $this->notAfter . ')' : '';
    $lookahead  = ($this->notBefore !== '') ? '(?!' . $this->notBefore . ')' : '';
    $regexp     = '/' . $lookbehind . RegexpBuilder::fromList($codes) . $lookahead . '/S';

    // Unicode properties (\p or \P) require the Unicode modifier
    if (\preg_match('/\\\\[pP](?>\\{\\^?\\w+\\}|\\w\\w?)/', $regexp)) {
        $regexp .= 'u';
    }

    // Turn unescaped non-capturing groups into atomic groups
    $regexp = \preg_replace('/(?<!\\\\)((?>\\\\\\\\)*)\\(\\?:/', '$1(?>', $regexp);

    $config = array(
        'quickMatch' => $this->quickMatch,
        'regexp'     => $regexp,
        'tagName'    => $this->tagName
    );

    if ($this->notAfter !== '') {
        // The JS variant is the same regexp minus its leading "/(?<!...)" and its trailing
        // delimiter and modifiers. The notAfter expression is passed separately as a variant
        $bodyStart = \strlen('/(?<!)') + \strlen($this->notAfter);
        $bodyEnd   = \strrpos($regexp, '/');
        $jsRegexp  = RegexpConvertor::toJS('/' . \substr($regexp, $bodyStart, $bodyEnd - $bodyStart) . '/', \true);

        $regexpVariant = new Variant($regexp);
        $regexpVariant->set('JS', $jsRegexp);
        $config['regexp'] = $regexpVariant;

        $notAfterVariant = new Variant();
        $notAfterVariant->set('JS', RegexpConvertor::toJS('/' . $this->notAfter . '/'));
        $config['notAfter'] = $notAfterVariant;
    }

    // Fall back to a generated quick match if none was configured
    if ($this->quickMatch === \false) {
        $config['quickMatch'] = ConfigHelper::generateQuickMatchFromList($codes);
    }

    return $config;
}
/**
 * {@inheritdoc}
 *
 * Returns NULL if the collection is empty. Otherwise the config holds "attrName",
 * "tagName", one or several "regexps" and, if set, "onlyFirst".
 */
public function asConfig()
{
    if (!count($this->collection)) {
        return;
    }

    $config = ['attrName' => $this->attrName, 'tagName' => $this->tagName];
    if (!empty($this->onlyFirst)) {
        $config['onlyFirst'] = $this->onlyFirst;
    }

    // Duplicates would skew the length computation below, and sorting keeps the keywords
    // that start with the same characters together
    $sortedKeywords = array_unique(iterator_to_array($this->collection));
    sort($sortedKeywords);

    // Split the keywords into chunks of ~30KB to remain below PCRE's limit
    $chunks   = [];
    $chunkId  = 0;
    $chunkLen = 0;
    foreach ($sortedKeywords as $keyword) {
        // NOTE: the value 4 is a guesstimate for the cost of each alternation
        $cost      = 4 + strlen($keyword);
        $chunkLen += $cost;

        // This keyword opens a new chunk if it pushes the current one over the limit
        if ($chunkLen > 30000) {
            $chunkLen = $cost;
            ++$chunkId;
        }

        $chunks[$chunkId][] = $keyword;
    }

    $builderOptions = ['caseInsensitive' => !$this->caseSensitive];
    $modifiers      = ($this->caseSensitive) ? 'S' : 'Si';

    foreach ($chunks as $chunk) {
        $pattern = '/\\b' . RegexpBuilder::fromList($chunk, $builderOptions) . '\\b/' . $modifiers;

        // Non-ASCII content requires the Unicode modifier
        if (preg_match('/[^[:ascii:]]/', $pattern)) {
            $pattern .= 'u';
        }

        $config['regexps'][] = new Regexp($pattern, true);
    }

    return $config;
}
/**
 * Generate a regexp that matches the given list of words
 *
 * @param  array   $words List of words
 * @return Variant        Regexp in a Variant container, with a JS variant
 */
protected function getWordsRegexp(array $words)
{
    // Atomic grouping is forced for performance. In theory it could prevent some matches
    // but in practice it shouldn't happen
    $wordsExpr = preg_replace(
        '/(?<!\\\\)((?>\\\\\\\\)*)\\(\\?:/',
        '$1(?>',
        RegexpBuilder::fromList($words, $this->regexpOptions)
    );

    // PCRE version: the match must not be surrounded by Unicode letters or numbers
    $pcreRegexp = '/(?<![\\pL\\pN])' . $wordsExpr . '(?![\\pL\\pN])/Siu';

    // JavaScript regexps don't support Unicode properties, so any [\pL\pN] found in the
    // expression is swapped for a class that accepts anything but whitespace and common
    // punctuation
    $jsExpr   = str_replace('[\\pL\\pN]', '[^\\s!-\\/:-?]', $wordsExpr);
    $jsRegexp = RegexpConvertor::toJS('/(?:^|\\W)' . $jsExpr . '(?!\\w)/gi', true);

    return new Variant($pcreRegexp, ['JS' => $jsRegexp]);
}
/**
 * Generate this plugin's config
 *
 * @return array|null Array with the keys "quickMatch", "prefix" and "regexp" (plus
 *                    "aliases" if any are set), or NULL if there are no elements and no
 *                    aliases
 */
public function asConfig()
{
    if (empty($this->elements) && empty($this->aliases)) {
        return;
    }

    /**
     * Regexp used to match an attributes definition (name + value if applicable)
     *
     * @link http://dev.w3.org/html5/spec/syntax.html#attributes-0
     */
    $attrExpr = '[a-z][-a-z0-9]*(?>\\s*=\\s*(?>"[^"]*"|\'[^\']*\'|[^\\s"\'=<>`]+))?';

    // Names that can be used as tag names: aliased elements first, then the elements
    $elNames = array_merge(array_keys($this->aliases), array_keys($this->elements));
    $nameExpr = RegexpBuilder::fromList($elNames);

    // An end tag only captures a name, a start tag captures a name and its attributes
    $endTagExpr   = '/(' . $nameExpr . ')';
    $startTagExpr = '(' . $nameExpr . ')((?>\\s+' . $attrExpr . ')*+)\\s*/?';

    $config = [
        'quickMatch' => $this->quickMatch,
        'prefix'     => $this->prefix,
        'regexp'     => '#<(?>' . $endTagExpr . '|' . $startTagExpr . ')\\s*>#i'
    ];

    if (!empty($this->aliases)) {
        // Dictionaries preserve the aliases array's keys in JavaScript
        $dictionary = new Dictionary();
        foreach ($this->aliases as $elName => $elAliases) {
            $dictionary[$elName] = new Dictionary($elAliases);
        }
        $config['aliases'] = $dictionary;
    }

    return $config;
}
/**
 * Generate a regexp that matches the given list of words
 *
 * @param  array   $words List of words
 * @return Variant        PCRE regexp in a Variant container, with a JS variant
 */
protected function getWordsRegexp(array $words)
{
    // Build the alternation, then turn its unescaped non-capturing groups into atomic groups
    $wordsExpr = \preg_replace(
        '/(?<!\\\\)((?>\\\\\\\\)*)\\(\\?:/',
        '$1(?>',
        RegexpBuilder::fromList($words, $this->regexpOptions)
    );

    // PCRE version: the match must not be surrounded by Unicode letters or numbers
    $pcreRegexp = '/(?<![\\pL\\pN])' . $wordsExpr . '(?![\\pL\\pN])/Siu';

    // JS version: any [\pL\pN] found in the expression is replaced with an ASCII-only class
    // and the boundaries are expressed with \W and \w
    $jsExpr   = \str_replace('[\\pL\\pN]', '[^\\s!-\\/:-?]', $wordsExpr);
    $jsRegexp = RegexpConvertor::toJS('/(?:^|\\W)' . $jsExpr . '(?!\\w)/gi', \true);

    return new Variant($pcreRegexp, array('JS' => $jsRegexp));
}
/**
 * Generate this plugin's config
 *
 * @return array|null Array with the keys "attrName", "tagName" and "regexps" (plus
 *                    "onlyFirst" if set), or NULL if the collection is empty
 */
public function asConfig()
{
    if (!\count($this->collection)) {
        return;
    }

    $config = array('attrName' => $this->attrName, 'tagName' => $this->tagName);
    if (!empty($this->onlyFirst)) {
        $config['onlyFirst'] = $this->onlyFirst;
    }

    // Deduplicated and sorted list of keywords
    $sortedKeywords = \array_unique(\iterator_to_array($this->collection));
    \sort($sortedKeywords);

    // Keywords are spread over chunks whose estimated size stays within 30000 bytes, each
    // keyword being counted as its length plus 4
    $chunks   = array();
    $chunkId  = 0;
    $chunkLen = 0;
    foreach ($sortedKeywords as $keyword) {
        $cost      = 4 + \strlen($keyword);
        $chunkLen += $cost;

        // Going over the limit opens a new chunk that starts with this keyword
        if ($chunkLen > 30000) {
            $chunkLen = $cost;
            ++$chunkId;
        }

        $chunks[$chunkId][] = $keyword;
    }

    $builderOptions = array('caseInsensitive' => !$this->caseSensitive);
    $modifiers      = ($this->caseSensitive) ? 'S' : 'Si';

    // One regexp per chunk
    foreach ($chunks as $chunk) {
        $pattern = '/\\b' . RegexpBuilder::fromList($chunk, $builderOptions) . '\\b/' . $modifiers;

        // Non-ASCII content requires the Unicode modifier
        if (\preg_match('/[^[:ascii:]]/', $pattern)) {
            $pattern .= 'u';
        }

        $config['regexps'][] = new Regexp($pattern, \true);
    }

    return $config;
}
/**
 * Create a BBCode based on its reference usage
 *
 * @param  string $usage BBCode usage, e.g. [B]{TEXT}[/b]
 * @return array         Array with the keys "tag", "bbcode", "passthroughToken", "bbcodeName"
 *                       and "tokens"
 *
 * @throws InvalidArgumentException if the usage or one of its attribute definitions cannot
 *                                  be interpreted
 */
protected function parse($usage)
{
    $tag    = new Tag();
    $bbcode = new BBCode();

    // This is the config we will return
    $config = ['tag' => $tag, 'bbcode' => $bbcode, 'passthroughToken' => null];

    // Encode maps and regexps to avoid special characters to interfere with definitions
    $encode = function ($m) {
        return $m[1] . base64_encode($m[2]);
    };
    $usage = preg_replace_callback('#(\\{(?>HASH)?MAP=)([^:]+:[^,;}]+(?>,[^:]+:[^,;}]+)*)(?=[;}])#', $encode, $usage);
    $usage = preg_replace_callback('#(\\{(?:PARSE|REGEXP)=)(' . self::REGEXP . '(?:,' . self::REGEXP . ')*)#', $encode, $usage);

    $regexp = '(^' . '\\[(?<bbcodeName>\\S+?)' . '(?<defaultAttribute>=\\S+?)?' . '(?<attributes>(?:\\s+[^=]+=\\S+?)*?)?' . '\\s*(?:/?\\]|\\]\\s*(?<content>.*?)\\s*(?<endTag>\\[/\\1]))' . '$)i';
    if (!preg_match($regexp, trim($usage), $m)) {
        throw new InvalidArgumentException('Cannot interpret the BBCode definition');
    }

    // Save the BBCode's name
    $config['bbcodeName'] = BBCode::normalizeName($m['bbcodeName']);

    // Prepare the attributes definition, e.g. "foo={BAR}"
    $definitions = preg_split('#\\s+#', trim($m['attributes']), -1, PREG_SPLIT_NO_EMPTY);

    // If there's a default attribute, we prepend it to the list using the BBCode's name as
    // attribute name
    if (!empty($m['defaultAttribute'])) {
        array_unshift($definitions, $m['bbcodeName'] . $m['defaultAttribute']);
    }

    // Append the content token to the attributes list under the name "content" if it's
    // anything but raw {TEXT} (or other unfiltered tokens)
    if (!empty($m['content'])) {
        $regexp = '#^\\{' . RegexpBuilder::fromList($this->unfilteredTokens) . '[0-9]*\\}$#D';
        if (preg_match($regexp, $m['content'])) {
            $config['passthroughToken'] = substr($m['content'], 1, -1);
        } else {
            $definitions[] = 'content=' . $m['content'];
            $bbcode->contentAttributes[] = 'content';
        }
    }

    $decode = function ($m) {
        return $m[1] . base64_decode($m[2]);
    };

    // Separate the attribute definitions from the BBCode options
    $attributeDefinitions = [];
    foreach ($definitions as $definition) {
        // The name part of the usage regexp may span whitespace, so the split above can
        // produce a bare word ("foo") or an item with an empty name ("=bar"). Neither can
        // be interpreted, so they are rejected instead of being silently mangled
        $pos = strpos($definition, '=');
        if ($pos === false || $pos === 0) {
            throw new InvalidArgumentException("Cannot interpret the attribute definition '" . $definition . "'");
        }

        $name  = substr($definition, 0, $pos);
        $value = substr($definition, 1 + $pos);

        // Decode base64-encoded tokens
        $value = preg_replace_callback('#(\\{(?>HASHMAP|MAP|PARSE|REGEXP)=)([A-Za-z0-9+/]+=*)#', $decode, $value);

        // If name starts with $ then it's a BBCode option, if it starts with # it's a rule
        // and otherwise it's an attribute definition
        if ($name[0] === '$') {
            $optionName = substr($name, 1);
            $bbcode->{$optionName} = $this->convertValue($value);
        } elseif ($name[0] === '#') {
            $ruleName = substr($name, 1);

            // Supports #denyChild=foo,bar
            foreach (explode(',', $value) as $ruleTarget) {
                $tag->rules->{$ruleName}($this->convertValue($ruleTarget));
            }
        } else {
            $attrName = strtolower(trim($name));
            $attributeDefinitions[] = [$attrName, $value];
        }
    }

    // Add the attributes and get the token translation table
    $tokens = $this->addAttributes($attributeDefinitions, $bbcode, $tag);

    // Test whether the passthrough token is used for something else, in which case we need
    // to unset it
    if (isset($tokens[$config['passthroughToken']])) {
        $config['passthroughToken'] = null;
    }

    // Add the list of known (and only the known) tokens to the config
    $config['tokens'] = array_filter($tokens);

    return $config;
}
$attributes[$type][] = $attrName; } } } } // Prefill with known attributes from HTML 5.0 and HTML 4.01 $regexps = ['CSS' => ['^style$'], 'JS' => ['^on', '^data-s9e-livepreview-postprocess$'], 'URL' => ['^action$', '^cite$', '^data$', '^formaction$', '^href$', '^icon$', '^manifest$', '^pluginspage$', '^poster$', 'src$']]; foreach ($attributes as $type => $attrNames) { foreach ($attrNames as $attrName) { foreach ($regexps[$type] as $regexp) { // Test whether this attribute is already covered if (preg_match('/' . $regexp . '/i', $attrName)) { continue 2; } } $regexps[$type][] = '^' . $attrName . '$'; } } $filepath = __DIR__ . '/../src/Configurator/Helpers/TemplateHelper.php'; $file = file_get_contents($filepath); foreach ($regexps as $type => $typeRegexps) { $regexp = RegexpBuilder::fromList($typeRegexps, ['delimiter' => '/', 'specialChars' => ['^' => '^', '$' => '$']]); $file = preg_replace_callback('/(function get' . $type . 'Nodes\\(.*?\\$regexp = )\'.*?\'/s', function ($m) use($regexp) { return $m[1] . var_export('/' . $regexp . '/i', true); }, $file, 1, $cnt); if ($cnt !== 1) { die("Could not find {$type}\n"); } } file_put_contents($filepath, $file); die("Done.\n");
/**
 * Generate this plugin's config
 *
 * @return array|null Array with the keys "quickMatch", "regexp" and "tagName" (plus
 *                    "notAfter" when a notAfter expression is set), or NULL if the
 *                    collection is empty
 */
public function asConfig()
{
    if (!count($this->collection)) {
        return;
    }

    // Grab the emoticons' codes from the collection
    $codes = array_keys(iterator_to_array($this->collection));

    // Build the regexp used to match emoticons, with its optional assertions on each side
    $lookbehind = ($this->notAfter !== '') ? '(?<!' . $this->notAfter . ')' : '';
    $lookahead  = ($this->notBefore !== '') ? '(?!' . $this->notBefore . ')' : '';
    $regexp     = '/' . $lookbehind . RegexpBuilder::fromList($codes) . $lookahead . '/S';

    // Set the Unicode mode if Unicode properties are used
    if (preg_match('/\\\\[pP](?>\\{\\^?\\w+\\}|\\w\\w?)/', $regexp)) {
        $regexp .= 'u';
    }

    // Force the regexp to use atomic grouping for performance
    $regexp = preg_replace('/(?<!\\\\)((?>\\\\\\\\)*)\\(\\?:/', '$1(?>', $regexp);

    // Prepare the config array
    $config = [
        'quickMatch' => $this->quickMatch,
        'regexp'     => $regexp,
        'tagName'    => $this->tagName
    ];

    // If notAfter is used, we need to create a JavaScript-specific regexp that does not use
    // a lookbehind assertion, and we add the notAfter subpattern to the config as a variant
    if ($this->notAfter !== '') {
        // The JavaScript regexp starts right after the first assertion, which is as long as
        // "/(?<!)" plus the content of notAfter, and stops before the last delimiter
        $bodyStart = strlen('/(?<!)') + strlen($this->notAfter);
        $bodyEnd   = strrpos($regexp, '/');
        $jsRegexp  = RegexpConvertor::toJS('/' . substr($regexp, $bodyStart, $bodyEnd - $bodyStart) . '/', true);

        $regexpVariant = new Variant($regexp);
        $regexpVariant->set('JS', $jsRegexp);
        $config['regexp'] = $regexpVariant;

        $notAfterVariant = new Variant();
        $notAfterVariant->set('JS', RegexpConvertor::toJS('/' . $this->notAfter . '/'));
        $config['notAfter'] = $notAfterVariant;
    }

    // Try to find a quickMatch if none is set
    if ($this->quickMatch === false) {
        $config['quickMatch'] = ConfigHelper::generateQuickMatchFromList($codes);
    }

    return $config;
}
/**
 * Get a string suitable as a preg_replace() replacement for given PHP code
 *
 * The code is only accepted if it consists entirely of "$this->out.=" statements that
 * concatenate string literals and escaped attribute values, and of conditional copies of an
 * attribute. The returned regexp is built to capture the attributes used by the code, in
 * alphabetical order of their names.
 *
 * @param  string     $php Original code
 * @return array|bool      Array of [regexp, replacement] if possible, or FALSE otherwise
 */
protected static function getDynamicRendering($php)
{
    $rendering = '';

    // Building blocks of the regexps used to recognize the code:
    //  - a single-quoted string literal
    //  - the escaped value of an attribute
    //  - either of those two
    //  - a whole output statement made of one or several values joined with dots
    //  - an attribute copied verbatim if it exists
    $literal = "(?<literal>'((?>[^'\\\\]+|\\\\['\\\\])*)')";
    $attribute = "(?<attribute>htmlspecialchars\\(\\\$node->getAttribute\\('([^']+)'\\),2\\))";
    $value = "(?<value>{$literal}|{$attribute})";
    $output = "(?<output>\\\$this->out\\.={$value}(?:\\.(?&value))*;)";
    $copyOfAttribute = "(?<copyOfAttribute>if\\(\\\$node->hasAttribute\\('([^']+)'\\)\\)\\{\\\$this->out\\.=' \\g-1=\"'\\.htmlspecialchars\\(\\\$node->getAttribute\\('\\g-1'\\),2\\)\\.'\"';\\})";

    // Bail out unless the whole code is a sequence of those two kinds of statements
    $regexp = '(^(' . $output . '|' . $copyOfAttribute . ')*$)';
    if (!preg_match($regexp, $php, $m)) {
        return false;
    }

    // Attributes that are copied in the replacement
    $copiedAttributes = [];

    // Attributes whose value is used in the replacement
    $usedAttributes = [];

    // Consume the code one statement at a time. The A modifier anchors each match at $offset
    $regexp = '(' . $output . '|' . $copyOfAttribute . ')A';
    $offset = 0;
    while (preg_match($regexp, $php, $m, 0, $offset)) {
        // Test whether it's normal output or a copy of attribute
        if ($m['output']) {
            // 12 === strlen('$this->out.=')
            $offset += 12;

            // Consume the values of this statement one at a time
            while (preg_match('(' . $value . ')A', $php, $m, 0, $offset)) {
                // Test whether it's a literal or an attribute value
                if ($m['literal']) {
                    // Unescape the literal
                    $str = stripslashes(substr($m[0], 1, -1));

                    // Escape special characters: a backslash or dollar sign that precedes a
                    // digit gets a backslash prepended
                    $rendering .= preg_replace('([\\\\$](?=\\d))', '\\\\$0', $str);
                } else {
                    // The attribute's name is the last capture of the match
                    $attrName = end($m);

                    // Generate a unique ID for this attribute name, we'll use it as a
                    // placeholder until we have the full list of captures and we can replace it
                    // with the capture number
                    if (!isset($usedAttributes[$attrName])) {
                        $usedAttributes[$attrName] = uniqid($attrName, true);
                    }
                    $rendering .= $usedAttributes[$attrName];
                }

                // Skip the match plus the next . or ;
                $offset += 1 + strlen($m[0]);
            }
        } else {
            // The attribute's name is the last capture of the match
            $attrName = end($m);

            // Same placeholder mechanism as above, for the copy of the whole attribute
            if (!isset($copiedAttributes[$attrName])) {
                $copiedAttributes[$attrName] = uniqid($attrName, true);
            }
            $rendering .= $copiedAttributes[$attrName];
            $offset += strlen($m[0]);
        }
    }

    // Gather the names of the attributes used in the replacement either by copy or by value
    $attrNames = array_keys($copiedAttributes + $usedAttributes);

    // Sort them alphabetically
    sort($attrNames);

    // Keep a copy of the attribute names to be used in the filler subpattern
    $remainingAttributes = array_combine($attrNames, $attrNames);

    // Prepare the final regexp
    $regexp = '(^[^ ]+';

    // Number of the last capture assigned to a placeholder
    $index = 0;
    foreach ($attrNames as $attrName) {
        // Add a subpattern that matches (and skips) any attribute definition that is not one of
        // the remaining attributes we're trying to match
        $regexp .= '(?> (?!' . RegexpBuilder::fromList($remainingAttributes) . '=)[^=]+="[^"]*")*';
        unset($remainingAttributes[$attrName]);

        // A copied attribute is captured whole, otherwise the group is made atomic ("(?>")
        $regexp .= '(';
        if (isset($copiedAttributes[$attrName])) {
            // NOTE(review): presumably replaces the placeholder in $rendering with a reference
            // to capture number $index -- confirm against replacePlaceholder()
            self::replacePlaceholder($rendering, $copiedAttributes[$attrName], ++$index);
        } else {
            $regexp .= '?>';
        }
        $regexp .= ' ' . $attrName . '="';

        // An attribute used by value gets a capture around the content of its quotes
        if (isset($usedAttributes[$attrName])) {
            $regexp .= '(';
            self::replacePlaceholder($rendering, $usedAttributes[$attrName], ++$index);
        }
        $regexp .= '[^"]*';
        if (isset($usedAttributes[$attrName])) {
            $regexp .= ')';
        }

        // The whole attribute is optional
        $regexp .= '")?';
    }
    $regexp .= '.*)s';

    return [$regexp, $rendering];
}
/**
 * Return a regexp that matches all used dictionary keys
 *
 * @return string Regexp with the "g" flag
 */
protected function getReplacementRegexp()
{
    $keys = array_keys($this->dictionary);

    return sprintf('/%s/g', RegexpBuilder::fromList($keys));
}
/**
 * Return this scheme list as a regexp
 *
 * @return Regexp Anchored, case-insensitive regexp that matches any of the items
 */
public function asConfig()
{
    $alternation = RegexpBuilder::fromList($this->items);
    $pattern     = '/^' . $alternation . '$/Di';

    return new Regexp($pattern);
}
/**
 * Generate this plugin's config
 *
 * @return array|null Array with the keys "quickMatch", "prefix" and "regexp" (plus
 *                    "aliases" if any are set), or NULL if there are no elements and no
 *                    aliases
 */
public function asConfig()
{
    if (empty($this->elements) && empty($this->aliases)) {
        return;
    }

    // Matches an attribute's name, optionally followed by a quoted or unquoted value
    $attrExpr = '[a-z][-a-z0-9]*(?>\\s*=\\s*(?>"[^"]*"|\'[^\']*\'|[^\\s"\'=<>`]+))?';

    // Names that can be used as tag names: aliased elements first, then the elements
    $elNames  = \array_merge(\array_keys($this->aliases), \array_keys($this->elements));
    $nameExpr = RegexpBuilder::fromList($elNames);

    // An end tag only captures a name, a start tag captures a name and its attributes
    $endTagExpr   = '/(' . $nameExpr . ')';
    $startTagExpr = '(' . $nameExpr . ')((?>\\s+' . $attrExpr . ')*+)\\s*/?';

    $config = array(
        'quickMatch' => $this->quickMatch,
        'prefix'     => $this->prefix,
        'regexp'     => '#<(?>' . $endTagExpr . '|' . $startTagExpr . ')\\s*>#i'
    );

    if (!empty($this->aliases)) {
        // Each element's aliases are stored in their own Dictionary
        $dictionary = new Dictionary();
        foreach ($this->aliases as $elName => $elAliases) {
            $dictionary[$elName] = new Dictionary($elAliases);
        }
        $config['aliases'] = $dictionary;
    }

    return $config;
}
/**
 * Generate a regexp that matches the given list of words
 *
 * @param  array  $words List of words
 * @return Regexp        Regexp object with its JS counterpart set
 */
protected function getWordsRegexp(array $words)
{
    // Build the alternation, then turn its unescaped non-capturing groups into atomic groups
    $wordsExpr = \preg_replace(
        '/(?<!\\\\)((?>\\\\\\\\)*)\\(\\?:/',
        '$1(?>',
        RegexpBuilder::fromList($words, $this->regexpOptions)
    );

    // PCRE version: the match must not be surrounded by Unicode letters or numbers
    $wordsRegexp = new Regexp('/(?<![\\pL\\pN])' . $wordsExpr . '(?![\\pL\\pN])/Siu');

    // JS version: any [\pL\pN] found in the expression is replaced with an ASCII-only class
    // and the boundaries are expressed with \W and \w
    $jsExpr = \str_replace('[\\pL\\pN]', '[^\\s!-\\/:-?]', $wordsExpr);
    $wordsRegexp->setJS('/(?:^|\\W)' . $jsExpr . '(?!\\w)/gi');

    return $wordsRegexp;
}
/**
 * Get the regexp used to remove meta elements from the intermediate representation
 *
 * The meta elements are "e", "i" and "s". A meta element is left out of the regexp if it
 * has a non-empty template of its own or if its name appears as an element name in any of
 * the XPath expressions used by the templates.
 *
 * @param  array  $templates
 * @return string
 */
public static function getMetaElementsRegexp(array $templates)
{
    // Coalesce all templates and load them into DOM
    $dom = new DOMDocument();
    $dom->loadXML(
        '<xsl:template xmlns:xsl="http://www.w3.org/1999/XSL/Transform">' . implode('', $templates) . '</xsl:template>'
    );
    $xpath = new DOMXPath($dom);

    $xpathExprs = [];

    // Collect the values of all the "match", "select" and "test" attributes of XSL elements
    foreach ($xpath->query('//xsl:*/@*[contains("matchselectest", name())]') as $attribute) {
        $xpathExprs[] = $attribute->value;
    }

    // Collect the XPath expressions used in all the attributes of non-XSL elements
    foreach ($xpath->query('//*[namespace-uri() != "' . self::XMLNS_XSL . '"]/@*') as $attribute) {
        foreach (AVTHelper::parse($attribute->value) as $token) {
            if ($token[0] !== 'expression') {
                continue;
            }
            $xpathExprs[] = $token[1];
        }
    }

    // Names of the meta elements, minus those that have a non-empty template and would
    // therefore be rendered
    $metaNames = [];
    foreach (['e', 'i', 's'] as $name) {
        $isRendered = isset($templates[$name]) && $templates[$name] !== '';
        if (!$isRendered) {
            $metaNames[$name] = true;
        }
    }

    // Look for the remaining names used as element names in the collected expressions,
    // e.g. "s" in "//s" but not in "@s" or "$s"
    $regexp = '(\\b(?<![$@])(' . implode('|', array_keys($metaNames)) . ')(?!-)\\b)';
    preg_match_all($regexp, implode("\n", $xpathExprs), $matches);

    // Names that are referenced by an expression must be preserved
    $metaNames = array_diff_key($metaNames, array_flip($matches[0]));

    if (empty($metaNames)) {
        // Always-false regexp
        return '((?!))';
    }

    return '(<' . RegexpBuilder::fromList(array_keys($metaNames)) . '>[^<]*</[^>]+>)';
}