예제 #1
0
 /**
  * Set the content of this map
  *
  * Keys that share the same value are grouped together and compiled into one anchored regexp,
  * which is stored next to that value in $this->vars['map']
  *
  * @param  array $map           Map whose keys become regexps and whose values are kept as-is
  * @param  bool  $caseSensitive Whether the generated regexps are case-sensitive
  * @param  bool  $strict        Whether a catch-all entry that maps to FALSE is appended
  * @return void
  *
  * @throws InvalidArgumentException if $caseSensitive or $strict is not a boolean
  */
 public function setMap(array $map, $caseSensitive = \false, $strict = \false)
 {
     if (\is_bool($caseSensitive) === \false) {
         throw new InvalidArgumentException('Argument 2 passed to ' . __METHOD__ . ' must be a boolean');
     }
     if (\is_bool($strict) === \false) {
         throw new InvalidArgumentException('Argument 3 passed to ' . __METHOD__ . ' must be a boolean');
     }

     // Safeness marks are reset first, then reassessed only for strict maps
     $this->resetSafeness();
     if ($strict === \true) {
         $this->assessSafeness($map);
     }

     // Invert the map: collect the list of keys that lead to each value
     $keysByValue = array();
     foreach ($map as $word => $replacement) {
         $keysByValue[$replacement][] = $word;
     }

     // Build one [Regexp, value] pair per distinct value
     $builderOptions = array('delimiter' => '/', 'caseInsensitive' => !$caseSensitive);
     $entries = array();
     foreach ($keysByValue as $replacement => $words) {
         $pattern = '/^' . RegexpBuilder::fromList($words, $builderOptions) . '$/D';
         if ($caseSensitive === \false) {
             $pattern .= 'i';
         }
         // Anything outside of ASCII requires the Unicode flag
         if (\preg_match('#^[[:ascii:]]*$#D', $pattern) !== 1) {
             $pattern .= 'u';
         }
         $entries[] = array(new Regexp($pattern), $replacement);
     }

     // Strict maps end with a catch-all entry that maps to FALSE
     if ($strict === \true) {
         $entries[] = array('//', \false);
     }

     $this->vars['map'] = $entries;
 }
예제 #2
0
 /**
  * Create a BBCode configuration from its reference usage
  *
  * @param  string $usage BBCode usage: a start tag, optionally followed by content and an end tag
  * @return array         Config with the keys "tag", "bbcode", "passthroughToken", "bbcodeName"
  *                       and "tokens"
  *
  * @throws InvalidArgumentException if the usage does not match the expected BBCode syntax
  */
 protected function parse($usage)
 {
     $tag = new Tag();
     $bbcode = new BBCode();
     // This is the array that gets returned; "bbcodeName" and "tokens" are added further down
     $config = array('tag' => $tag, 'bbcode' => $bbcode, 'passthroughToken' => \null);
     // Base64-encode the content of {MAP=...} and {HASHMAP=...} tokens so that their special
     // characters do not interfere with the parsing done below
     $usage = \preg_replace_callback('#(\\{(?>HASH)?MAP=)([^:]+:[^,;}]+(?>,[^:]+:[^,;}]+)*)(?=[;}])#', function ($m) {
         return $m[1] . \base64_encode($m[2]);
     }, $usage);
     // Same treatment for the regexps found in {PARSE=...} and {REGEXP=...} tokens
     $usage = \preg_replace_callback('#(\\{(?:PARSE|REGEXP)=)(' . self::REGEXP . '(?:,' . self::REGEXP . ')*)#', function ($m) {
         return $m[1] . \base64_encode($m[2]);
     }, $usage);
     // Captures the BBCode name, an optional default attribute, the other attribute definitions
     // and, if there is an end tag, the content between the start tag and the end tag. The \1
     // backreference makes the end tag's name match the start tag's (case-insensitively)
     $regexp = '(^' . '\\[(?<bbcodeName>\\S+?)' . '(?<defaultAttribute>=\\S+?)?' . '(?<attributes>(?:\\s+[^=]+=\\S+?)*?)?' . '\\s*(?:/?\\]|\\]\\s*(?<content>.*?)\\s*(?<endTag>\\[/\\1]))$)i';
     if (!\preg_match($regexp, \trim($usage), $m)) {
         throw new InvalidArgumentException('Cannot interpret the BBCode definition');
     }
     $config['bbcodeName'] = BBCode::normalizeName($m['bbcodeName']);
     // Split the attributes on whitespace to get a list of "name=value" definitions
     $definitions = \preg_split('#\\s+#', \trim($m['attributes']), -1, \PREG_SPLIT_NO_EMPTY);
     // The default attribute is prepended to the list, named after the BBCode as it was typed
     if (!empty($m['defaultAttribute'])) {
         \array_unshift($definitions, $m['bbcodeName'] . $m['defaultAttribute']);
     }
     if (!empty($m['content'])) {
         // Content that consists of a single unfiltered token (optionally followed by digits)
         // is recorded as the passthrough token, without its braces. Any other content becomes
         // a "content" attribute definition
         $regexp = '#^\\{' . RegexpBuilder::fromList($this->unfilteredTokens) . '[0-9]*\\}$#D';
         if (\preg_match($regexp, $m['content'])) {
             $config['passthroughToken'] = \substr($m['content'], 1, -1);
         } else {
             $definitions[] = 'content=' . $m['content'];
             $bbcode->contentAttributes[] = 'content';
         }
     }
     // Sort the definitions into BBCode options, tag rules and attribute definitions
     $attributeDefinitions = array();
     foreach ($definitions as $definition) {
         // NOTE(review): if a definition contains no "=", strpos() returns false and $name ends
         // up empty, which makes $name[0] below an invalid offset -- confirm whether the regexp
         // above can let such a definition through (e.g. whitespace inside an attribute name)
         $pos = \strpos($definition, '=');
         $name = \substr($definition, 0, $pos);
         $value = \substr($definition, 1 + $pos);
         // Restore the tokens that were base64-encoded at the top of this method
         $value = \preg_replace_callback('#(\\{(?>HASHMAP|MAP|PARSE|REGEXP)=)([A-Za-z0-9+/]+=*)#', function ($m) {
             return $m[1] . \base64_decode($m[2]);
         }, $value);
         if ($name[0] === '$') {
             // "$name=value" sets a property on the BBCode
             $optionName = \substr($name, 1);
             $bbcode->{$optionName} = $this->convertValue($value);
         } elseif ($name[0] === '#') {
             // "#name=a,b" calls the rule method once per comma-separated value
             $ruleName = \substr($name, 1);
             foreach (\explode(',', $value) as $value) {
                 $tag->rules->{$ruleName}($this->convertValue($value));
             }
         } else {
             // Anything else is an attribute definition, its name is trimmed and lowercased
             $attrName = \strtolower(\trim($name));
             $attributeDefinitions[] = array($attrName, $value);
         }
     }
     $tokens = $this->addAttributes($attributeDefinitions, $bbcode, $tag);
     // Drop the passthrough token if it appears as a key in the token table
     if (isset($tokens[$config['passthroughToken']])) {
         $config['passthroughToken'] = \null;
     }
     // array_filter() without a callback discards the entries whose value is falsy
     $config['tokens'] = \array_filter($tokens);
     return $config;
 }
예제 #3
0
 /**
  * Build a regexp that matches every hostmask stored in this list
  *
  * Each item goes through normalizeHostmask() before being handed to RegexpBuilder, which is
  * told to map "*" to ".*" and to keep "^" and "$" as-is via its specialChars option
  *
  * @return string Regexp delimited with "/" and carrying the "DSis" flags
  */
 public function getRegexp()
 {
     $specialChars = ['*' => '.*', '^' => '^', '$' => '$'];

     $hostmasks = [];
     foreach ($this->items as $item) {
         $hostmasks[] = $this->normalizeHostmask($item);
     }

     return '/' . RegexpBuilder::fromList($hostmasks, ['specialChars' => $specialChars]) . '/DSis';
 }
예제 #4
0
 /**
  * Generate this plugin's config
  *
  * The regexp is anchored on one of the allowed URL schemes followed by "://" and, if matchWww
  * is enabled, alternatively on "www."
  *
  * @return array Config with the keys "attrName", "regexp" and "tagName", plus "quickMatch"
  *               when matchWww is disabled
  */
 public function asConfig()
 {
     $matchWww = (bool) $this->matchWww;
     $schemes  = $this->configurator->urlConfig->getAllowedSchemes();

     $prefix = RegexpBuilder::fromList($schemes) . '://';
     if ($matchWww) {
         $prefix = '(?:' . $prefix . '|www\\.)';
     }

     $config = array(
         'attrName' => $this->attrName,
         'regexp'   => '#' . $prefix . '\\S(?>[^\\s\\[\\]]*(?>\\[\\w*\\])?)++#iS',
         'tagName'  => $this->tagName
     );

     // "://" can only serve as a quick match if every match is required to contain it
     if ($matchWww === \false) {
         $config['quickMatch'] = '://';
     }

     return $config;
 }
예제 #5
0
 /**
  * Generate this plugin's config
  *
  * @return array|null Config with the keys "quickMatch" and "regexp", or NULL if URL capture is
  *                    disabled or the collection is empty
  */
 public function asConfig()
 {
     if (!($this->captureURLs && \count($this->collection))) {
         return;
     }

     $schemes    = $this->getSchemes();
     $hasSchemes = !empty($schemes);

     // URLs start with http:// or https://, or with one of the extra schemes followed by ":"
     $urlStart = 'https?:\\/\\/';
     if ($hasSchemes) {
         $urlStart = '(?>' . RegexpBuilder::fromList($schemes) . ':|' . $urlStart . ')';
     }

     $config = array();
     // With extra schemes, ":" is the only substring all matches have in common
     $config['quickMatch'] = $hasSchemes ? ':' : '://';
     $config['regexp'] = '/\\b' . $urlStart . '[^["\'\\s]+/S';

     return $config;
 }
예제 #6
0
 /**
  * Generate this plugin's config
  *
  * @return array Config with the keys "attrName" and "tagName". If aliases are set, "aliases"
  *               and "aliasesRegexp" are added, as well as "aliasesQuickMatch" if one could be
  *               generated
  */
 public function asConfig()
 {
     $config = array('attrName' => $this->attrName, 'tagName' => $this->tagName);
     if (empty($this->aliases)) {
         return $config;
     }

     // The regexp and the quick match are both derived from the aliases' keys
     $aliasNames = \array_keys($this->aliases);
     $pattern = '/' . RegexpBuilder::fromList($aliasNames) . '/';

     $config['aliases'] = $this->aliases;
     $config['aliasesRegexp'] = new Regexp($pattern, \true);

     $quickMatch = ConfigHelper::generateQuickMatchFromList($aliasNames);
     if ($quickMatch !== \false) {
         $config['aliasesQuickMatch'] = $quickMatch;
     }

     return $config;
 }
예제 #7
0
 /**
  * Set the list of allowed values
  *
  * The values are compiled into a single anchored regexp that is passed to setRegexp()
  *
  * @param  array $values        List of allowed values
  * @param  bool  $caseSensitive Whether the values are matched case-sensitively
  * @return void
  *
  * @throws InvalidArgumentException if $caseSensitive is not a boolean
  */
 public function setValues(array $values, $caseSensitive = \false)
 {
     if (\is_bool($caseSensitive) === \false) {
         throw new InvalidArgumentException('Argument 2 passed to ' . __METHOD__ . ' must be a boolean');
     }

     $pattern = '/^' . RegexpBuilder::fromList($values, array('delimiter' => '/')) . '$/D';
     if ($caseSensitive === \false) {
         $pattern .= 'i';
     }

     // Anything outside of ASCII requires the Unicode flag
     $isAscii = (\preg_match('#^[[:ascii:]]*$#D', $pattern) === 1);
     if (!$isAscii) {
         $pattern .= 'u';
     }

     $this->setRegexp($pattern);
 }
예제 #8
0
 /**
  * Replace the list of values accepted by this filter
  *
  * @param  array $values        Accepted values
  * @param  bool  $caseSensitive Whether the values must match case-sensitively
  * @return void
  *
  * @throws InvalidArgumentException if $caseSensitive is not a boolean
  */
 public function setValues(array $values, $caseSensitive = false)
 {
     if (is_bool($caseSensitive) === false) {
         throw new InvalidArgumentException('Argument 2 passed to ' . __METHOD__ . ' must be a boolean');
     }

     // Compile the values into an expression that has to match the whole string
     $expr    = RegexpBuilder::fromList($values, ['delimiter' => '/']);
     $pattern = '/^' . $expr . '$/D';

     // Case-insensitive lists get the "i" flag
     $pattern .= ($caseSensitive) ? '' : 'i';

     // The "u" flag is needed as soon as the regexp contains anything outside of ASCII
     if (preg_match('#^[[:ascii:]]*$#D', $pattern) !== 1) {
         $pattern .= 'u';
     }

     // Hand the finished regexp over to this filter
     $this->setRegexp($pattern);
 }
예제 #9
0
 /**
  * Replace the content of this map
  *
  * @param  array $map           Associative array in the form [word => replacement]
  * @param  bool  $caseSensitive Whether words are matched case-sensitively
  * @param  bool  $strict        Whether values that match none of the words are invalid
  * @return void
  *
  * @throws InvalidArgumentException if $caseSensitive or $strict is not a boolean
  */
 public function setMap(array $map, $caseSensitive = false, $strict = false)
 {
     if (is_bool($caseSensitive) === false) {
         throw new InvalidArgumentException('Argument 2 passed to ' . __METHOD__ . ' must be a boolean');
     }
     if (is_bool($strict) === false) {
         throw new InvalidArgumentException('Argument 3 passed to ' . __METHOD__ . ' must be a boolean');
     }

     // Safeness marks from any previous map no longer apply. Only a strict map gets reassessed
     $this->resetSafeness();
     if ($strict === true) {
         $this->assessSafeness($map);
     }

     // Collect the words that lead to each replacement
     $wordsByReplacement = [];
     foreach ($map as $word => $replacement) {
         $wordsByReplacement[$replacement][] = $word;
     }

     // Each group of words becomes one [regexp, replacement] pair
     $pairs   = [];
     $options = ['delimiter' => '/', 'caseInsensitive' => !$caseSensitive];
     foreach ($wordsByReplacement as $replacement => $words) {
         $pattern = '/^' . RegexpBuilder::fromList($words, $options) . '$/D';

         // Case-insensitive maps get the "i" flag
         $pattern .= ($caseSensitive) ? '' : 'i';

         // The "u" flag is needed as soon as the regexp contains anything outside of ASCII
         if (preg_match('#^[[:ascii:]]*$#D', $pattern) !== 1) {
             $pattern .= 'u';
         }

         $pairs[] = [new Regexp($pattern), $replacement];
     }

     // A strict map ends with a catch-all regexp that replaces the value with FALSE
     if ($strict === true) {
         $pairs[] = ['//', false];
     }

     // Store the finished map in this filter's variables
     $this->vars['map'] = $pairs;
 }
예제 #10
0
 /**
  * Generate this plugin's config
  *
  * @return array|null Config with the keys "quickMatch", "regexp" and "tagName", plus
  *                    "notAfter" if a notAfter expression is set, or NULL if the collection
  *                    is empty
  */
 public function asConfig()
 {
     if (\count($this->collection) === 0) {
         return;
     }

     // The codes to match are the keys of the collection
     $codes = \array_keys(\iterator_to_array($this->collection));

     // Surround the codes with the optional lookbehind and lookahead assertions
     $hasNotAfter = ($this->notAfter !== '');
     $lookbehind  = ($hasNotAfter) ? '(?<!' . $this->notAfter . ')' : '';
     $lookahead   = ($this->notBefore !== '') ? '(?!' . $this->notBefore . ')' : '';
     $pattern     = '/' . $lookbehind . RegexpBuilder::fromList($codes) . $lookahead . '/S';

     // Unicode properties such as \pL or \p{L} require the "u" flag
     if (\preg_match('/\\\\[pP](?>\\{\\^?\\w+\\}|\\w\\w?)/', $pattern)) {
         $pattern .= 'u';
     }

     // Turn every unescaped "(?:" into the atomic "(?>", which has the same length
     $pattern = \preg_replace('/(?<!\\\\)((?>\\\\\\\\)*)\\(\\?:/', '$1(?>', $pattern);

     $config = array('quickMatch' => $this->quickMatch, 'regexp' => $pattern, 'tagName' => $this->tagName);

     if ($hasNotAfter) {
         // The JS regexp is built from what stands between the lookbehind and the final "/".
         // The lookbehind spans 6 characters, "/(?<!" and ")", plus the notAfter expression
         $start  = 6 + \strlen($this->notAfter);
         $length = \strrpos($pattern, '/') - $start;
         $jsRegexp = RegexpConvertor::toJS('/' . \substr($pattern, $start, $length) . '/', \true);

         $regexpVariant = new Variant($pattern);
         $regexpVariant->set('JS', $jsRegexp);
         $config['regexp'] = $regexpVariant;

         // The notAfter expression is exposed as a variant that only has a JS value
         $notAfterVariant = new Variant();
         $notAfterVariant->set('JS', RegexpConvertor::toJS('/' . $this->notAfter . '/'));
         $config['notAfter'] = $notAfterVariant;
     }

     // Fall back to a generated quick match if none was set
     if ($this->quickMatch === \false) {
         $config['quickMatch'] = ConfigHelper::generateQuickMatchFromList($codes);
     }

     return $config;
 }
예제 #11
0
 /**
  * Generate this plugin's config
  *
  * @return array|null Config with the keys "attrName", "tagName" and "regexps", plus
  *                    "onlyFirst" if it is set, or NULL if the collection is empty
  */
 public function asConfig()
 {
     if (count($this->collection) === 0) {
         return;
     }

     $config = ['attrName' => $this->attrName, 'tagName' => $this->tagName];
     if (!empty($this->onlyFirst)) {
         $config['onlyFirst'] = $this->onlyFirst;
     }

     // Duplicates are removed because they would skew the length computation below. Sorting
     // keeps the keywords that start with the same characters next to each other
     $keywords = array_unique(iterator_to_array($this->collection));
     sort($keywords);

     // Split the keywords into chunks of ~30KB to remain below PCRE's limit
     $chunks   = [];
     $chunk    = [];
     $chunkLen = 0;
     foreach ($keywords as $keyword) {
         // NOTE: the value 4 is a guesstimate for the cost of each alternation
         $cost = 4 + strlen($keyword);
         if ($chunkLen + $cost > 30000) {
             // The current chunk is full. It can only be empty if the very first keyword is
             // oversized, in which case there is nothing to save
             if (!empty($chunk)) {
                 $chunks[] = $chunk;
             }
             $chunk    = [];
             $chunkLen = 0;
         }
         $chunk[]   = $keyword;
         $chunkLen += $cost;
     }
     if (!empty($chunk)) {
         $chunks[] = $chunk;
     }

     // Create one regexp per chunk
     $caseInsensitive = !$this->caseSensitive;
     foreach ($chunks as $chunkKeywords) {
         $expr    = RegexpBuilder::fromList($chunkKeywords, ['caseInsensitive' => $caseInsensitive]);
         $pattern = '/\\b' . $expr . '\\b/S';
         if ($caseInsensitive) {
             $pattern .= 'i';
         }
         // Anything outside of ASCII requires the Unicode flag
         if (preg_match('/[^[:ascii:]]/', $pattern)) {
             $pattern .= 'u';
         }
         $config['regexps'][] = new Regexp($pattern, true);
     }

     return $config;
 }
예제 #12
0
 /**
  * Build the regexp that matches any of the given words
  *
  * @param  array   $words Words to match
  * @return Variant        PCRE regexp wrapped in a Variant, along with its "JS" counterpart
  */
 protected function getWordsRegexp(array $words)
 {
     // Turn every unescaped "(?:" into "(?>" so that groups are atomic, for performance. In
     // theory that could prevent some matches, in practice it isn't expected to
     $atomic = preg_replace(
         '/(?<!\\\\)((?>\\\\\\\\)*)\\(\\?:/',
         '$1(?>',
         RegexpBuilder::fromList($words, $this->regexpOptions)
     );

     // The PCRE version rejects matches that are adjacent to a Unicode letter or number
     $pcre = '/(?<![\\pL\\pN])' . $atomic . '(?![\\pL\\pN])/Siu';

     // JavaScript regexps don't support Unicode properties. Any [\pL\pN] found in the
     // expression is swapped for a class that excludes whitespace and common punctuation
     $jsExpr = str_replace('[\\pL\\pN]', '[^\\s!-\\/:-?]', $atomic);
     $js     = RegexpConvertor::toJS('/(?:^|\\W)' . $jsExpr . '(?!\\w)/gi', true);

     return new Variant($pcre, ['JS' => $js]);
 }
예제 #13
0
 /**
  * Generate this plugin's config
  *
  * @return array|null Config with the keys "quickMatch", "prefix" and "regexp", plus "aliases"
  *                    if any are set, or NULL if there are neither elements nor aliases
  */
 public function asConfig()
 {
     $hasAliases = !empty($this->aliases);
     if (!$hasAliases && empty($this->elements)) {
         return;
     }

     /**
      * Regexp used to match an attributes definition (name + value if applicable)
      *
      * @link http://dev.w3.org/html5/spec/syntax.html#attributes-0
      */
     $attrRegexp = '[a-z][-a-z0-9]*(?>\\s*=\\s*(?>"[^"]*"|\'[^\']*\'|[^\\s"\'=<>`]+))?';

     // Tag names are the names of the aliased elements followed by the names of the elements
     $tagNames  = array_merge(array_keys($this->aliases), array_keys($this->elements));
     $tagRegexp = RegexpBuilder::fromList($tagNames);

     // An end tag is a slash and a name. A start tag is a name followed by any number of
     // attributes and an optional slash
     $endTag   = '/(' . $tagRegexp . ')';
     $startTag = '(' . $tagRegexp . ')((?>\\s+' . $attrRegexp . ')*+)\\s*/?';

     $config = [
         'quickMatch' => $this->quickMatch,
         'prefix'     => $this->prefix,
         'regexp'     => '#<(?>' . $endTag . '|' . $startTag . ')\\s*>#i'
     ];

     if ($hasAliases) {
         // Dictionary is used so that the aliases' keys are preserved in JavaScript
         $dictionary = new Dictionary();
         foreach ($this->aliases as $elName => $elAliases) {
             $dictionary[$elName] = new Dictionary($elAliases);
         }
         $config['aliases'] = $dictionary;
     }

     return $config;
 }
예제 #14
0
 /**
  * Generate a regexp that matches the given list of words
  *
  * @param  array   $words List of words
  * @return Variant        PCRE regexp in a Variant container, with a "JS" variant
  */
 protected function getWordsRegexp(array $words)
 {
     $builderExpr = RegexpBuilder::fromList($words, $this->regexpOptions);

     // Replace every unescaped "(?:" with the atomic "(?>"
     $atomicExpr = \preg_replace('/(?<!\\\\)((?>\\\\\\\\)*)\\(\\?:/', '$1(?>', $builderExpr);

     // PCRE regexp: no Unicode letter or number directly before or after the match
     $pcreRegexp = '/(?<![\\pL\\pN])' . $atomicExpr . '(?![\\pL\\pN])/Siu';

     // Source of the JS regexp: [\pL\pN] classes in the expression are replaced with a class
     // that does not rely on Unicode properties
     $jsSource = '/(?:^|\\W)' . \str_replace('[\\pL\\pN]', '[^\\s!-\\/:-?]', $atomicExpr) . '(?!\\w)/gi';

     $variants = array('JS' => RegexpConvertor::toJS($jsSource, \true));

     return new Variant($pcreRegexp, $variants);
 }
예제 #15
0
 /**
  * Generate this plugin's config
  *
  * @return array|null Config with the keys "attrName", "tagName" and "regexps", plus
  *                    "onlyFirst" if it is set, or NULL if the collection is empty
  */
 public function asConfig()
 {
     if (\count($this->collection) === 0) {
         return;
     }

     $config = array('attrName' => $this->attrName, 'tagName' => $this->tagName);
     if (!empty($this->onlyFirst)) {
         $config['onlyFirst'] = $this->onlyFirst;
     }

     // Deduplicate the keywords then sort them
     $words = \array_unique(\iterator_to_array($this->collection));
     \sort($words);

     // Distribute the words into batches whose estimated size stays at or below 30000, each
     // word being counted as its length plus 4
     $batches   = array();
     $batchIdx  = 0;
     $batchSize = 0;
     foreach ($words as $word) {
         $cost = 4 + \strlen($word);
         if ($batchSize + $cost > 30000) {
             // This word does not fit, it starts the next batch
             $batchSize = 0;
             ++$batchIdx;
         }
         $batchSize += $cost;
         $batches[$batchIdx][] = $word;
     }

     // Create one regexp per batch
     $insensitive = !$this->caseSensitive;
     $options     = array('caseInsensitive' => $insensitive);
     foreach ($batches as $batch) {
         $pattern = '/\\b' . RegexpBuilder::fromList($batch, $options) . '\\b/S';
         if ($insensitive) {
             $pattern .= 'i';
         }
         // Anything outside of ASCII requires the Unicode flag
         if (\preg_match('/[^[:ascii:]]/', $pattern)) {
             $pattern .= 'u';
         }
         $config['regexps'][] = new Regexp($pattern, \true);
     }

     return $config;
 }
예제 #16
0
 /**
  * Create a BBCode based on its reference usage
  *
  * @param  string $usage BBCode usage, e.g. [B]{TEXT}[/b]
  * @return array         Config with the keys "tag", "bbcode", "passthroughToken", "bbcodeName"
  *                       and "tokens"
  *
  * @throws InvalidArgumentException if the usage does not match the expected BBCode syntax
  */
 protected function parse($usage)
 {
     $tag = new Tag();
     $bbcode = new BBCode();
     // This is the config we will return
     $config = ['tag' => $tag, 'bbcode' => $bbcode, 'passthroughToken' => null];
     // Encode maps to prevent their special characters from interfering with definitions
     $usage = preg_replace_callback('#(\\{(?>HASH)?MAP=)([^:]+:[^,;}]+(?>,[^:]+:[^,;}]+)*)(?=[;}])#', function ($m) {
         return $m[1] . base64_encode($m[2]);
     }, $usage);
     // Encode regexps to prevent their special characters from interfering with definitions
     $usage = preg_replace_callback('#(\\{(?:PARSE|REGEXP)=)(' . self::REGEXP . '(?:,' . self::REGEXP . ')*)#', function ($m) {
         return $m[1] . base64_encode($m[2]);
     }, $usage);
     // Captures the BBCode name, an optional default attribute, the other attribute definitions
     // and, if there is an end tag, the content between the start tag and the end tag. The \1
     // backreference makes the end tag's name match the start tag's (case-insensitively)
     $regexp = '(^' . '\\[(?<bbcodeName>\\S+?)' . '(?<defaultAttribute>=\\S+?)?' . '(?<attributes>(?:\\s+[^=]+=\\S+?)*?)?' . '\\s*(?:/?\\]|\\]\\s*(?<content>.*?)\\s*(?<endTag>\\[/\\1]))' . '$)i';
     if (!preg_match($regexp, trim($usage), $m)) {
         throw new InvalidArgumentException('Cannot interpret the BBCode definition');
     }
     // Save the BBCode's name
     $config['bbcodeName'] = BBCode::normalizeName($m['bbcodeName']);
     // Prepare the attributes definition, e.g. "foo={BAR}"
     $definitions = preg_split('#\\s+#', trim($m['attributes']), -1, PREG_SPLIT_NO_EMPTY);
     // If there's a default attribute, we prepend it to the list using the BBCode's name as
     // attribute name
     if (!empty($m['defaultAttribute'])) {
         array_unshift($definitions, $m['bbcodeName'] . $m['defaultAttribute']);
     }
     // Append the content token to the attributes list under the name "content" if it's anything
     // but raw {TEXT} (or other unfiltered tokens)
     if (!empty($m['content'])) {
         $regexp = '#^\\{' . RegexpBuilder::fromList($this->unfilteredTokens) . '[0-9]*\\}$#D';
         if (preg_match($regexp, $m['content'])) {
             // Store the token's name, without the braces
             $config['passthroughToken'] = substr($m['content'], 1, -1);
         } else {
             $definitions[] = 'content=' . $m['content'];
             $bbcode->contentAttributes[] = 'content';
         }
     }
     // Separate the attribute definitions from the BBCode options
     $attributeDefinitions = [];
     foreach ($definitions as $definition) {
         // NOTE(review): if a definition contains no "=", strpos() returns false and $name ends
         // up empty, which makes $name[0] below an invalid offset -- confirm whether the regexp
         // above can let such a definition through (e.g. whitespace inside an attribute name)
         $pos = strpos($definition, '=');
         $name = substr($definition, 0, $pos);
         $value = substr($definition, 1 + $pos);
         // Decode base64-encoded tokens
         $value = preg_replace_callback('#(\\{(?>HASHMAP|MAP|PARSE|REGEXP)=)([A-Za-z0-9+/]+=*)#', function ($m) {
             return $m[1] . base64_decode($m[2]);
         }, $value);
         // If the name starts with $ then it's a BBCode option, if it starts with # it's a rule
         // and otherwise it's an attribute definition
         if ($name[0] === '$') {
             $optionName = substr($name, 1);
             $bbcode->{$optionName} = $this->convertValue($value);
         } elseif ($name[0] === '#') {
             $ruleName = substr($name, 1);
             // Supports #denyChild=foo,bar by calling the rule once per comma-separated value
             foreach (explode(',', $value) as $value) {
                 $tag->rules->{$ruleName}($this->convertValue($value));
             }
         } else {
             // Attribute names are trimmed and lowercased
             $attrName = strtolower(trim($name));
             $attributeDefinitions[] = [$attrName, $value];
         }
     }
     // Add the attributes and get the token translation table
     $tokens = $this->addAttributes($attributeDefinitions, $bbcode, $tag);
     // Test whether the passthrough token is used for something else, in which case we need
     // to unset it
     if (isset($tokens[$config['passthroughToken']])) {
         $config['passthroughToken'] = null;
     }
     // Add the list of known (and only the known) tokens to the config. array_filter() without
     // a callback discards the entries whose value is falsy
     $config['tokens'] = array_filter($tokens);
     return $config;
 }
예제 #17
0
                $attributes[$type][] = $attrName;
            }
        }
    }
}
// Prefill with known attributes from HTML 5.0 and HTML 4.01. Each entry is a regexp fragment that
// is matched case-insensitively against attribute names
$regexps = ['CSS' => ['^style$'], 'JS' => ['^on', '^data-s9e-livepreview-postprocess$'], 'URL' => ['^action$', '^cite$', '^data$', '^formaction$', '^href$', '^icon$', '^manifest$', '^pluginspage$', '^poster$', 'src$']];
// Add an exact-match entry for every collected attribute that no existing fragment covers
foreach ($attributes as $type => $attrNames) {
    foreach ($attrNames as $attrName) {
        foreach ($regexps[$type] as $regexp) {
            // Test whether this attribute is already covered
            if (preg_match('/' . $regexp . '/i', $attrName)) {
                continue 2;
            }
        }
        $regexps[$type][] = '^' . $attrName . '$';
    }
}
$filepath = __DIR__ . '/../src/Configurator/Helpers/TemplateHelper.php';
$file = file_get_contents($filepath);
if ($file === false) {
    // Without this check, the failure would surface below as a misleading "Could not find"
    die("Could not read {$filepath}\n");
}
// Patch the regexp literal assigned to $regexp in each get<Type>Nodes() function
foreach ($regexps as $type => $typeRegexps) {
    // "^" and "$" are kept as anchors instead of being escaped
    $regexp = RegexpBuilder::fromList($typeRegexps, ['delimiter' => '/', 'specialChars' => ['^' => '^', '$' => '$']]);
    $file = preg_replace_callback('/(function get' . $type . 'Nodes\\(.*?\\$regexp = )\'.*?\'/s', function ($m) use($regexp) {
        return $m[1] . var_export('/' . $regexp . '/i', true);
    }, $file, 1, $cnt);
    // preg_replace_callback() returns NULL on error, which must not be written back to the file
    if ($file === null || $cnt !== 1) {
        die("Could not find {$type}\n");
    }
}
if (file_put_contents($filepath, $file) === false) {
    // Do not report success if the patched source could not be saved
    die("Could not write {$filepath}\n");
}
die("Done.\n");
예제 #18
0
 /**
  * Generate this plugin's config
  *
  * @return array|null Config with the keys "quickMatch", "regexp" and "tagName", plus
  *                    "notAfter" if a notAfter expression is set, or NULL if the collection
  *                    is empty
  */
 public function asConfig()
 {
     if (count($this->collection) === 0) {
         return;
     }

     // The emoticons' codes are the keys of the collection
     $codes = array_keys(iterator_to_array($this->collection));

     // Assemble the regexp: optional lookbehind, the codes, optional lookahead
     $hasNotAfter = ($this->notAfter !== '');
     $parts = ['/'];
     if ($hasNotAfter) {
         $parts[] = '(?<!' . $this->notAfter . ')';
     }
     $parts[] = RegexpBuilder::fromList($codes);
     if ($this->notBefore !== '') {
         $parts[] = '(?!' . $this->notBefore . ')';
     }
     $parts[] = '/S';
     $regexp = implode('', $parts);

     // Set the Unicode mode if Unicode properties are used
     if (preg_match('/\\\\[pP](?>\\{\\^?\\w+\\}|\\w\\w?)/', $regexp)) {
         $regexp .= 'u';
     }

     // Force atomic grouping for performance. "(?>" is as long as the "(?:" it replaces, so
     // the offsets computed below are not affected
     $regexp = preg_replace('/(?<!\\\\)((?>\\\\\\\\)*)\\(\\?:/', '$1(?>', $regexp);

     $config = [
         'quickMatch' => $this->quickMatch,
         'regexp'     => $regexp,
         'tagName'    => $this->tagName
     ];

     // JavaScript gets its own regexp, free of the lookbehind assertion, while the notAfter
     // subpattern is added to the config as a JS-only variant
     if ($hasNotAfter) {
         // The body starts right after the opening delimiter and the lookbehind assertion, and
         // ends before the closing delimiter
         $bodyStart = strlen('/(?<!' . $this->notAfter . ')');
         $bodyEnd   = strrpos($regexp, '/');
         $body      = substr($regexp, $bodyStart, $bodyEnd - $bodyStart);
         $jsRegexp  = RegexpConvertor::toJS('/' . $body . '/', true);

         $regexpVariant = new Variant($regexp);
         $regexpVariant->set('JS', $jsRegexp);
         $config['regexp'] = $regexpVariant;

         $notAfterVariant = new Variant();
         $notAfterVariant->set('JS', RegexpConvertor::toJS('/' . $this->notAfter . '/'));
         $config['notAfter'] = $notAfterVariant;
     }

     // Try to generate a quickMatch if none is set
     if ($this->quickMatch === false) {
         $config['quickMatch'] = ConfigHelper::generateQuickMatchFromList($codes);
     }

     return $config;
 }
예제 #19
0
 /**
  * Get a string suitable as a preg_replace() replacement for given PHP code
  *
  * The code is only supported if it consists exclusively of statements that append literals
  * and escaped attribute values to $this->out, and of conditional copies of an attribute
  *
  * @param  string     $php Original code
  * @return array|bool      Array of [regexp, replacement] if possible, or FALSE otherwise
  */
 protected static function getDynamicRendering($php)
 {
     // Replacement string, built piece by piece as the PHP code is consumed
     $rendering = '';
     // A single-quoted string literal
     $literal = "(?<literal>'((?>[^'\\\\]+|\\\\['\\\\])*)')";
     // The escaped value of an attribute
     $attribute = "(?<attribute>htmlspecialchars\\(\\\$node->getAttribute\\('([^']+)'\\),2\\))";
     $value = "(?<value>{$literal}|{$attribute})";
     // A statement that appends one or several concatenated values to the output
     $output = "(?<output>\\\$this->out\\.={$value}(?:\\.(?&value))*;)";
     // A conditional that outputs an attribute's name and value if the node has that attribute
     $copyOfAttribute = "(?<copyOfAttribute>if\\(\\\$node->hasAttribute\\('([^']+)'\\)\\)\\{\\\$this->out\\.=' \\g-1=\"'\\.htmlspecialchars\\(\\\$node->getAttribute\\('\\g-1'\\),2\\)\\.'\"';\\})";
     // Ensure that the whole code is made of nothing but supported statements
     $regexp = '(^(' . $output . '|' . $copyOfAttribute . ')*$)';
     if (!preg_match($regexp, $php, $m)) {
         return false;
     }
     // Attributes that are copied in the replacement
     $copiedAttributes = [];
     // Attributes whose value is used in the replacement
     $usedAttributes = [];
     // Consume the code one statement at a time. The A flag anchors each match at $offset
     $regexp = '(' . $output . '|' . $copyOfAttribute . ')A';
     $offset = 0;
     while (preg_match($regexp, $php, $m, 0, $offset)) {
         // Test whether it's normal output or a copy of attribute
         if ($m['output']) {
             // 12 === strlen('$this->out.=')
             $offset += 12;
             // Consume the concatenated values one at a time
             while (preg_match('(' . $value . ')A', $php, $m, 0, $offset)) {
                 // Test whether it's a literal or an attribute value
                 if ($m['literal']) {
                     // Unescape the literal
                     $str = stripslashes(substr($m[0], 1, -1));
                     // Escape special characters
                     $rendering .= preg_replace('([\\\\$](?=\\d))', '\\\\$0', $str);
                 } else {
                     // The attribute's name is the last capture of the match
                     $attrName = end($m);
                     // Generate a unique ID for this attribute name, we'll use it as a
                     // placeholder until we have the full list of captures and we can replace it
                     // with the capture number
                     if (!isset($usedAttributes[$attrName])) {
                         $usedAttributes[$attrName] = uniqid($attrName, true);
                     }
                     $rendering .= $usedAttributes[$attrName];
                 }
                 // Skip the match plus the next . or ;
                 $offset += 1 + strlen($m[0]);
             }
         } else {
             // The attribute's name is the last capture of the match
             $attrName = end($m);
             if (!isset($copiedAttributes[$attrName])) {
                 $copiedAttributes[$attrName] = uniqid($attrName, true);
             }
             $rendering .= $copiedAttributes[$attrName];
             $offset += strlen($m[0]);
         }
     }
     // Gather the names of the attributes used in the replacement either by copy or by value
     $attrNames = array_keys($copiedAttributes + $usedAttributes);
     // Sort them alphabetically
     sort($attrNames);
     // Keep a copy of the attribute names to be used in the filler subpattern
     $remainingAttributes = array_combine($attrNames, $attrNames);
     // Prepare the final regexp
     $regexp = '(^[^ ]+';
     // Number of the last capturing group added to the final regexp
     $index = 0;
     foreach ($attrNames as $attrName) {
         // Add a subpattern that matches (and skips) any attribute definition that is not one of
         // the remaining attributes we're trying to match
         $regexp .= '(?> (?!' . RegexpBuilder::fromList($remainingAttributes) . '=)[^=]+="[^"]*")*';
         unset($remainingAttributes[$attrName]);
         // A copied attribute is captured whole, name included. Otherwise the group is atomic
         // and captures nothing
         $regexp .= '(';
         if (isset($copiedAttributes[$attrName])) {
             self::replacePlaceholder($rendering, $copiedAttributes[$attrName], ++$index);
         } else {
             $regexp .= '?>';
         }
         $regexp .= ' ' . $attrName . '="';
         // An attribute used by value gets a capture around its value only
         if (isset($usedAttributes[$attrName])) {
             $regexp .= '(';
             self::replacePlaceholder($rendering, $usedAttributes[$attrName], ++$index);
         }
         $regexp .= '[^"]*';
         if (isset($usedAttributes[$attrName])) {
             $regexp .= ')';
         }
         $regexp .= '")?';
     }
     $regexp .= '.*)s';
     return [$regexp, $rendering];
 }
 /**
  * Build the regexp that matches any of the keys used in the dictionary
  *
  * @return string Regexp delimited with "/" and carrying the "g" flag
  */
 protected function getReplacementRegexp()
 {
     $keys = array_keys($this->dictionary);

     return '/' . RegexpBuilder::fromList($keys) . '/g';
 }
예제 #21
0
 /**
  * Convert this list of schemes into a regexp
  *
  * @return Regexp Case-insensitive regexp that matches any one of the schemes, anchored at both
  *                ends
  */
 public function asConfig()
 {
     $schemes = RegexpBuilder::fromList($this->items);
     $pattern = '/^' . $schemes . '$/Di';

     return new Regexp($pattern);
 }
예제 #22
0
 /**
  * Generate this plugin's config
  *
  * @return array|null Config with the keys "quickMatch", "prefix" and "regexp", plus "aliases"
  *                    if any are set, or NULL if there are neither elements nor aliases
  */
 public function asConfig()
 {
     $hasAliases = !empty($this->aliases);
     if (!$hasAliases && empty($this->elements)) {
         return;
     }

     // Matches an attribute's name, optionally followed by a quoted or unquoted value
     $attrRegexp = '[a-z][-a-z0-9]*(?>\\s*=\\s*(?>"[^"]*"|\'[^\']*\'|[^\\s"\'=<>`]+))?';

     // Tag names are the names of the aliased elements followed by the names of the elements
     $tagNames  = \array_merge(\array_keys($this->aliases), \array_keys($this->elements));
     $tagRegexp = RegexpBuilder::fromList($tagNames);

     // An end tag is a slash and a name. A start tag is a name followed by any number of
     // attributes and an optional slash
     $endTag   = '/(' . $tagRegexp . ')';
     $startTag = '(' . $tagRegexp . ')((?>\\s+' . $attrRegexp . ')*+)\\s*/?';

     $config = array(
         'quickMatch' => $this->quickMatch,
         'prefix'     => $this->prefix,
         'regexp'     => '#<(?>' . $endTag . '|' . $startTag . ')\\s*>#i'
     );

     if ($hasAliases) {
         // Both levels of the aliases array are wrapped in Dictionary objects
         $dictionary = new Dictionary();
         foreach ($this->aliases as $elName => $elAliases) {
             $dictionary[$elName] = new Dictionary($elAliases);
         }
         $config['aliases'] = $dictionary;
     }

     return $config;
 }
예제 #23
0
 /**
  * Generate a regexp that matches the given list of words
  *
  * @param  array  $words List of words
  * @return Regexp        Regexp object, with its JS counterpart set via setJS()
  */
 protected function getWordsRegexp(array $words)
 {
     // Build the expression then replace every unescaped "(?:" with the atomic "(?>"
     $atomicExpr = \preg_replace(
         '/(?<!\\\\)((?>\\\\\\\\)*)\\(\\?:/',
         '$1(?>',
         RegexpBuilder::fromList($words, $this->regexpOptions)
     );

     // The JS counterpart does not rely on Unicode properties: [\pL\pN] classes found in the
     // expression are replaced, and the surrounding assertions use \W and \w instead
     $jsExpr   = \str_replace('[\\pL\\pN]', '[^\\s!-\\/:-?]', $atomicExpr);
     $jsRegexp = '/(?:^|\\W)' . $jsExpr . '(?!\\w)/gi';

     // PCRE regexp: no Unicode letter or number directly before or after the match
     $wordsRegexp = new Regexp('/(?<![\\pL\\pN])' . $atomicExpr . '(?![\\pL\\pN])/Siu');
     $wordsRegexp->setJS($jsRegexp);

     return $wordsRegexp;
 }
예제 #24
0
 /**
  * Get the regexp used to remove meta elements from the intermediate representation
  *
  * The meta elements are "e", "i" and "s". An element is left out of the regexp if it has a
  * non-empty template of its own or if its name is used as an element name in any of the XPath
  * expressions found in the templates
  *
  * @param  array  $templates Templates, using tag names as keys
  * @return string            Regexp that matches the meta elements that can be removed, or a
  *                           regexp that never matches if there are none
  */
 public static function getMetaElementsRegexp(array $templates)
 {
     // Coalesce all templates and load them into DOM
     $dom = new DOMDocument();
     $dom->loadXML(
         '<xsl:template xmlns:xsl="http://www.w3.org/1999/XSL/Transform">' . implode('', $templates) . '</xsl:template>'
     );
     $xpath = new DOMXPath($dom);

     // Collect the values of all the "match", "select" and "test" attributes of XSL elements
     $xpathExprs = [];
     foreach ($xpath->query('//xsl:*/@*[contains("matchselectest", name())]') as $xslAttribute) {
         $xpathExprs[] = $xslAttribute->value;
     }

     // Collect the XPath expressions used in all the attributes of non-XSL elements
     $nonXslAttributes = $xpath->query('//*[namespace-uri() != "' . self::XMLNS_XSL . '"]/@*');
     foreach ($nonXslAttributes as $htmlAttribute) {
         foreach (AVTHelper::parse($htmlAttribute->value) as $token) {
             if ($token[0] !== 'expression') {
                 continue;
             }
             $xpathExprs[] = $token[1];
         }
     }

     // Start with the meta elements that have no template or an empty one. In the highly
     // unlikely event a meta element is rendered, it's left out of the list
     $metaNames = [];
     foreach (['e', 'i', 's'] as $metaName) {
         if (!isset($templates[$metaName]) || $templates[$metaName] === '') {
             $metaNames[] = $metaName;
         }
     }

     // Look for the names that are used as element names in the collected expressions,
     // e.g. "s" in "//s" but not in "@s" or "$s", and remove them from the list
     $regexp = '(\\b(?<![$@])(' . implode('|', $metaNames) . ')(?!-)\\b)';
     preg_match_all($regexp, implode("\n", $xpathExprs), $matches);
     $metaNames = array_values(array_diff($metaNames, $matches[0]));

     if (empty($metaNames)) {
         // Always-false regexp
         return '((?!))';
     }

     return '(<' . RegexpBuilder::fromList($metaNames) . '>[^<]*</[^>]+>)';
 }