/**
  * Build this plugin's configuration from the emoticon collection
  *
  * @return array|null Config array, or NULL (bare return) if the collection is empty
  */
 public function asConfig()
 {
     if (count($this->collection) === 0) {
         return;
     }

     // The keys of the collection are the emoticon codes
     $codes       = array_keys(iterator_to_array($this->collection));
     $hasNotAfter = ($this->notAfter !== '');

     // Assemble the pattern: optional lookbehind, the emoticons, optional lookahead
     $lookbehind = ($hasNotAfter) ? '(?<!' . $this->notAfter . ')' : '';
     $codesExpr  = RegexpBuilder::fromList($codes);
     $lookahead  = ($this->notBefore !== '') ? '(?!' . $this->notBefore . ')' : '';
     $regexp     = '/' . $lookbehind . $codesExpr . $lookahead . '/S';

     // Unicode properties (\pL, \p{Lu}, \P{^L}, ...) need the "u" modifier
     if (preg_match('/\\\\[pP](?>\\{\\^?\\w+\\}|\\w\\w?)/', $regexp) === 1) {
         $regexp .= 'u';
     }

     // Replace every unescaped "(?:" with the atomic "(?>" for performance
     $regexp = preg_replace('/(?<!\\\\)((?>\\\\\\\\)*)\\(\\?:/', '$1(?>', $regexp);

     $config = [
         'quickMatch' => $this->quickMatch,
         'regexp'     => $regexp,
         'tagName'    => $this->tagName
     ];

     if ($hasNotAfter) {
         // The JavaScript regexp must not contain the lookbehind assertion, so the pattern
         // is cut right after the leading "/(?<!...)" and right before the closing "/".
         // The notAfter subpattern is exposed separately, as a JS-only variant
         $start = strlen('/(?<!)') + strlen($this->notAfter);
         $end   = strrpos($regexp, '/');
         $inner = substr($regexp, $start, $end - $start);

         $jsRegexp      = RegexpConvertor::toJS('/' . $inner . '/', true);
         $regexpVariant = new Variant($regexp);
         $regexpVariant->set('JS', $jsRegexp);
         $config['regexp'] = $regexpVariant;

         $notAfterVariant = new Variant();
         $notAfterVariant->set('JS', RegexpConvertor::toJS('/' . $this->notAfter . '/'));
         $config['notAfter'] = $notAfterVariant;
     }

     // No quickMatch was set: try to generate one from the emoticon codes
     if ($this->quickMatch === false) {
         $config['quickMatch'] = ConfigHelper::generateQuickMatchFromList($codes);
     }

     return $config;
 }
 /**
  * Build this plugin's configuration from the list of words
  *
  * The returned array always contains "attrName", "regexp" and "tagName". A "replacements"
  * element is added if at least one word has a replacement that differs from the default
  * one, and an "allowed" element is added if the list of allowed words is not empty.
  *
  * @return array|null Config array, or NULL (bare return) if there are no words
  */
 public function asConfig()
 {
     $words = $this->getWords();
     if (empty($words)) {
         return;
     }

     $config = array(
         'attrName' => $this->attrName,
         'regexp'   => $this->getWordsRegexp(\array_keys($words)),
         'tagName'  => $this->tagName
     );

     // Group the words by replacement, ignoring those that use the default replacement
     $replacementWords = array();
     foreach ($words as $word => $replacement) {
         if (isset($replacement) && $replacement !== $this->defaultReplacement) {
             $replacementWords[$replacement][] = $word;
         }
     }

     // Create one anchored, case-insensitive regexp per custom replacement
     foreach ($replacementWords as $replacement => $wordList) {
         $wordsRegexp = '/^' . RegexpBuilder::fromList($wordList, $this->regexpOptions) . '$/Diu';

         $regexp = new Regexp($wordsRegexp);
         $regexp->setJS(RegexpConvertor::toJS(\str_replace('[\\pL\\pN]', '[^\\s!-\\/:-?]', $wordsRegexp)));

         // PHP converts integer-like string keys such as "42" to integers. The cast restores
         // the original string so that the replacement always has the same type
         $config['replacements'][] = array($regexp, (string) $replacement);
     }

     if (!empty($this->allowed)) {
         $config['allowed'] = $this->getWordsRegexp(\array_keys($this->allowed));
     }

     return $config;
 }
Beispiel #3
0
 /**
  * Generate a regexp that matches the given list of words
  *
  * @param  array   $words List of words
  * @return Variant        PCRE regexp in a Variant container, with a "JS" variant
  */
 protected function getWordsRegexp(array $words)
 {
     // Build the alternation, then turn every unescaped "(?:" into an atomic "(?>"
     $wordsExpr = \preg_replace(
         '/(?<!\\\\)((?>\\\\\\\\)*)\\(\\?:/',
         '$1(?>',
         RegexpBuilder::fromList($words, $this->regexpOptions)
     );

     // PCRE version: the match must not touch a Unicode letter or number on either side
     $pcre = '/(?<![\\pL\\pN])' . $wordsExpr . '(?![\\pL\\pN])/Siu';

     // JavaScript version: no lookbehind and no Unicode properties
     $jsExpr = \str_replace('[\\pL\\pN]', '[^\\s!-\\/:-?]', $wordsExpr);
     $js     = '/(?:^|\\W)' . $jsExpr . '(?!\\w)/gi';

     $variants = array('JS' => RegexpConvertor::toJS($js, \true));

     return new Variant($pcre, $variants);
 }
 /**
  * Build the regexp used to match any word from the given list
  *
  * @param  array   $words List of words
  * @return Variant        Regexp in a Variant container, with a JS variant
  */
 protected function getWordsRegexp(array $words)
 {
     $alternation = RegexpBuilder::fromList($words, $this->regexpOptions);

     // Atomic groups are used instead of non-capturing groups for performance. In theory
     // that could prevent some matches but in practice it should not happen
     $atomicExpr = preg_replace('/(?<!\\\\)((?>\\\\\\\\)*)\\(\\?:/', '$1(?>', $alternation);

     // The PCRE regexp relies on Unicode properties to detect word boundaries
     $pcreRegexp = '/(?<![\\pL\\pN])' . $atomicExpr . '(?![\\pL\\pN])/Siu';

     // JavaScript regexps do not support Unicode properties. Instead of Unicode letters and
     // numbers, any character that is not whitespace or common punctuation is accepted
     $jsSource = '/(?:^|\\W)';
     $jsSource .= str_replace('[\\pL\\pN]', '[^\\s!-\\/:-?]', $atomicExpr);
     $jsSource .= '(?!\\w)/gi';
     $jsRegexp = RegexpConvertor::toJS($jsSource, true);

     return new Variant($pcreRegexp, ['JS' => $jsRegexp]);
 }
Beispiel #5
0
    /**
     * Return the plugins' config
     *
     * Plugins that have no "parser" element are skipped.
     *
     * @return Dictionary
     */
    protected function getPluginsConfig()
    {
        // Matches a run of well-formed UTF-8 sequences (1 to 4 bytes each)
        $utf8Sequences = array('[[:ascii:]]', '[\\xC0-\\xDF][\\x80-\\xBF]', '[\\xE0-\\xEF][\\x80-\\xBF]{2}', '[\\xF0-\\xF7][\\x80-\\xBF]{3}');
        $utf8Regexp    = '#(?>' . \implode('|', $utf8Sequences) . ')+#';

        // Elements that stay at the top level of a plugin's config. Everything else is moved
        // into the plugin's parser as its local config
        $topLevelKeys = array('parser' => 1, 'quickMatch' => 1, 'regexp' => 1, 'regexpLimit' => 1);

        $plugins = new Dictionary();
        foreach ($this->config['plugins'] as $pluginName => $pluginConfig) {
            if (!isset($pluginConfig['parser'])) {
                continue;
            }

            // The class name is removed from the JavaScript config
            unset($pluginConfig['className']);

            // Keep only the first run of valid UTF-8 from quickMatch, or drop it altogether
            if (isset($pluginConfig['quickMatch'])) {
                $found = \preg_match($utf8Regexp, $pluginConfig['quickMatch'], $match);
                if (!$found) {
                    unset($pluginConfig['quickMatch']);
                } else {
                    $pluginConfig['quickMatch'] = $match[0];
                }
            }

            $pluginGlobals = \array_intersect_key($pluginConfig, $topLevelKeys);
            $pluginLocals  = \array_diff_key($pluginConfig, $topLevelKeys);

            // Convert the regexp to JavaScript unless it is already a Code instance
            if (isset($pluginGlobals['regexp']) && !($pluginGlobals['regexp'] instanceof Code)) {
                $pluginGlobals['regexp'] = RegexpConvertor::toJS($pluginGlobals['regexp'], \true);
            }

            // Wrap the parser's source in a function that embeds the local config
            $parserSource = '/**
				* @param {!string} text
				* @param {!Array.<Array>} matches
				*/
				function(text, matches)
				{
					/** @const */
					var config=' . $this->encode($pluginLocals) . ';
					' . $pluginGlobals['parser'] . '
				}';
            $pluginGlobals['parser'] = new Code($parserSource);

            $plugins[$pluginName] = $pluginGlobals;
        }

        return $plugins;
    }
Beispiel #6
0
 /**
  * Build this plugin's configuration from the emoticon collection
  *
  * @return array|null Config array, or NULL (bare return) if the collection is empty
  */
 public function asConfig()
 {
     if (\count($this->collection) === 0) {
         return;
     }

     $codes       = \array_keys(\iterator_to_array($this->collection));
     $useNotAfter = ($this->notAfter !== '');

     // Pieces of the pattern, in order: opening delimiter, optional lookbehind, the
     // emoticon codes, optional lookahead, closing delimiter and flags
     $pieces = array('/');
     if ($useNotAfter) {
         $pieces[] = '(?<!' . $this->notAfter . ')';
     }
     $pieces[] = RegexpBuilder::fromList($codes);
     if ($this->notBefore !== '') {
         $pieces[] = '(?!' . $this->notBefore . ')';
     }
     $pieces[] = '/S';
     $regexp   = \implode('', $pieces);

     // Enable Unicode mode if the pattern uses Unicode properties
     $usesUnicodeProperties = \preg_match('/\\\\[pP](?>\\{\\^?\\w+\\}|\\w\\w?)/', $regexp);
     if ($usesUnicodeProperties) {
         $regexp .= 'u';
     }

     // Use atomic groups in place of unescaped non-capturing groups
     $regexp = \preg_replace('/(?<!\\\\)((?>\\\\\\\\)*)\\(\\?:/', '$1(?>', $regexp);

     $config = array(
         'quickMatch' => $this->quickMatch,
         'regexp'     => $regexp,
         'tagName'    => $this->tagName
     );

     if ($useNotAfter) {
         // Build a JavaScript regexp without the lookbehind assertion: skip the first "/"
         // (1 character), "(?<!" and ")" (5 characters) and the notAfter subpattern itself,
         // and stop before the closing delimiter
         $from     = 6 + \strlen($this->notAfter);
         $length   = \strrpos($regexp, '/') - $from;
         $jsRegexp = RegexpConvertor::toJS('/' . \substr($regexp, $from, $length) . '/', \true);

         $regexpVariant = new Variant($regexp);
         $regexpVariant->set('JS', $jsRegexp);
         $config['regexp'] = $regexpVariant;

         // The notAfter subpattern is passed along as a JS-only variant
         $notAfterVariant = new Variant();
         $notAfterVariant->set('JS', RegexpConvertor::toJS('/' . $this->notAfter . '/'));
         $config['notAfter'] = $notAfterVariant;
     }

     // Generate a quickMatch from the codes if none was set
     if ($this->quickMatch === \false) {
         $config['quickMatch'] = ConfigHelper::generateQuickMatchFromList($codes);
     }

     return $config;
 }
 /**
  * @testdox toJS() escapes backslashes that precede literal line terminators
  */
 public function testBackslashBeforeLiteralLineTerminators()
 {
     // Input pattern: a backslash followed by a literal newline, between "/" delimiters
     $pcreRegexp = "/\\\n/";

     // Expected output: an escaped backslash followed by the two-character sequence \n
     $expectedJS = '/\\\\\\n/';

     $actualJS = RegexpConvertor::toJS($pcreRegexp);
     $this->assertEquals($expectedJS, $actualJS);
 }
Beispiel #8
0
    /**
     * Return the plugins' config
     *
     * Each plugin that has a parser gets an entry made of its global elements (parser,
     * quickMatch, regexp, regexpLimit). The rest of its config is embedded in its parser.
     *
     * @return Dictionary
     */
    protected function getPluginsConfig()
    {
        $plugins = new Dictionary();
        foreach ($this->config['plugins'] as $pluginName => $pluginConfig) {
            // Plugins without a parser are skipped
            if (!isset($pluginConfig['parser'])) {
                continue;
            }

            // Not needed in JavaScript
            unset($pluginConfig['className']);

            // Ensure that quickMatch is UTF-8 if present
            if (isset($pluginConfig['quickMatch'])) {
                // Well-formed UTF-8 sequences
                $sequences = [
                    '[[:ascii:]]',
                    '[\\xC0-\\xDF][\\x80-\\xBF]',
                    '[\\xE0-\\xEF][\\x80-\\xBF]{2}',
                    '[\\xF0-\\xF7][\\x80-\\xBF]{3}'
                ];
                $utf8Regexp = '#(?>' . implode('|', $sequences) . ')+#';

                // Keep only the first valid run of UTF-8, or remove quickMatch if there is none
                $isValid = preg_match($utf8Regexp, $pluginConfig['quickMatch'], $match);
                if ($isValid) {
                    $pluginConfig['quickMatch'] = $match[0];
                } else {
                    unset($pluginConfig['quickMatch']);
                }
            }

            // Keys of the elements that are kept in the global scope. Everything else is
            // moved into the plugin's parser
            $globalKeys = ['parser' => 1, 'quickMatch' => 1, 'regexp' => 1, 'regexpLimit' => 1];

            $localConfig = array_diff_key($pluginConfig, $globalKeys);
            $entry       = array_intersect_key($pluginConfig, $globalKeys);

            // Regexps that are not already JavaScript code are converted
            if (isset($entry['regexp'])) {
                $isCode = ($entry['regexp'] instanceof Code);
                if (!$isCode) {
                    $entry['regexp'] = RegexpConvertor::toJS($entry['regexp'], true);
                }
            }

            // Wrap the parser in a function that holds the plugin's local config
            $entry['parser'] = new Code('/**
				* @param {!string} text
				* @param {!Array.<Array>} matches
				*/
				function(text, matches)
				{
					/** @const */
					var config=' . $this->encode($localConfig) . ';
					' . $entry['parser'] . '
				}');

            $plugins[$pluginName] = $entry;
        }

        return $plugins;
    }
Beispiel #9
0
 /**
  * Convert this regexp to JavaScript code
  *
  * Delegates to RegexpConvertor::toJS() with the stored regexp and its isGlobal flag.
  *
  * @return \s9e\TextFormatter\Configurator\JavaScript\Code
  */
 public function toJS()
 {
     $pcreRegexp = $this->regexp;
     $globalFlag = $this->isGlobal;

     return RegexpConvertor::toJS($pcreRegexp, $globalFlag);
 }