/**
 * Build the configuration of this plugin from its collection of emoticons
 *
 * @return array|null Config array, or nothing (NULL) if the collection is empty
 */
public function asConfig()
{
    // Without any emoticon there is nothing to configure
    if (count($this->collection) === 0) {
        return;
    }

    // The collection's keys are the emoticon codes
    $emoticons = array_keys(iterator_to_array($this->collection));

    // Assemble the pattern piece by piece: optional lookbehind, the list of codes, optional
    // lookahead
    $usesNotAfter = ($this->notAfter !== '');
    $lookbehind   = ($usesNotAfter) ? '(?<!' . $this->notAfter . ')' : '';
    $alternation  = RegexpBuilder::fromList($emoticons);
    $lookahead    = ($this->notBefore !== '') ? '(?!' . $this->notBefore . ')' : '';

    $pattern = '/' . $lookbehind . $alternation . $lookahead . '/S';

    // Unicode properties (\p / \P) require the Unicode mode
    if (preg_match('/\\\\[pP](?>\\{\\^?\\w+\\}|\\w\\w?)/', $pattern)) {
        $pattern .= 'u';
    }

    // Turn unescaped non-capturing groups into atomic groups for performance
    $pattern = preg_replace('/(?<!\\\\)((?>\\\\\\\\)*)\\(\\?:/', '$1(?>', $pattern);

    $config = [
        'quickMatch' => $this->quickMatch,
        'regexp'     => $pattern,
        'tagName'    => $this->tagName,
    ];

    if ($usesNotAfter) {
        // The JavaScript regexp must not contain the lookbehind assertion, so it is cut out of
        // the pattern: skip the leading "/" (1 character) plus "(?<!" and ")" (5 characters)
        // plus the notAfter subpattern itself, and stop at the closing delimiter
        $bodyStart  = 6 + strlen($this->notAfter);
        $bodyLength = strrpos($pattern, '/') - $bodyStart;
        $jsRegexp   = RegexpConvertor::toJS('/' . substr($pattern, $bodyStart, $bodyLength) . '/', true);

        $regexpVariant = new Variant($pattern);
        $regexpVariant->set('JS', $jsRegexp);
        $config['regexp'] = $regexpVariant;

        // The notAfter subpattern is exposed separately, as a JS-only variant
        $notAfterVariant = new Variant();
        $notAfterVariant->set('JS', RegexpConvertor::toJS('/' . $this->notAfter . '/'));
        $config['notAfter'] = $notAfterVariant;
    }

    // Generate a quickMatch from the codes if none was set
    if ($config['quickMatch'] === false) {
        $config['quickMatch'] = ConfigHelper::generateQuickMatchFromList($emoticons);
    }

    return $config;
}
/**
 * Build the configuration of this plugin from its list of words
 *
 * The config holds the attribute name, the tag name and the regexp that matches every word.
 * Words with a replacement other than the default one are grouped by replacement under
 * "replacements", and the allowed words, if any, get their own regexp under "allowed".
 *
 * @return array|null Config array, or nothing (NULL) if there are no words
 */
public function asConfig()
{
    $censored = $this->getWords();
    if (!$censored) {
        return;
    }

    $config = array(
        'attrName' => $this->attrName,
        'regexp'   => $this->getWordsRegexp(\array_keys($censored)),
        'tagName'  => $this->tagName
    );

    // Group the words by custom replacement, skipping those without a replacement or with the
    // default one
    $wordsByReplacement = array();
    foreach ($censored as $word => $replacement) {
        if ($replacement === null || $replacement === $this->defaultReplacement) {
            continue;
        }

        $wordsByReplacement[$replacement][] = $word;
    }

    // Create one anchored regexp (with its JavaScript counterpart) per custom replacement
    foreach ($wordsByReplacement as $replacement => $group) {
        $groupRegexp = '/^' . RegexpBuilder::fromList($group, $this->regexpOptions) . '$/Diu';

        $regexp   = new Regexp($groupRegexp);
        $jsSource = \str_replace('[\\pL\\pN]', '[^\\s!-\\/:-?]', $groupRegexp);
        $regexp->setJS(RegexpConvertor::toJS($jsSource));

        $config['replacements'][] = array($regexp, $replacement);
    }

    if (!empty($this->allowed)) {
        $config['allowed'] = $this->getWordsRegexp(\array_keys($this->allowed));
    }

    return $config;
}
/**
 * Create the regexp that matches any of the given words
 *
 * @param  array   $words List of words
 * @return Variant        PCRE regexp in a Variant container, with a "JS" variant
 */
protected function getWordsRegexp(array $words)
{
    // Build the alternation, then turn unescaped non-capturing groups into atomic groups
    $alternation = \preg_replace(
        '/(?<!\\\\)((?>\\\\\\\\)*)\\(\\?:/',
        '$1(?>',
        RegexpBuilder::fromList($words, $this->regexpOptions)
    );

    // PCRE version: the match must not be surrounded by Unicode letters or numbers
    $pcre = '/(?<![\\pL\\pN])' . $alternation . '(?![\\pL\\pN])/Siu';

    // JavaScript version: the Unicode properties are replaced with a class that accepts
    // anything but whitespace and common punctuation
    $jsAlternation = \str_replace('[\\pL\\pN]', '[^\\s!-\\/:-?]', $alternation);
    $jsSource      = '/(?:^|\\W)' . $jsAlternation . '(?!\\w)/gi';

    return new Variant($pcre, array('JS' => RegexpConvertor::toJS($jsSource, \true)));
}
/**
 * Create the regexp that matches any of the given words
 *
 * @param  array   $words List of words
 * @return Variant        PCRE regexp in a Variant container, with a "JS" variant
 */
protected function getWordsRegexp(array $words)
{
    // Build the alternation and force atomic grouping for performance. In theory it could
    // prevent some matches, but in practice it should not happen
    $alternation = preg_replace(
        '/(?<!\\\\)((?>\\\\\\\\)*)\\(\\?:/',
        '$1(?>',
        RegexpBuilder::fromList($words, $this->regexpOptions)
    );

    // PCRE version: the match must not be surrounded by Unicode letters or numbers
    $pcre = '/(?<![\\pL\\pN])' . $alternation . '(?![\\pL\\pN])/Siu';

    // JavaScript regexps don't support Unicode properties, so instead of Unicode letters and
    // numbers we accept anything that is not whitespace or common punctuation
    $jsAlternation = str_replace('[\\pL\\pN]', '[^\\s!-\\/:-?]', $alternation);
    $jsSource      = '/(?:^|\\W)' . $jsAlternation . '(?!\\w)/gi';

    return new Variant($pcre, ['JS' => RegexpConvertor::toJS($jsSource, true)]);
}
/**
 * Collect the config of every plugin that has a parser
 *
 * For each plugin, the "parser", "quickMatch", "regexp" and "regexpLimit" entries are kept
 * as-is (the regexp being converted to JavaScript unless it is already Code) while every
 * other entry is encoded into the source of the plugin's parser function.
 *
 * @return Dictionary
 */
protected function getPluginsConfig()
{
    $plugins = new Dictionary();

    // Regexp that matches a run of well-formed UTF-8 sequences
    $utf8Sequences = array(
        '[[:ascii:]]',
        '[\\xC0-\\xDF][\\x80-\\xBF]',
        '[\\xE0-\\xEF][\\x80-\\xBF]{2}',
        '[\\xF0-\\xF7][\\x80-\\xBF]{3}'
    );
    $utf8Regexp = '#(?>' . \implode('|', $utf8Sequences) . ')+#';

    // Keys of the elements that stay in the global scope. Everything else is moved into the
    // plugin's parser
    $globalKeys = array('parser' => 1, 'quickMatch' => 1, 'regexp' => 1, 'regexpLimit' => 1);

    foreach ($this->config['plugins'] as $pluginName => $pluginConfig) {
        // Plugins without a parser are skipped
        if (!isset($pluginConfig['parser'])) {
            continue;
        }

        // Not needed in JavaScript
        unset($pluginConfig['className']);

        // Keep only the first run of valid UTF-8 from quickMatch, or drop it if there is none
        if (isset($pluginConfig['quickMatch'])) {
            if (\preg_match($utf8Regexp, $pluginConfig['quickMatch'], $matches)) {
                $pluginConfig['quickMatch'] = $matches[0];
            } else {
                unset($pluginConfig['quickMatch']);
            }
        }

        $globalConfig = \array_intersect_key($pluginConfig, $globalKeys);
        $localConfig  = \array_diff_key($pluginConfig, $globalKeys);

        if (isset($globalConfig['regexp']) && !($globalConfig['regexp'] instanceof Code)) {
            $globalConfig['regexp'] = RegexpConvertor::toJS($globalConfig['regexp'], \true);
        }

        // Wrap the parser's source in a function that declares the local config
        $parserSource = '/** * @param {!string} text * @param {!Array.<Array>} matches */ function(text, matches) { /** @const */ var config='
            . $this->encode($localConfig)
            . '; '
            . $globalConfig['parser']
            . ' }';
        $globalConfig['parser'] = new Code($parserSource);

        $plugins[$pluginName] = $globalConfig;
    }

    return $plugins;
}
/**
 * Build the configuration of this plugin from its collection of emoticons
 *
 * @return array|null Config array, or nothing (NULL) if the collection is empty
 */
public function asConfig()
{
    // Without any emoticon there is nothing to configure
    if (\count($this->collection) === 0) {
        return;
    }

    // The collection's keys are the emoticon codes
    $emoticons = \array_keys(\iterator_to_array($this->collection));

    // Assemble the pattern piece by piece: optional lookbehind, the list of codes, optional
    // lookahead
    $usesNotAfter = ($this->notAfter !== '');
    $lookbehind   = ($usesNotAfter) ? '(?<!' . $this->notAfter . ')' : '';
    $alternation  = RegexpBuilder::fromList($emoticons);
    $lookahead    = ($this->notBefore !== '') ? '(?!' . $this->notBefore . ')' : '';

    $pattern = '/' . $lookbehind . $alternation . $lookahead . '/S';

    // Unicode properties (\p / \P) require the Unicode mode
    if (\preg_match('/\\\\[pP](?>\\{\\^?\\w+\\}|\\w\\w?)/', $pattern)) {
        $pattern .= 'u';
    }

    // Turn unescaped non-capturing groups into atomic groups
    $pattern = \preg_replace('/(?<!\\\\)((?>\\\\\\\\)*)\\(\\?:/', '$1(?>', $pattern);

    $config = array(
        'quickMatch' => $this->quickMatch,
        'regexp'     => $pattern,
        'tagName'    => $this->tagName
    );

    if ($usesNotAfter) {
        // Cut the lookbehind out of the pattern for the JavaScript variant: skip the leading
        // "/" (1 character) plus "(?<!" and ")" (5 characters) plus the notAfter subpattern
        // itself, and stop at the closing delimiter
        $bodyStart  = 6 + \strlen($this->notAfter);
        $bodyLength = \strrpos($pattern, '/') - $bodyStart;
        $jsRegexp   = RegexpConvertor::toJS('/' . \substr($pattern, $bodyStart, $bodyLength) . '/', \true);

        $regexpVariant = new Variant($pattern);
        $regexpVariant->set('JS', $jsRegexp);
        $config['regexp'] = $regexpVariant;

        // The notAfter subpattern is exposed separately, as a JS-only variant
        $notAfterVariant = new Variant();
        $notAfterVariant->set('JS', RegexpConvertor::toJS('/' . $this->notAfter . '/'));
        $config['notAfter'] = $notAfterVariant;
    }

    // Generate a quickMatch from the codes if none was set
    if ($config['quickMatch'] === \false) {
        $config['quickMatch'] = ConfigHelper::generateQuickMatchFromList($emoticons);
    }

    return $config;
}
/**
 * @testdox toJS() escapes backslashes that precede literal line terminators
 */
public function testBackslashBeforeLiteralLineTerminators()
{
    // Input: a regexp whose body is a backslash followed by an actual line feed
    $regexp = "/\\\n/";

    // Expected JavaScript source: three backslashes followed by the letter "n"
    $expected = '/\\\\\\n/';

    $this->assertEquals($expected, RegexpConvertor::toJS($regexp));
}
/**
 * Collect the config of every plugin that has a parser
 *
 * For each plugin, the "parser", "quickMatch", "regexp" and "regexpLimit" entries are kept
 * as-is (the regexp being converted to JavaScript unless it is already Code) while every
 * other entry is encoded into the source of the plugin's parser function.
 *
 * @return Dictionary
 */
protected function getPluginsConfig()
{
    $plugins = new Dictionary();

    // Regexp that matches a run of well-formed UTF-8 sequences
    $utf8Sequences = [
        '[[:ascii:]]',
        '[\\xC0-\\xDF][\\x80-\\xBF]',
        '[\\xE0-\\xEF][\\x80-\\xBF]{2}',
        '[\\xF0-\\xF7][\\x80-\\xBF]{3}',
    ];
    $utf8Regexp = '#(?>' . implode('|', $utf8Sequences) . ')+#';

    // Keys of the elements that stay in the global scope. Everything else is moved into the
    // plugin's parser
    $globalKeys = ['parser' => 1, 'quickMatch' => 1, 'regexp' => 1, 'regexpLimit' => 1];

    foreach ($this->config['plugins'] as $pluginName => $pluginConfig) {
        // Plugins without a parser are skipped
        if (!isset($pluginConfig['parser'])) {
            continue;
        }

        // Not needed in JavaScript
        unset($pluginConfig['className']);

        // Keep only the first run of valid UTF-8 from quickMatch, or drop it if there is none
        if (isset($pluginConfig['quickMatch'])) {
            if (preg_match($utf8Regexp, $pluginConfig['quickMatch'], $matches)) {
                $pluginConfig['quickMatch'] = $matches[0];
            } else {
                unset($pluginConfig['quickMatch']);
            }
        }

        $globalConfig = array_intersect_key($pluginConfig, $globalKeys);
        $localConfig  = array_diff_key($pluginConfig, $globalKeys);

        if (isset($globalConfig['regexp']) && !($globalConfig['regexp'] instanceof Code)) {
            $globalConfig['regexp'] = RegexpConvertor::toJS($globalConfig['regexp'], true);
        }

        // Wrap the parser's source in a function that declares the local config
        $parserSource = '/** * @param {!string} text * @param {!Array.<Array>} matches */ function(text, matches) { /** @const */ var config='
            . $this->encode($localConfig)
            . '; '
            . $globalConfig['parser']
            . ' }';
        $globalConfig['parser'] = new Code($parserSource);

        $plugins[$pluginName] = $globalConfig;
    }

    return $plugins;
}
/**
 * Convert this regexp to JavaScript
 *
 * Passes the stored regexp and its global flag to RegexpConvertor::toJS() and returns the
 * result unchanged.
 *
 * @return \s9e\TextFormatter\Configurator\JavaScript\Code
 */
public function toJS()
{
    $jsCode = RegexpConvertor::toJS($this->regexp, $this->isGlobal);

    return $jsCode;
}