/**
 * Merges several regular expressions into one, using the indicated 'glue'.
 *
 * This function takes care of individual modifiers so it's safe to use
 * <i>different</i> modifiers on the individual expressions. The order of
 * sub-matches is preserved as well. Numbered back-references are adapted to
 * the new overall sub-match count. This means that it's safe to use numbered
 * back-references in the individual expressions!
 * If {@link $names} is given, the individual expressions are captured in
 * named sub-matches using the contents of that array as names. These wrapper
 * groups are capturing groups themselves and are taken into account when
 * back-references are renumbered.
 * Matching pair-delimiters (e.g. <var>"{…}"</var>) are currently
 * <b>not</b> supported.
 *
 * The function assumes that all regular expressions are well-formed.
 * Behaviour is undefined if they aren't.
 *
 * Known limitations: only back-references of the form <var>\1</var>,
 * <var>\2</var>, … are renumbered (absolute <var>\g{n}</var> references are
 * left untouched), parentheses inside <var>\Q…\E</var> sections or inside
 * extended-mode comments are not recognised as literals, and branch-reset
 * groups are not specially handled.
 *
 * This function was created after a
 * {@link http://stackoverflow.com/questions/244959/ StackOverflow discussion}.
 * Much of it was written or thought of by “porneL” and “eyelidlessness”. Many
 * thanks to both of them.
 *
 * @param string $glue A string to insert between the individual expressions.
 *      This should usually be either the empty string, indicating
 *      concatenation, or the pipe (<var>"|"</var>), indicating alternation.
 *      Notice that this string might have to be escaped since it is treated
 *      as a normal character in a regular expression (i.e. <var>"/"</var> will
 *      end the expression and result in an invalid output).
 * @param array $expressions The expressions to merge. The expressions may
 *      have arbitrary different delimiters and modifiers.
 * @param array $names Optional. This is either an empty array or an array of
 *      strings of the same length as {@link $expressions}. In that case,
 *      the strings of this array are used to create named sub-matches for the
 *      expressions. Spaces in names are replaced by underscores.
 * @return string|false A string representing a regular expression equivalent
 *      to the merged expressions. Returns <var>FALSE</var> if an error
 *      occurred.
 */
function preg_merge($glue, array $expressions, array $names = array())
{
    // Sanity check: the glue must be a string and, if names are used, there
    // has to be exactly one name per expression.
    $use_names = count($names) !== 0;
    if (!is_string($glue) || ($use_names && count($names) !== count($expressions))) {
        return false;
    }

    // Counts the capturing groups of a delimiter-less pattern. Escaped
    // characters and character classes are skipped so that "\(" and "[(]" are
    // not mistaken for groups; of the "(?…" constructs only named groups
    // capture, and "(*VERB)" constructs never do.
    $count_captures = function ($pattern) {
        $count = 0;
        $in_class = false;
        $length = strlen($pattern);
        for ($i = 0; $i < $length; $i++) {
            $char = $pattern[$i];
            if ($char === '\\') {
                $i++; // Skip the escaped character.
            } elseif ($in_class) {
                if ($char === ']') {
                    $in_class = false;
                }
            } elseif ($char === '[') {
                $in_class = true;
                // A "]" right after "[" or "[^" is a literal class member.
                if (substr($pattern, $i + 1, 1) === '^') {
                    $i++;
                }
                if (substr($pattern, $i + 1, 1) === ']') {
                    $i++;
                }
            } elseif ($char === '(') {
                $next = substr($pattern, $i + 1, 1);
                if ($next === '?') {
                    // (?<name>…), (?P<name>…) and (?'name'…) capture;
                    // (?<=…) and (?<!…) are look-behinds and do not.
                    $head = (string) substr($pattern, $i + 2, 3);
                    if (preg_match('/^(?:P?<[^=!]|\')/', $head) === 1) {
                        $count++;
                    }
                } elseif ($next !== '*') {
                    $count++;
                }
            }
        }
        return $count;
    };

    // Matches a numbered back-reference whose backslash is not itself
    // escaped. "\0" starts an octal escape, hence the leading [1-9].
    $backref_expr = '/
        (?<!\\\\)           # Not preceded by a backslash,
        ((?:\\\\\\\\)*)     # any number of escaped backslashes,
        \\\\ ([1-9]\\d*)    # then a backslash plus a non-zero number.
    /x';

    $result = array();
    $names = array_values($names);
    // Number of capturing groups emitted so far (wrappers included).
    $capture_count = 0;

    foreach (array_values($expressions) as $index => $expression) {
        // Get pattern body and modifiers:
        $stripped = preg_strip($expression);
        if ($stripped === false) {
            return false;
        }
        list($sub_expr, $modifiers) = $stripped;

        // A named wrapper is a capturing group that precedes the groups of
        // its own expression, so it shifts them by one more.
        $offset = $capture_count + ($use_names ? 1 : 0);
        $number_of_captures = $count_captures($sub_expr);

        // Re-adjust back-references:
        if ($number_of_captures > 0 && $offset > 0) {
            $sub_expr = preg_replace_callback(
                $backref_expr,
                function ($m) use ($offset) {
                    return $m[1] . '\\' . ((int) $m[2] + $offset);
                },
                $sub_expr
            );
            if ($sub_expr === null) {
                return false;
            }
        }
        $capture_count = $offset + $number_of_captures;

        // The result is delimited by "/", so a "/" that was legal under the
        // original delimiter has to be escaped. "\\\\" in the replacement
        // yields a single literal backslash.
        $sub_expr = preg_replace('/(?<!\\\\)((?:\\\\\\\\)*)\\//', '$1\\\\/', $sub_expr);
        if ($sub_expr === null) {
            return false;
        }

        // Last, construct the new sub-match:
        $modifiers = implode('', $modifiers);
        $sub_modifiers = ($modifiers === '') ? '' : "(?{$modifiers})";
        if ($use_names) {
            $name = str_replace(' ', '_', $names[$index]);
            $sub_name = "?<{$name}>";
        } else {
            $sub_name = '?:';
        }
        $result[] = "({$sub_name}{$sub_modifiers}{$sub_expr})";
    }

    return '/' . implode($glue, $result) . '/';
}
/**
 * Merges several regular expressions into one, using the indicated 'glue'.
 *
 * This function takes care of individual modifiers so it's safe to use
 * <i>different</i> modifiers on the individual expressions. The order of
 * sub-matches is preserved as well. Numbered back-references are adapted to
 * the new overall sub-match count. This means that it's safe to use numbered
 * back-references in the individual expressions!
 * If {@link $names} is given, the individual expressions are captured in
 * named sub-matches using the contents of that array as names. These wrapper
 * groups are capturing groups themselves and are taken into account when
 * back-references are renumbered.
 * Matching pair-delimiters (e.g. <var>"{…}"</var>) are currently
 * <b>not</b> supported.
 *
 * The function assumes that all regular expressions are well-formed.
 * Behaviour is undefined if they aren't.
 *
 * Known limitations: only back-references of the form <var>\1</var>,
 * <var>\2</var>, … are renumbered (absolute <var>\g{n}</var> references are
 * left untouched), parentheses inside <var>\Q…\E</var> sections or inside
 * extended-mode comments are not recognised as literals, and branch-reset
 * groups are not specially handled.
 *
 * This function was created after a
 * {@link http://stackoverflow.com/questions/244959/ StackOverflow discussion}.
 * Much of it was written or thought of by “porneL” and “eyelidlessness”. Many
 * thanks to both of them.
 *
 * @param string $glue A string to insert between the individual expressions.
 *      This should usually be either the empty string, indicating
 *      concatenation, or the pipe (<var>"|"</var>), indicating alternation.
 *      Notice that this string might have to be escaped since it is treated
 *      as a normal character in a regular expression (i.e. <var>"/"</var> will
 *      end the expression and result in an invalid output).
 * @param array $expressions The expressions to merge. The expressions may
 *      have arbitrary different delimiters and modifiers.
 * @param array $names Optional. This is either an empty array or an array of
 *      strings of the same length as {@link $expressions}. In that case,
 *      the strings of this array are used to create named sub-matches for the
 *      expressions. Spaces in names are replaced by underscores.
 * @return string|false A string representing a regular expression equivalent
 *      to the merged expressions. Returns <var>FALSE</var> if an error
 *      occurred.
 */
function preg_merge($glue, array $expressions, array $names = array())
{
    // Sanity check: the glue must be a string and, if names are used, there
    // has to be exactly one name per expression.
    $use_names = count($names) !== 0;
    if (!is_string($glue) || ($use_names && count($names) !== count($expressions))) {
        return false;
    }

    // Counts the capturing groups of a delimiter-less pattern. Escaped
    // characters and character classes are skipped so that "\(" and "[(]" are
    // not mistaken for groups; of the "(?…" constructs only named groups
    // capture, and "(*VERB)" constructs never do.
    $count_captures = function ($pattern) {
        $count = 0;
        $in_class = false;
        $length = strlen($pattern);
        for ($i = 0; $i < $length; $i++) {
            $char = $pattern[$i];
            if ($char === '\\') {
                $i++; // Skip the escaped character.
            } elseif ($in_class) {
                if ($char === ']') {
                    $in_class = false;
                }
            } elseif ($char === '[') {
                $in_class = true;
                // A "]" right after "[" or "[^" is a literal class member.
                if (substr($pattern, $i + 1, 1) === '^') {
                    $i++;
                }
                if (substr($pattern, $i + 1, 1) === ']') {
                    $i++;
                }
            } elseif ($char === '(') {
                $next = substr($pattern, $i + 1, 1);
                if ($next === '?') {
                    // (?<name>…), (?P<name>…) and (?'name'…) capture;
                    // (?<=…) and (?<!…) are look-behinds and do not.
                    $head = (string) substr($pattern, $i + 2, 3);
                    if (preg_match('/^(?:P?<[^=!]|\')/', $head) === 1) {
                        $count++;
                    }
                } elseif ($next !== '*') {
                    $count++;
                }
            }
        }
        return $count;
    };

    // Matches a numbered back-reference whose backslash is not itself
    // escaped. "\0" starts an octal escape, hence the leading [1-9].
    $backref_expr = '/
        (?<!\\\\)           # Not preceded by a backslash,
        ((?:\\\\\\\\)*)     # any number of escaped backslashes,
        \\\\ ([1-9]\\d*)    # then a backslash plus a non-zero number.
    /x';

    $result = array();
    $names = array_values($names);
    // Number of capturing groups emitted so far (wrappers included).
    $capture_count = 0;

    foreach (array_values($expressions) as $index => $expression) {
        // Get pattern body and modifiers:
        $stripped = preg_strip($expression);
        if ($stripped === false) {
            return false;
        }
        list($sub_expr, $modifiers) = $stripped;

        // A named wrapper is a capturing group that precedes the groups of
        // its own expression, so it shifts them by one more.
        $offset = $capture_count + ($use_names ? 1 : 0);
        $number_of_captures = $count_captures($sub_expr);

        // Re-adjust back-references:
        if ($number_of_captures > 0 && $offset > 0) {
            $sub_expr = preg_replace_callback(
                $backref_expr,
                function ($m) use ($offset) {
                    return $m[1] . '\\' . ((int) $m[2] + $offset);
                },
                $sub_expr
            );
            if ($sub_expr === null) {
                return false;
            }
        }
        $capture_count = $offset + $number_of_captures;

        // The result is delimited by "/", so a "/" that was legal under the
        // original delimiter has to be escaped. "\\\\" in the replacement
        // yields a single literal backslash.
        $sub_expr = preg_replace('/(?<!\\\\)((?:\\\\\\\\)*)\\//', '$1\\\\/', $sub_expr);
        if ($sub_expr === null) {
            return false;
        }

        // Last, construct the new sub-match:
        $modifiers = implode('', $modifiers);
        $sub_modifiers = ($modifiers === '') ? '' : "(?{$modifiers})";
        if ($use_names) {
            $name = str_replace(' ', '_', $names[$index]);
            $sub_name = "?<{$name}>";
        } else {
            $sub_name = '?:';
        }
        $result[] = "({$sub_name}{$sub_modifiers}{$sub_expr})";
    }

    return '/' . implode($glue, $result) . '/';
}
/**
 * Configures this definition for CSS: passes the language info and the
 * 'css' extension to the parent API, then registers the states, the rules
 * and the rule-to-class mappings.
 */
public function __construct()
{
    $author = array(
        parent::NAME => 'Konrad Rudolph',
        parent::WEBSITE => 'madrat.net',
        parent::EMAIL => '*****@*****.**',
    );
    $this->setInfo(array(
        parent::NAME => 'CSS',
        parent::VERSION => '0.8',
        parent::AUTHOR => $author,
    ));
    $this->setExtensions(array('css'));

    // Deliberately narrower than the CSS specification: with the
    // spec-conforming name pattern, numbers would no longer be recognized.
    $nmstart = '-?[a-z]';
    $nmchar = '[a-z0-9-]';
    $hex = '[0-9a-f]';
    $ident = $nmstart . $nmchar . '*';
    // Case-insensitive bare name; shared by several rules below.
    $name_rule = '/' . $ident . '/i';

    // Split the generic string rule into pattern body and modifiers so the
    // body can be embedded in the 'uri' and 'string' rules.
    list($string, $strmod) = preg_strip(Rule::STRING);
    $strmod = implode('', $strmod);

    $states = array(
        'init' => array(
            'comment', 'uri', 'meta', 'id', 'class', 'pseudoclass',
            'element', 'block', 'constraint', 'string',
        ),
        'block' => array('comment', 'attribute', 'value'),
        'constraint' => array('identifier', 'string'),
        'value' => array(
            'comment', 'string', 'color', 'number', 'uri', 'identifier',
            'important',
        ),
    );
    $this->addStates($states);

    $rules = array(
        'attribute' => $name_rule,
        'value' => new Rule('/:/', '/;|(?=\\})/'),
        'comment' => Rule::C_MULTILINECOMMENT,
        'meta' => '/@' . $ident . '/i',
        'id' => '/#' . $ident . '/i',
        'class' => '/\\.' . $ident . '/',
        'pseudoclass' => '/(?<!:):' . $ident . '/',
        'element' => $name_rule,
        'block' => new Rule('/\\{/', '/\\}/'),
        'constraint' => new Rule('/\\[/', '/\\]/'),
        'number' => '/[+-]?(?:\\d+(\\.\\d+)?|\\d*\\.\\d+)(%|em|ex|px|pt|in|cm|mm|pc|deg|g?rad|m?s|k?Hz)?/',
        'uri' => '/url\\(\\s*(?:' . $string . '|[^\\)]*)\\s*\\)/' . $strmod,
        'identifier' => $name_rule,
        'string' => '/' . $string . '/' . $strmod,
        'color' => '/#' . $hex . '{3}(?:' . $hex . '{3})?/i',
        'important' => '/!\\s*important/',
    );
    $this->addRules($rules);

    $mappings = array(
        'element' => 'keyword',
        'id' => 'keyword type',
        'class' => 'keyword builtin',
        'pseudoclass' => 'preprocessor',
        'block' => '',
        'constraint' => '',
        'value' => '',
        'color' => 'string',
        'uri' => 'char',
        'meta' => 'keyword',
    );
    $this->addMappings($mappings);
}