Exemplo n.º 1
0
/**
 * Merges several regular expressions into one, using the indicated 'glue'.
 *
 * This function takes care of individual modifiers so it's safe to use
 * <i>different</i> modifiers on the individual expressions. The order of
 * sub-matches is preserved as well. Numbered back-references are adapted to
 * the new overall sub-match count. This means that it's safe to use numbered
 * back-references in the individual expressions!
 * If {@link $names} is given, the individual expressions are captured in
 * named sub-matches using the contents of that array as names (spaces in a
 * name are replaced by underscores). Such a named wrapper is a capturing
 * group of its own and is taken into account when renumbering.
 * Matching pair-delimiters (e.g. <var>"{…}"</var>) are currently
 * <b>not</b> supported.
 *
 * The function assumes that all regular expressions are well-formed.
 * Behaviour is undefined if they aren't.
 *
 * Known limitations of the capture counting: parentheses inside character
 * classes or <var>\Q…\E</var> sections, conditional groups and branch-reset
 * groups are not recognised. Only back-references of the form
 * <var>\N</var> are renumbered; <var>\g{N}</var>, <var>\k&lt;name&gt;</var>
 * and subroutine calls are left untouched.
 *
 * This function was created after a
 * {@link http://stackoverflow.com/questions/244959/ StackOverflow discussion}.
 * Much of it was written or thought of by “porneL” and “eyelidlessness”. Many
 * thanks to both of them.
 *
 * @param string $glue  A string to insert between the individual expressions.
 *      This should usually be either the empty string, indicating
 *      concatenation, or the pipe (<var>"|"</var>), indicating alternation.
 *      Notice that this string might have to be escaped since it is treated
 *      as a normal character in a regular expression (i.e. <var>"/"</var> will
 *      end the expression and result in an invalid output).
 * @param array $expressions    The expressions to merge. The expressions may
 *      have arbitrary different delimiters and modifiers.
 * @param array $names  Optional. This is either an empty array or an array of
 *      strings of the same length as {@link $expressions}. In that case,
 *      the strings of this array are used, in order, to create named
 *      sub-matches for the expressions.
 * @return string|false A string representing a regular expression equivalent
 *      to the merged expressions. Returns <var>FALSE</var> if an error
 *      occurred (non-string glue, mismatching number of names, an expression
 *      that preg_strip() rejects, or a PCRE failure while rewriting).
 */
function preg_merge($glue, array $expressions, array $names = array())
{
    // Sanity check …
    $use_names = count($names) !== 0;
    if (!is_string($glue) || ($use_names && count($names) !== count($expressions))) {
        return false;
    }
    // Names are consumed by position, so the caller's keys must not matter.
    $names = array_values($names);

    // A named wrapper "(?<name>…)" is a capture of its own and therefore
    // shifts the number of every group nested inside it; "(?:…)" does not.
    $wrapper_captures = $use_names ? 1 : 0;

    // Counts the capturing groups opened inside one expression.
    // We assume that the expression is correct and therefore don't check
    // for matching parentheses.
    $capture_expr = '/
        (?<!\\\\) (?:\\\\\\\\)*  # Not escaped: an even number of backslashes,
        \\(                       # then an opening parenthesis that starts
        (?:
            (?![?*])              # a plain group (no "?" or "*" follows),
          | \\?P?<(?![=!])        # or a named group, but not a lookbehind,
          | \\?\'                 # or a named group in single-quote syntax.
        )
    /x';

    // Finds numbered back-references. A leading zero is excluded because
    // that form is an octal escape, never a back-reference.
    $backref_expr = '/
        (?<!\\\\)         # Not preceded by a backslash,
        ((?:\\\\\\\\)*?)  # zero or more escaped backslashes,
        \\\\ ([1-9]\\d*)   # followed by backslash plus a non-zero number.
    /x';

    $result = array();
    // For keeping track of the names for sub-matches.
    $index = 0;
    // For keeping track of *all* captures to re-adjust backreferences.
    $capture_count = 0;

    foreach ($expressions as $expression) {
        // Get delimiters and modifiers:
        $stripped = preg_strip($expression);
        if ($stripped === false) {
            return false;
        }
        list($sub_expr, $modifiers) = $stripped;

        $number_of_captures = preg_match_all($capture_expr, $sub_expr, $_);
        if ($number_of_captures === false) {
            return false;
        }

        // Number of groups that precede the first group of this expression
        // in the merged pattern (including this expression's own wrapper).
        $offset = $capture_count + $wrapper_captures;

        // Re-adjust backreferences. Without any capture there is nothing a
        // back-reference could legitimately point to, so leave the text alone.
        if ($number_of_captures > 0) {
            $sub_expr = preg_replace_callback(
                $backref_expr,
                function ($m) use ($offset) {
                    return $m[1] . '\\' . ((int) $m[2] + $offset);
                },
                $sub_expr
            );
            if ($sub_expr === null) {
                return false;
            }
        }
        $capture_count = $offset + $number_of_captures;

        // Last, construct the new sub-match:
        $modifiers = implode('', $modifiers);
        $sub_modifiers = ($modifiers === '') ? '' : "(?{$modifiers})";
        if ($use_names) {
            $name = str_replace(' ', '_', $names[$index]);
            $sub_name = "?<{$name}>";
        } else {
            $sub_name = '?:';
        }
        $result[] = "({$sub_name}{$sub_modifiers}{$sub_expr})";
        $index++;
    }

    // NOTE(review): the result always uses "/" as delimiter. An expression
    // that originally used another delimiter may contain an unescaped "/";
    // whether preg_strip() takes care of that is not visible here — confirm.
    return '/' . implode($glue, $result) . '/';
}
Exemplo n.º 2
0
/**
 * Merges several regular expressions into one, using the indicated 'glue'.
 *
 * This function takes care of individual modifiers so it's safe to use
 * <i>different</i> modifiers on the individual expressions. The order of
 * sub-matches is preserved as well. Numbered back-references are adapted to
 * the new overall sub-match count. This means that it's safe to use numbered
 * back-references in the individual expressions!
 * If {@link $names} is given, the individual expressions are captured in
 * named sub-matches using the contents of that array as names (spaces in a
 * name are replaced by underscores). Such a named wrapper is a capturing
 * group of its own and is taken into account when renumbering.
 * Matching pair-delimiters (e.g. <var>"{…}"</var>) are currently
 * <b>not</b> supported.
 *
 * The function assumes that all regular expressions are well-formed.
 * Behaviour is undefined if they aren't.
 *
 * Known limitations of the capture counting: parentheses inside character
 * classes or <var>\Q…\E</var> sections, conditional groups and branch-reset
 * groups are not recognised. Only back-references of the form
 * <var>\N</var> are renumbered; <var>\g{N}</var>, <var>\k&lt;name&gt;</var>
 * and subroutine calls are left untouched.
 *
 * This function was created after a
 * {@link http://stackoverflow.com/questions/244959/ StackOverflow discussion}.
 * Much of it was written or thought of by “porneL” and “eyelidlessness”. Many
 * thanks to both of them.
 *
 * @param string $glue  A string to insert between the individual expressions.
 *      This should usually be either the empty string, indicating
 *      concatenation, or the pipe (<var>"|"</var>), indicating alternation.
 *      Notice that this string might have to be escaped since it is treated
 *      as a normal character in a regular expression (i.e. <var>"/"</var> will
 *      end the expression and result in an invalid output).
 * @param array $expressions    The expressions to merge. The expressions may
 *      have arbitrary different delimiters and modifiers.
 * @param array $names  Optional. This is either an empty array or an array of
 *      strings of the same length as {@link $expressions}. In that case,
 *      the strings of this array are used, in order, to create named
 *      sub-matches for the expressions.
 * @return string|false A string representing a regular expression equivalent
 *      to the merged expressions. Returns <var>FALSE</var> if an error
 *      occurred (non-string glue, mismatching number of names, an expression
 *      that preg_strip() rejects, or a PCRE failure while rewriting).
 */
function preg_merge($glue, array $expressions, array $names = array())
{
    // Sanity check …
    $use_names = count($names) !== 0;
    if (!is_string($glue) || ($use_names && count($names) !== count($expressions))) {
        return false;
    }
    // Names are consumed by position, so the caller's keys must not matter.
    $names = array_values($names);

    // A named wrapper "(?<name>…)" is a capture of its own and therefore
    // shifts the number of every group nested inside it; "(?:…)" does not.
    $wrapper_captures = $use_names ? 1 : 0;

    // Counts the capturing groups opened inside one expression.
    // We assume that the expression is correct and therefore don't check
    // for matching parentheses.
    $capture_expr = '/
        (?<!\\\\) (?:\\\\\\\\)*  # Not escaped: an even number of backslashes,
        \\(                       # then an opening parenthesis that starts
        (?:
            (?![?*])              # a plain group (no "?" or "*" follows),
          | \\?P?<(?![=!])        # or a named group, but not a lookbehind,
          | \\?\'                 # or a named group in single-quote syntax.
        )
    /x';

    // Finds numbered back-references. A leading zero is excluded because
    // that form is an octal escape, never a back-reference.
    $backref_expr = '/
        (?<!\\\\)         # Not preceded by a backslash,
        ((?:\\\\\\\\)*?)  # zero or more escaped backslashes,
        \\\\ ([1-9]\\d*)   # followed by backslash plus a non-zero number.
    /x';

    $result = array();
    // For keeping track of the names for sub-matches.
    $index = 0;
    // For keeping track of *all* captures to re-adjust backreferences.
    $capture_count = 0;

    foreach ($expressions as $expression) {
        // Get delimiters and modifiers:
        $stripped = preg_strip($expression);
        if ($stripped === false) {
            return false;
        }
        list($sub_expr, $modifiers) = $stripped;

        $number_of_captures = preg_match_all($capture_expr, $sub_expr, $_);
        if ($number_of_captures === false) {
            return false;
        }

        // Number of groups that precede the first group of this expression
        // in the merged pattern (including this expression's own wrapper).
        $offset = $capture_count + $wrapper_captures;

        // Re-adjust backreferences. Without any capture there is nothing a
        // back-reference could legitimately point to, so leave the text alone.
        if ($number_of_captures > 0) {
            $sub_expr = preg_replace_callback(
                $backref_expr,
                function ($m) use ($offset) {
                    return $m[1] . '\\' . ((int) $m[2] + $offset);
                },
                $sub_expr
            );
            if ($sub_expr === null) {
                return false;
            }
        }
        $capture_count = $offset + $number_of_captures;

        // Last, construct the new sub-match:
        $modifiers = implode('', $modifiers);
        $sub_modifiers = ($modifiers === '') ? '' : "(?{$modifiers})";
        if ($use_names) {
            $name = str_replace(' ', '_', $names[$index]);
            $sub_name = "?<{$name}>";
        } else {
            $sub_name = '?:';
        }
        $result[] = "({$sub_name}{$sub_modifiers}{$sub_expr})";
        $index++;
    }

    // NOTE(review): the result always uses "/" as delimiter. An expression
    // that originally used another delimiter may contain an unescaped "/";
    // whether preg_strip() takes care of that is not visible here — confirm.
    return '/' . implode($glue, $result) . '/';
}
Exemplo n.º 3
0
 /**
  * Registers the CSS definition on this object: descriptive info, the file
  * extension, the state graph, the rule for each state and the mapping from
  * rule names to output classes.
  */
 public function __construct()
 {
     $author = array(
         parent::NAME => 'Konrad Rudolph',
         parent::WEBSITE => 'madrat.net',
         parent::EMAIL => '*****@*****.**',
     );
     $this->setInfo(array(
         parent::NAME => 'CSS',
         parent::VERSION => '0.8',
         parent::AUTHOR => $author,
     ));
     $this->setExtensions(array('css'));

     // The following does not conform to the specs but it is necessary
     // else numbers wouldn't be recognized any more.
     $nmstart = '-?[a-z]';
     $nmchar = '[a-z0-9-]';
     $hex = '[0-9a-f]';
     // One start character followed by any number of name characters; this
     // fragment is shared by every name-like rule below.
     $ident = "{$nmstart}{$nmchar}*";

     // Re-use the body and the modifiers of the generic string rule so that
     // it can be embedded into the "uri" rule.
     list($string, $strmod) = preg_strip(Rule::STRING);
     $strmod = implode('', $strmod);

     // Each state lists the rule names that are passed to addStates() for it.
     $this->addStates(array(
         'init' => array(
             'comment', 'uri', 'meta', 'id', 'class', 'pseudoclass',
             'element', 'block', 'constraint', 'string',
         ),
         'block' => array('comment', 'attribute', 'value'),
         'constraint' => array('identifier', 'string'),
         'value' => array(
             'comment', 'string', 'color', 'number', 'uri', 'identifier',
             'important',
         ),
     ));

     // NOTE(review): "class" and "pseudoclass" are the only name-like rules
     // without the "i" modifier — confirm that this is intentional.
     $this->addRules(array(
         'attribute' => "/{$ident}/i",
         'value' => new Rule('/:/', '/;|(?=\\})/'),
         'comment' => Rule::C_MULTILINECOMMENT,
         'meta' => "/@{$ident}/i",
         'id' => "/#{$ident}/i",
         'class' => "/\\.{$ident}/",
         'pseudoclass' => "/(?<!:):{$ident}/",
         'element' => "/{$ident}/i",
         'block' => new Rule('/\\{/', '/\\}/'),
         'constraint' => new Rule('/\\[/', '/\\]/'),
         'number' => '/[+-]?(?:\\d+(\\.\\d+)?|\\d*\\.\\d+)(%|em|ex|px|pt|in|cm|mm|pc|deg|g?rad|m?s|k?Hz)?/',
         'uri' => "/url\\(\\s*(?:{$string}|[^\\)]*)\\s*\\)/{$strmod}",
         'identifier' => "/{$ident}/i",
         'string' => "/{$string}/{$strmod}",
         'color' => "/#{$hex}{3}(?:{$hex}{3})?/i",
         'important' => '/!\\s*important/',
     ));

     // Rule name => output class; an empty string is passed for the three
     // container rules (block, constraint, value).
     $this->addMappings(array(
         'element' => 'keyword',
         'id' => 'keyword type',
         'class' => 'keyword builtin',
         'pseudoclass' => 'preprocessor',
         'block' => '',
         'constraint' => '',
         'value' => '',
         'color' => 'string',
         'uri' => 'char',
         'meta' => 'keyword',
     ));
 }