Exemplo n.º 1
0
 /**
  * @brief Performs a search for the given pattern past the given index.
  *
  * Results are memoised per pattern in $this->cache. A cached entry is either
  * false (an earlier search found nothing) or array(offset, groups). A cached
  * hit is reused only while its offset is still at or past $index; otherwise
  * the pattern is run again against $this->string starting at $index.
  *
  * @param $search the pattern to search for
  * @param $index the minimum string index (offset) of a result
  * @param $matches a reference to the return location of the match groups
  *   (plain strings, with the PREG_OFFSET_CAPTURE offsets stripped)
  * @return the index or false if no match is found.
  * @throws Exception if preg_match() itself fails and LUMINOUS_DEBUG is truthy
  */
 public function match($search, $index, &$matches)
 {
     if (isset($this->cache[$search])) {
         $a = $this->cache[$search];
         if ($a === false) {
             // no more results
             return false;
         }
         $r = $a[0];
         $matches = $a[1];
         assert($matches !== null);
         if ($r >= $index) {
             // cache is good!
             return $r;
         }
     }
     // cache not set, or out of date, we have to perform the match
     if (!($ret = preg_match($search, $this->string, $matches_, PREG_OFFSET_CAPTURE, $index))) {
         if ($ret === false && LUMINOUS_DEBUG) {
             throw new Exception('preg_match returned false for pattern: "' . $search . '", with code: ' . LuminousUtils::pcre_error_decode(preg_last_error()));
         }
         // remember the failure so the pattern is not tried again
         $this->cache[$search] = false;
         return false;
     }
     $r = $matches_[0][1];
     // strip the offsets from the match_groups. This is done by value into a
     // fresh array: iterating by reference would leave the loop variable
     // aliased to the last element of the array that is cached and returned.
     $groups = array();
     foreach ($matches_ as $i => $v) {
         $groups[$i] = $v[0];
     }
     $this->cache[$search] = array($r, $groups);
     $matches = $groups;
     return $r;
 }
Exemplo n.º 2
0
/**
 * Asserts that LuminousUtils::escape_token() treats a token correctly.
 *
 * The token is indexed as [0] name, [1] text, [2] flag. The checks are:
 *  - the name is left untouched;
 *  - if the input flag is truthy the text is unchanged, otherwise the text
 *    equals LuminousUtils::escape_string() applied to the original text;
 *  - the flag of the result is truthy.
 *
 * @param $token the token array to run through escape_token()
 */
function _test_escape_token($token)
{
    $result = LuminousUtils::escape_token($token);
    // name should be unchanged
    assert($token[0] === $result[0]);
    if ($token[2]) {
        $wanted_text = $token[1];
    } else {
        $wanted_text = LuminousUtils::escape_string($token[1]);
    }
    assert($result[1] === $wanted_text);
    assert($result[2]);
}
Exemplo n.º 3
0
 /**
  * Marks up tilde sequences inside a string token.
  *
  * Tokens whose text contains no '~' are returned untouched. Otherwise the
  * token is passed through LuminousUtils::escape_token() and every '~'
  * followed by a run of digits or by one other character is wrapped in
  * INTERPOLATION tags.
  *
  * @param $token token array; index 1 holds the token text
  * @return the (possibly rewritten) token array
  */
 static function str_filter($token)
 {
     // strpos() returns 0 for a match at the very start of the text, so the
     // result has to be compared strictly against false.
     if (strpos($token[1], '~') === false) {
         return $token;
     }
     $token = LuminousUtils::escape_token($token);
     $token[1] = preg_replace('/~(?:\\d+|.)/', '<INTERPOLATION>$0</INTERPOLATION>', $token[1]);
     return $token;
 }
Exemplo n.º 4
0
 /**
  * Adds nested markup to a comment token.
  *
  * After the token has been through LuminousUtils::escape_token(), two
  * optional rewrites are applied to its text (index 1):
  *  - double-quoted runs that do not start at the first character are
  *    wrapped in STRING tags;
  *  - a leading '"', optional non-word characters, a capitalised word group
  *    and a colon are split into DOCTAG / DOCSTR tags.
  *
  * @param $token token array; index 1 holds the token text
  * @return the rewritten token array
  */
 static function comment_filter($token)
 {
     $token = LuminousUtils::escape_token($token);
     $text = $token[1];

     // Cheap substring checks first: the regexes only run when they can match.
     $has_inner_quote = strpos(substr($text, 1), '"') !== false;
     if ($has_inner_quote) {
         $text = preg_replace('/(?<!^)"(?>[^"]*)"/', "<STRING>\$0</STRING>", $text);
     }

     $has_colon = strpos($text, ':') !== false;
     if ($has_colon) {
         $text = preg_replace('/(?<=^")((?>\\W*))((?>[A-Z]\\w+(?>(?>\\s+\\w+)*)))(:\\s*)(.*)/', '$1<DOCTAG>$2</DOCTAG>$3<DOCSTR>$4</DOCSTR>', $text);
     }

     $token[1] = $text;
     return $token;
 }
Exemplo n.º 5
0
 /**
  * Highlights interpolated variables inside a string token.
  *
  * Only tokens whose text starts with a double quote, or whose name is
  * 'HEREDOC', are considered, and only when the text contains a '$'.
  * Those tokens are escaped via LuminousUtils::escape_token() and every
  * $var, $$var, ${...} and {$...} occurrence is wrapped in VARIABLE tags.
  *
  * @param $token token array; index 0 is the name, index 1 the text
  * @return the (possibly rewritten) token array
  */
 static function str_filter($token)
 {
     $may_interpolate = $token[1][0] === '"' || $token[0] === 'HEREDOC';
     if (!$may_interpolate || strpos($token[1], '$') === false) {
         // nothing to highlight, hand the token back untouched
         return $token;
     }

     $escaped = LuminousUtils::escape_token($token);
     // matches $var, ${var} and {$var} syntax
     $variable_pattern = '/
   (?: \\$\\{ | \\{\\$ ) [^}]++ \\}
   |
   \\$\\$?[a-zA-Z_]\\w*
   /x';
     $escaped[1] = preg_replace($variable_pattern, '<VARIABLE>$0</VARIABLE>', $escaped[1]);
     return $escaped;
 }
Exemplo n.º 6
0
 /**
  * Wraps variable references inside a string token in VARIABLE tags.
  *
  * The token is first passed through LuminousUtils::escape_token(). A
  * reference is a '$' followed either by word characters or by a
  * brace-delimited group that contains no closing brace or newline.
  *
  * @param $token token array; index 1 holds the token text
  * @return the rewritten token array
  */
 public static function string_filter($token)
 {
     $escaped = LuminousUtils::escape_token($token);
     $variable_pattern = "/\\\$(?:\\w+|\\{[^}\n]+\\})/";
     $escaped[1] = preg_replace($variable_pattern, '<VARIABLE>$0</VARIABLE>', $escaped[1]);
     return $escaped;
 }
Exemplo n.º 7
0
 /**
  * Handles a quote-like operator whose opening has already been matched.
  *
  * Advances the scan position past the matched opening, records it as a
  * DELIMITER, then consumes the string body (and a second body for the
  * two-part operators s, tr and y). For regex-type operators any trailing
  * modifier letters are recorded as a KEYWORD.
  *
  * @param $matches match groups of the opening: [0] the whole match,
  *   [1] the operator name, [3] the opening delimiter
  */
 function str_override($matches)
 {
     $opening = $matches[0];
     $this->pos($this->pos() + strlen($opening));
     $this->record($opening, 'DELIMITER');

     $operator = $matches[1];
     $two_part = in_array($operator, array('s', 'tr', 'y'), true);

     if ($two_part || $operator === 'm' || $operator === 'qr') {
         $type = 'REGEX';
     } elseif ($operator === 'qw') {
         $type = 'SPLIT_STRING';
     } else {
         $type = 'STRING';
     }
     $this->consume_string($matches[3], $type);

     if ($two_part) {
         // s/tr/y take two strings, e.g. s/something/somethingelse/, so the
         // next delimiter (if it exists) and the second string have to be
         // consumed as well.
         $this->skip_whitespace();
         // If the delimiters are a balanced pair there is a fresh opening
         // delimiter right here, e.g. s[something][somethingelse]
         $is_pair = LuminousUtils::balance_delimiter($matches[3]) !== $matches[3];
         if (!$is_pair) {
             $this->consume_string($matches[3], 'STRING');
         } else {
             $second = $this->scan('/[^a-zA-Z0-9]/');
             if ($second !== null) {
                 $this->record($second, 'DELIMITER');
                 $this->consume_string($second, 'STRING');
             }
         }
     }

     if ($type === 'REGEX') {
         if ($this->scan('/[cgimosxpe]+/')) {
             $this->record($this->match(), 'KEYWORD');
         }
     }
 }
Exemplo n.º 8
0
 /**
  * Scans the whole input as a diff, one line (or one code block) at a time.
  *
  * Header, range and separator lines are recorded with their own token
  * types. A run of actual source lines is stripped of its diff prefixes,
  * optionally highlighted by a child scanner chosen from the file name in
  * the diff header, and then recorded line by line as DIFF_UNCHANGED,
  * DIFF_NEW or DIFF_OLD with the prefixes re-attached.
  */
 function main()
 {
     // we're aiming to handle context, unified and normal diff all at once here
     // because it doesn't really seem that hard.
     $child = null;
     $last_index = -1;
     while (!$this->eos()) {
         $index = $this->pos();
         // every iteration must make progress and start at a line beginning
         assert($index > $last_index);
         $last_index = $index;
         assert($this->bol());
         $tok = null;
         if ($this->scan('/diff\\s.*$/m') !== null) {
             $tok = 'KEYWORD';
         } elseif ($this->scan($this->patterns['range']) !== null) {
             $tok = 'DIFF_RANGE';
         } elseif ($this->scan("/-{3}[ \t]*\$/m")) {
             $tok = null;
         } elseif ($this->scan('/(?:\\**|=*|\\w.*)$/m') !== null) {
             $tok = 'KEYWORD';
         } elseif ($this->scan("@[+\\-\\*]{3}(\\s+([^\\s]*)([ \t]|\$))?.*@m") !== null) {
             $m = $this->match_groups();
             // unified uses +++, context uses *
             if ($m[0][0] === '+' || $m[0][0] === '*') {
                 $tok = 'DIFF_HEADER_NEW';
             } else {
                 $tok = 'DIFF_HEADER_OLD';
             }
             if (isset($m[2])) {
                 // NOTE(review): as written this pattern strips everything up
                 // to a backslash followed by a slash; confirm whether
                 // "either path separator" was intended.
                 $filename = preg_replace('@.*\\\\/@', '', $m[2]);
                 $child = self::get_child_scanner($filename);
             }
         } elseif ($this->scan('/\\\\.*/') !== null) {
             $tok = null;
         } elseif ($this->scan($this->patterns['codeblock']) !== null) {
             // this is actual source code.
             // we're going to format this here.
             // we're going to extract the block, and try to re-assemble it as
             // verbatim code, then highlight it via a child scanner, then split up
             // the lines, re-apply the necessary prefixes (e.g. + or -) to them,
             // and store them as being a DIFF_ token.
             // we have to do it like this, rather than line by line, otherwise
             // multiline tokens aren't going to work properly. There's still a risk
             // that the diff will be fragmented such the child scanner gets it
             // wrong but that can't be helped.
             // TODO restructure this so the complicated bits aren't done if there's
             // no child scanner to pass it down to
             $block = $this->match();
             if (!strlen($block)) {
                 assert(0);
             }
             $lines = explode("\n", $block);
             // Must start out as an array: appending with [] to a string is a
             // fatal error on PHP 7.1 and later.
             $verbatim_ = array();
             $types = array();
             $prefixes = array();
             foreach ($lines as $l) {
                 if (!strlen($l) || $l[0] === ' ') {
                     $types[] = 'DIFF_UNCHANGED';
                 } elseif ($l[0] === '+' || $l[0] === '>') {
                     $types[] = 'DIFF_NEW';
                 } elseif ($l[0] === '!' || $l[0] === '<' || $l[0] === '-') {
                     $types[] = 'DIFF_OLD';
                 } else {
                     assert(0);
                     // keep $types index-aligned with $prefixes even when
                     // assertions are disabled
                     $types[] = null;
                 }
                 $prefixes[] = isset($l[0]) ? $l[0] : '';
                 $verbatim_[] = substr($l, 1);
             }
             $verbatim = implode("\n", $verbatim_);
             $escaped = false;
             if ($child !== null) {
                 $c = new $child();
                 $c->init();
                 $c->string($verbatim);
                 $c->main();
                 $tagged = $c->tagged();
                 $escaped = true;
             } else {
                 $tagged = $verbatim;
             }
             $exp = explode("\n", $tagged);
             assert(count($exp) === count($prefixes));
             $last_line = count($exp) - 1;
             foreach ($exp as $i => $v) {
                 $t = $types[$i];
                 // if the sub-scanner escaped the line, we also need to escape the
                 // prefix for consistency
                 $prefix = $prefixes[$i];
                 if ($escaped) {
                     $prefix = LuminousUtils::escape_string($prefix);
                 }
                 $text = $prefix . $v;
                 $this->record($text, $t, $escaped);
                 // newlines between lines are recorded as untyped text
                 if ($i < $last_line) {
                     $this->record("\n", null);
                 }
             }
             if ($this->eol()) {
                 $this->record($this->get(), null);
             }
             continue;
         } else {
             $this->scan('/.*/');
         }
         // previous else clause can capture empty strings
         if ($this->match() !== '') {
             $this->record($this->match(), $tok);
         }
         assert($this->eol());
         // consume newline
         if (!$this->eos()) {
             $this->record($this->get(), null);
         }
     }
 }
Exemplo n.º 9
0
 /**
  * Recursively flattens a token tree node into a tagged XML string.
  *
  * String children are escaped with LuminousUtils::escape_string() and
  * child nodes are collapsed recursively. The concatenated text is then
  * run through rule_mapper_filter() and through any filters registered in
  * $this->filters under the node's original token name.
  *
  * @param $node array with 'token_name' and 'children' keys
  * @return the text alone when the resulting token name is null, otherwise
  *   the text wrapped by LuminousUtils::tag_block()
  * @internal
  */
 protected function collapse_token_tree($node)
 {
     $original_name = $node['token_name'];

     $body = '';
     foreach ($node['children'] as $child) {
         $body .= is_string($child)
             ? LuminousUtils::escape_string($child)
             : $this->collapse_token_tree($child);
     }

     $mapped = $this->rule_mapper_filter(array(array($original_name, $body, true)));
     $token = $mapped[0];

     // filters are looked up by the name the node had before rule mapping
     if (isset($this->filters[$original_name])) {
         foreach ($this->filters[$original_name] as $entry) {
             $token = call_user_func($entry[1], $token);
         }
     }

     list($final_name, $final_text) = $token;
     if ($final_name === null) {
         return $final_text;
     }
     return LuminousUtils::tag_block($final_name, $final_text);
 }
Exemplo n.º 10
0
 /**
  * @brief Tries to highlight PCRE style regular expression syntax
  *
  * The token is passed through self::string() and
  * LuminousUtils::escape_token(), after which its text is rewritten by a
  * fixed sequence of regex replacements that tag delimiters, trailing
  * modifier letters, operators, sub-pattern and class markers and repeat
  * ranges. When the trailing modifiers include 'x', '#' comments are
  * tagged as well.
  *
  * @param $token token array; index 1 holds the regex text
  * @param $delimited whether the text carries delimiters (and possibly
  *   trailing modifier letters) that should be tagged
  * @return the rewritten token array
  */
 static function pcre($token, $delimited = true)
 {
     $token = self::string($token);
     $token = LuminousUtils::escape_token($token);
     $text = $token[1];
     $extended = false;

     if ($delimited) {
         // opening delimiter
         $text = preg_replace('/^[^[:alnum:]<>\\s]/', '<DELIMITER>$0</DELIMITER>', $text);
         if (!preg_match("/[[:alpha:]]+\$/", $text, $trailing)) {
             // closing delimiter with no modifiers after it
             $text = preg_replace('/[^[:alnum:]<>]$/', '<DELIMITER>$0</DELIMITER>', $text);
         } else {
             // closing delimiter followed by modifier letters
             $extended = strpos($trailing[0], 'x') !== false;
             $text = preg_replace("/((?<!\\A)[^[:alnum:]\\s<>])([[:alpha:]]+)\$/", "<DELIMITER>\$1</DELIMITER><KEYWORD>\$2</KEYWORD>", $text);
         }
     }

     // Applied strictly in this order; later patterns see earlier output.
     $rules = array(
         array("/((?<!\\\\)[\\*\\+\\.|])|((?<![\\(\\\\])\\?)/", "<REGEX_OPERATOR>\$0</REGEX_OPERATOR>"),
         array("/(?<=\\()\\?(?:(?:[a-zA-Z:!|=])|(?:(?:&lt;)[=!]))/", "<REGEX_SUBPATTERN>\$0</REGEX_SUBPATTERN>"),
         array("/(?<!\\\\)[\\(\\)]/", "<REGEX_SUBPATTERN_MARKER>\$0</REGEX_SUBPATTERN_MARKER>"),
         array("/(?<!\\\\)[\\[\\]]/", "<REGEX_CLASS_MARKER>\$0</REGEX_CLASS_MARKER>"),
         array("/(?<!\\\\)\n      \\{\n        (\n          ((?>\\d+)(,(?>\\d+)?)?)\n          |\n          (,(?>\\d+))\n        )\n      \\}/x", "<REGEX_REPEAT_MARKER>\$0</REGEX_REPEAT_MARKER>"),
     );
     foreach ($rules as $rule) {
         $text = preg_replace($rule[0], $rule[1], $text);
     }

     // extended regex: # signifies a comment
     if ($extended) {
         $text = preg_replace('/(?<!\\\\)#.*$/m', '<COMMENT>$0</COMMENT>', $text);
     }

     $token[1] = $text;
     return $token;
 }
Exemplo n.º 11
0
 /**
  * Post-processes a preprocessor token.
  *
  * The token is escaped with LuminousUtils::escape_token(); then quoted
  * strings, text between escaped angle brackets (named group STR), line
  * comments and block comments found in its text are each handed to
  * LuminousCppScanner::preprocessor_filter_cb() for replacement.
  *
  * @param $token token array; index 1 holds the token text
  * @return the rewritten token array
  */
 static function preprocessor_filter($token)
 {
     $escaped = LuminousUtils::escape_token($token);
     $nested_pattern = "@\n      (?P<STR>  \" (?> [^\\\\\n\"]+ | \\\\. )* (?: \"|\$) | (?<=&lt;) .*? (?=&gt;))\n      | // .*\n      | /\\* (?s:.*?) (\\*/ | \$)\n    @x";
     $callback = array('LuminousCppScanner', 'preprocessor_filter_cb');
     $escaped[1] = preg_replace_callback($nested_pattern, $callback, $escaped[1]);
     return $escaped;
 }
Exemplo n.º 12
0
   /**
    * Main scanning loop.
    *
    * Walks the input until end of string, trying each token rule in a fixed
    * order and recording whatever matched together with a token type. The
    * loop exits early when, in interpolation mode, a closing brace brings
    * the brace counter to zero or below; when, in rails mode, a '%>' or
    * '-%>' is next; or when the identifier __END__ is read.
    *
    * After the loop, if an entry is still present at $this->state_[0], the
    * text from that entry's stored position up to the current position is
    * recorded under that entry's token name and terminate() is called.
    */
   public function main()
   {
       while (!$this->eos()) {
           // Pending heredocs are handled once a new line begins.
           if ($this->bol() && !empty($this->heredocs)) {
               $this->do_heredoc();
           }
           // In interpolation mode, track brace nesting and stop at the '}'
           // that brings the counter to zero or below. The brace itself is
           // only peeked at here, not consumed.
           if ($this->interpolation) {
               $c = $this->peek();
               if ($c === '{') {
                   $this->curley_braces++;
               } elseif ($c === '}') {
                   $this->curley_braces--;
                   if ($this->curley_braces <= 0) {
                       break;
                   }
               }
           }
           // In rails mode, stop (without consuming) at '%>' or '-%>'.
           if ($this->rails && $this->check('/-?%>/')) {
               break;
           }
           // The next character is used as a cheap pre-check before the more
           // expensive scan() calls below.
           $c = $this->peek();
           if ($c === '=' && $this->scan('/^=begin .*? (^=end|\\z)/msx')) {
               // =begin ... =end block (or =begin up to end of input)
               $this->record($this->match(), 'DOCCOMMENT');
           } elseif ($c === '#' && $this->scan($this->comment_regex)) {
               $this->record($this->match(), 'COMMENT');
           } elseif ($this->scan($this->numeric) !== null) {
               $this->record($this->match(), 'NUMERIC');
           } elseif ($c === '$' && $this->scan('/\\$
 (?:
   (?:[!@`\'\\+1~=\\/\\\\,;\\._0\\*\\$\\?:"&<>])
   |
   (?: -[0adFiIlpvw])
   |
   (?:DEBUG|FILENAME|LOAD_PATH|stderr|stdin|stdout|VERBOSE)
 )/x') || $this->scan('/(\\$|@@?)\\w+/')) {
               // Either one of the listed special '$' names, or a '$', '@' or
               // '@@' followed by word characters.
               $this->record($this->match(), 'VARIABLE');
           } elseif ($this->scan('/:\\w+/')) {
               // a colon followed by word characters
               $this->record($this->match(), 'VALUE');
           } elseif ($c === '<' && $this->scan('/(<<(-?))([\'"`]?)([A-Z_]\\w*)(\\3)/i')) {
               // Heredoc opener: only the opener is recorded here; the body is
               // queued in $this->heredocs and handled by do_heredoc() above.
               $m = $this->match_groups();
               $this->record($m[0], 'DELIMITER');
               // Queued entry: identifier, whether '-' followed '<<', and
               // whether the quote character is anything but a single quote
               // (presumably the interpolation flag; confirm in do_heredoc()).
               $hdoc = array($m[4], $m[2] === '-', $m[3] !== "'");
               $this->heredocs[] = $hdoc;
           } elseif (($c === '"' || $c === "'" || $c === '`' || $c === '%') && $this->scan('/[\'"`]|%( [qQrswWx](?![[:alnum:]]|$) | (?![[:alnum:]\\s]|$))/xm') || $c === '/' && $this->is_regex()) {
               // A quoted string, backtick string, %-literal, or a '/' that
               // is_regex() accepts. The locals below describe the literal and
               // are passed on to do_string().
               $interpolation = false;
               $type = 'STRING';
               $delimiter;
               $pos;
               $fancy_delim = false;
               $split = false;
               if ($c === '/') {
                   // The '/' has not been scanned yet: remember where it is,
                   // then consume it with get().
                   $interpolation = true;
                   $type = 'REGEX';
                   $delimiter = $c;
                   $pos = $this->pos();
                   $this->get();
               } else {
                   $pos = $this->match_pos();
                   $delimiter = $this->match();
                   if ($delimiter === '"') {
                       $interpolation = true;
                   } elseif ($delimiter === "'") {
                       // single-quoted: the defaults above are kept
                   } elseif ($delimiter === '`') {
                       $type = 'FUNCTION';
                   } else {
                       // %-literal: the character after the scanned prefix is
                       // the real delimiter, and the optional letter (group 1)
                       // selects the flags and token type.
                       $delimiter = $this->get();
                       $m1 = $this->match_group(1);
                       if ($m1 === 'Q' || $m1 === 'r' || $m1 === 'W' || $m1 === 'x') {
                           $interpolation = true;
                       }
                       if ($m1 === 'w' || $m1 === 'W') {
                           $split = true;
                       }
                       if ($m1 === 'x') {
                           $type = 'FUNCTION';
                       } elseif ($m1 === 'r') {
                           $type = 'REGEX';
                       }
                       $fancy_delim = true;
                       // The prefix plus delimiter is recorded right away, and
                       // the stored position moves to just after it.
                       $this->record($this->match() . $delimiter, 'DELIMITER');
                       $pos = $this->pos();
                   }
               }
               // Order: type, opening delimiter, the delimiter returned by
               // balance_delimiter(), start position, interpolation flag,
               // fancy-delimiter flag, split flag.
               $data = array($type, $delimiter, LuminousUtils::balance_delimiter($delimiter), $pos, $interpolation, $fancy_delim, $split);
               $this->do_string($data);
           } elseif ((ctype_alpha($c) || $c === '_') && ($m = $this->scan('/[_a-zA-Z]\\w*[!?]?/')) !== null) {
               // Identifier: a leading upper-case letter makes it a CONSTANT.
               $this->record($m, ctype_upper($m[0]) ? 'CONSTANT' : 'IDENT');
               if ($m === '__END__') {
                   // Outside interpolation the rest of the input is recorded
                   // as untyped text and the scan is terminated; in either
                   // case the loop ends here.
                   if (!$this->interpolation) {
                       $this->record($this->rest(), null);
                       $this->terminate();
                   }
                   break;
               }
           } elseif ($this->scan($this->operator_regex)) {
               $this->record($this->match(), 'OPERATOR');
           } elseif ($this->scan("/[ \t]+/")) {
               // runs of spaces and tabs are recorded as untyped text
               $this->record($this->match(), null);
           } else {
               // nothing matched: consume one character as untyped text so the
               // loop always advances
               $this->record($this->get(), null);
           }
       }
       // In case not everything was popped
       if (isset($this->state_[0])) {
           $this->record(substr($this->string(), $this->state_[0][3], $this->pos() - $this->state_[0][3]), $this->state_[0][0]);
           $this->terminate();
       }
   }