Esempio n. 1
0
 /**
  * @brief Performs a search for the given pattern past the given index.
  *
  * Results are memoised per pattern in $this->cache. A cached hit is reused
  * as long as its position is at or after $index; a cached `false` means an
  * earlier search for this pattern failed, so `false` is returned straight
  * away without running the regex again.
  *
  * @param $search the pattern to search for
  * @param $index the minimum string index (offset) of a result
  * @param $matches a reference to the return location of the match groups
  *   (plain strings, offsets stripped). Note that it is overwritten with the
  *   cached groups even when the cached position turns out to be stale.
  * @return the index or false if no match is found.
  * @throws Exception if preg_match() itself fails and LUMINOUS_DEBUG is truthy
  */
 public function match($search, $index, &$matches)
 {
     if (isset($this->cache[$search])) {
         $cached = $this->cache[$search];
         if ($cached === false) {
             // no more results
             return false;
         }
         $position = $cached[0];
         $matches = $cached[1];
         assert($matches !== null);
         if ($position >= $index) {
             // cache is good!
             return $position;
         }
     }
     // cache not set, or out of date, we have to perform the match
     $ret = preg_match($search, $this->string, $found, PREG_OFFSET_CAPTURE, $index);
     if (!$ret) {
         // 0 means "no match", false means the PCRE engine reported an error
         if ($ret === false && LUMINOUS_DEBUG) {
             throw new Exception('preg_match returned false for pattern: "' . $search . '", with code: ' . Utils::pcreErrorDecode(preg_last_error()) . " with string length " . strlen($this->string) . " and offset " . $index);
         }
         $this->cache[$search] = false;
         return false;
     }
     $position = $found[0][1];
     // Strip the offsets from the match groups. This builds a fresh array
     // rather than iterating by reference, so no reference slot ends up
     // inside the array that is cached and handed back to the caller.
     $groups = array();
     foreach ($found as $key => $group) {
         $groups[$key] = $group[0];
     }
     $this->cache[$search] = array($position, $groups);
     $matches = $groups;
     return $position;
 }
Esempio n. 2
0
 /**
  * Token filter which marks up "~" directives inside a token's text.
  *
  * Every "~" followed by either a run of digits or any single character is
  * wrapped in an INTERPOLATION tag. Tokens without a "~" are returned
  * untouched (and un-escaped).
  *
  * @param $token the token array; index 1 holds the token text
  * @return the token, escaped and with its text marked up if it contained "~"
  */
 public static function strFilter($token)
 {
     // Cheap pre-check before escaping. The comparison has to be strict:
     // strpos() returns 0 for a match at the very start of the text, and
     // 0 == false would wrongly skip the filter in that case.
     if (strpos($token[1], '~') === false) {
         return $token;
     }
     $token = Utils::escapeToken($token);
     $token[1] = preg_replace('/~(?:\\d+|.)/', '<INTERPOLATION>$0</INTERPOLATION>', $token[1]);
     return $token;
 }
Esempio n. 3
0
 /**
  * Token filter for comments: marks up embedded quoted strings and a leading
  * "Tag: description" pair.
  *
  * - A double-quoted run that does not begin at the very start of the text
  *   is wrapped in a STRING tag.
  * - If the text starts with a double quote followed by (optional non-word
  *   characters and) a word beginning with A-Z, possibly followed by more
  *   words, and then a colon, the word(s) are wrapped in a DOCTAG tag and
  *   the text after the colon in a DOCSTR tag.
  *
  * @param $token the token array; index 1 holds the token text
  * @return the escaped token with the mark-up applied to its text
  */
 public static function commentFilter($token)
 {
     $escaped = Utils::escapeToken($token);
     $text = $escaped[1];

     // The plain substring checks are much cheaper than the regexes, so
     // they are used to decide whether a regex needs to run at all.
     $hasInnerQuote = strpos(substr($text, 1), '"') !== false;
     if ($hasInnerQuote) {
         $text = preg_replace('/(?<!^)"(?>[^"]*)"/', "<STRING>\$0</STRING>", $text);
     }

     $hasColon = strpos($text, ':') !== false;
     if ($hasColon) {
         $text = preg_replace('/(?<=^")((?>\\W*))((?>[A-Z]\\w+(?>(?>\\s+\\w+)*)))(:\\s*)(.*)/', '$1<DOCTAG>$2</DOCTAG>$3<DOCSTR>$4</DOCSTR>', $text);
     }

     $escaped[1] = $text;
     return $escaped;
 }
Esempio n. 4
0
 /**
  * Token filter which marks up ":"-introduced escape sequences in a token's
  * text by wrapping each one in a VARIABLE tag.
  *
  * A sequence is a colon followed by one of:
  *  - a single ")", "o", double quote or ":" character, or the entity "&gt;"
  *    (the token is escaped first, so a literal ">" appears as "&gt;"),
  *  - "(" hex digits ")",
  *  - "[" upper-case letters and spaces "]",
  *  - "{" word characters "}".
  *
  * @param $TOKEN the token array; index 1 holds the token text. The
  *   parameter name is left in upper case so named-argument callers keep
  *   working.
  * @return the escaped token with the mark-up applied to its text
  */
 public function strFilter($TOKEN)
 {
     // Call names are spelled the way they are declared and used elsewhere
     // in this file (PHP resolves them case-insensitively, so the all-caps
     // spelling only worked by accident of the language).
     $TOKEN = Utils::escapeToken($TOKEN);
     $TOKEN[1] = preg_replace('/:
             (?:
                 (?:[\\)o":]|&gt;)
                 |\\([a-fA-F0-9]*\\)
                 |\\[[A-Z ]*\\]
                 |\\{\\w*\\}
             )
         /x', '<VARIABLE>$0</VARIABLE>', $TOKEN[1]);
     return $TOKEN;
 }
Esempio n. 5
0
 /**
  * Token filter which marks up "$" variables inside interpolating strings.
  *
  * Only tokens whose text starts with a double quote, or whose type is
  * 'HEREDOC', are considered, and only if the text contains a "$". Matching
  * sequences ($var, $$var, ${...} and {$...}) are wrapped in a VARIABLE
  * tag. Every other token is returned untouched (and un-escaped).
  *
  * @param $token the token array; index 0 is the type, index 1 the text
  * @return the token, escaped and marked up if it qualified
  */
 public static function strFilter($token)
 {
     $text = $token[1];
     // Empty text has nothing to mark up. Checking this first also avoids
     // reading offset 0 of an empty string, which raises an
     // "Uninitialized string offset" notice/warning.
     if ($text === '') {
         return $token;
     }
     if ($text[0] !== '"' && $token[0] !== 'HEREDOC') {
         return $token;
     }
     if (strpos($text, '$') === false) {
         return $token;
     }
     $token = Utils::escapeToken($token);
     // matches $var, ${var} and {$var} syntax
     $token[1] = preg_replace('/
             (?: \\$\\{ | \\{\\$ ) [^}]++ \\}
             |
             \\$\\$?[a-zA-Z_]\\w*
         /x', '<VARIABLE>$0</VARIABLE>', $token[1]);
     return $token;
 }
Esempio n. 6
0
/**
 * Checks Utils::escapeToken() against three sample texts, each fed in once
 * flagged as not-yet-escaped and once flagged as already escaped.
 *
 * For every sample the token name must come back unchanged, the text must
 * equal Utils::escapeString() of the input when the flag was false (and be
 * untouched when it was true), and the returned flag must be truthy.
 */
function testEscapeToken()
{
    $samples = array('<>&', 'no html entities here', '&lt;&gt;&amp;');
    foreach (array(false, true) as $alreadyEscaped) {
        foreach ($samples as $text) {
            $input = array('NAME', $text, $alreadyEscaped);
            $output = Utils::escapeToken($input);

            // name should be unchanged
            assert($input[0] === $output[0]);

            // text flagged as already escaped must pass through verbatim
            $expected = $alreadyEscaped ? $text : Utils::escapeString($text);
            assert($output[1] === $expected);

            // whatever went in, the result is flagged as escaped
            assert($output[2]);
        }
    }
}
Esempio n. 7
0
 /**
  * Handles a quote-like operator whose opening has already been matched.
  *
  * The scan position is advanced past the matched opener, which is recorded
  * as a DELIMITER. The body is then consumed up to the delimiter in
  * $matches[3], typed according to the operator name in $matches[1]:
  * REGEX for m/qr/s/tr/y, SPLIT_STRING for qw, STRING otherwise. The
  * two-part operators (s, tr, y) get a second body consumed, and trailing
  * modifier letters after a REGEX are recorded as a KEYWORD.
  *
  * @param $matches match groups of the opener: [0] the whole opener,
  *   [1] the operator name, [3] the opening delimiter
  */
 public function strOverride($matches)
 {
     $opener = $matches[0];
     $this->pos($this->pos() + strlen($opener));
     $this->record($opener, 'DELIMITER');

     $operator = $matches[1];
     $delimiter = $matches[3];
     $takesTwoStrings = in_array($operator, array('s', 'tr', 'y'), true);

     if ($takesTwoStrings || $operator === 'm' || $operator === 'qr') {
         $type = 'REGEX';
     } elseif ($operator === 'qw') {
         $type = 'SPLIT_STRING';
     } else {
         $type = 'STRING';
     }
     $this->consumeString($delimiter, $type);

     if ($takesTwoStrings) {
         // s/tr/y take two strings, e.g. s/something/somethingelse/, so a
         // second string has to be consumed after the first one.
         $this->skipWhitespace();
         if (Utils::balanceDelimiter($delimiter) === $delimiter) {
             // Not a balanced pair: the delimiter is the same character and
             // it has already been consumed as the end of the first string.
             $this->consumeString($delimiter, 'STRING');
         } else {
             // Balanced pair, e.g. s[something][somethingelse]: the second
             // string brings its own opening delimiter (if it exists).
             $secondDelimiter = $this->scan('/[^a-zA-Z0-9]/');
             if ($secondDelimiter !== null) {
                 $this->record($secondDelimiter, 'DELIMITER');
                 $this->consumeString($secondDelimiter, 'STRING');
             }
         }
     }

     if ($type === 'REGEX' && $this->scan('/[cgimosxpe]+/')) {
         $this->record($this->match(), 'KEYWORD');
     }
 }
Esempio n. 8
0
 /**
  * Scans the input one line at a time and records a token per line.
  *
  * Recognised line kinds: "diff ..." command lines, range lines, separator
  * lines, header lines (which may name a file and thereby select a child
  * scanner), lines starting with a backslash, and blocks of actual source
  * lines. Source blocks are stripped of their one-character prefixes,
  * highlighted as a whole by the child scanner (if there is one) and then
  * re-assembled line by line with the prefixes put back.
  */
 public function main()
 {
     // we're aiming to handle context, unified and normal diff all at once here
     // because it doesn't really seem that hard.
     $child = null;
     $lastIndex = -1;
     while (!$this->eos()) {
         $index = $this->pos();
         // every iteration has to make progress, and has to start on a
         // fresh line
         assert($index > $lastIndex);
         $lastIndex = $index;
         assert($this->bol());
         $tok = null;
         if ($this->scan('/diff\\s.*$/m') !== null) {
             $tok = 'KEYWORD';
         } elseif ($this->scan($this->patterns['range']) !== null) {
             // normal, context and unified ranges
             $tok = 'DIFF_RANGE';
         } elseif ($this->scan("/-{3}[ \t]*\$/m")) {
             $tok = null;
         } elseif ($this->scan('/(?:\\**|=*|\\w.*)$/m') !== null) {
             $tok = 'KEYWORD';
         } elseif ($this->scan("@[+\\-\\*]{3}(\\s+([^\\s]*)([ \t]|\$))?.*@m") !== null) {
             // this is a header line which may contain a file path. If it does,
             // update the child scanner according to its extension.
             $m = $this->matchGroups();
             // unified uses +++, context uses *
             if ($m[0][0] === '+' || $m[0][0] === '*') {
                 $tok = 'DIFF_HEADER_NEW';
             } else {
                 $tok = 'DIFF_HEADER_OLD';
             }
             if (isset($m[2])) {
                 // NOTE(review): as written this pattern strips everything up
                 // to a literal backslash followed by "/", not up to a plain
                 // directory separator - confirm that this is intended.
                 $filename = preg_replace('@.*\\\\/@', '', $m[2]);
                 $child = self::getChildScanner($filename);
             }
         } elseif ($this->scan('/\\\\.*/') !== null) {
             $tok = null;
         } elseif ($this->scan($this->patterns['codeblock']) !== null) {
             // this is actual source code.
             // we're going to format this here.
             // we're going to extract the block, and try to re-assemble it as
             // verbatim code, then highlight it via a child scanner, then split up
             // the lines, re-apply the necessary prefixes (e.g. + or -) to them,
             // and store them as being a DIFF_ token.
             // we have to do it like this, rather than line by line, otherwise
             // multiline tokens aren't going to work properly. There's still a risk
             // that the diff will be fragmented such that the child scanner gets it
             // wrong but that can't be helped.
             // TODO restructure this so the complicated bits aren't done if there's
             // no child scanner to pass it down to
             $block = $this->match();
             if (!strlen($block)) {
                 assert(0);
             }
             $lines = explode("\n", $block);
             // This has to start out as an array: appending with [] to a
             // string is a fatal error on PHP >= 7.1.
             $verbatimLines = array();
             $types = array();
             $prefixes = array();
             foreach ($lines as $l) {
                 if (!strlen($l) || $l[0] === ' ') {
                     $types[] = 'DIFF_UNCHANGED';
                 } elseif ($l[0] === '+' || $l[0] === '>') {
                     $types[] = 'DIFF_NEW';
                 } elseif ($l[0] === '!' || $l[0] === '<' || $l[0] === '-') {
                     $types[] = 'DIFF_OLD';
                 } else {
                     assert(0);
                     // Only reached when assertions are disabled: record the
                     // line untyped so that $types stays index-aligned with
                     // $prefixes and the lines that follow keep their type.
                     $types[] = null;
                 }
                 $prefixes[] = isset($l[0]) ? $l[0] : '';
                 $verbatimLines[] = substr($l, 1);
             }
             $verbatim = implode("\n", $verbatimLines);
             $escaped = false;
             if ($child !== null) {
                 $c = new $child();
                 $c->init();
                 $c->string($verbatim);
                 $c->main();
                 $tagged = $c->tagged();
                 $escaped = true;
             } else {
                 $tagged = $verbatim;
             }
             $exp = explode("\n", $tagged);
             assert(count($exp) === count($prefixes));
             $lastLine = count($exp) - 1;
             foreach ($exp as $i => $v) {
                 $t = $types[$i];
                 // if the sub-scanner escaped the line, we also need to escape the
                 // prefix for consistency
                 $prefix = $prefixes[$i];
                 if ($escaped) {
                     $prefix = Utils::escapeString($prefix);
                 }
                 $text = $prefix . $v;
                 $this->record($text, $t, $escaped);
                 if ($i < $lastLine) {
                     $this->record("\n", null);
                 }
             }
             if ($this->eol()) {
                 $this->record($this->get(), null);
             }
             continue;
         } else {
             $this->scan('/.*/');
         }
         // previous else clause can capture empty strings
         if ($this->match() !== '') {
             $this->record($this->match(), $tok);
         }
         assert($this->eol());
         // consume newline
         if (!$this->eos()) {
             $this->record($this->get(), null);
         }
     }
 }
Esempio n. 9
0
 /**
  * @brief Builds the XML string for the recorded token stream
  *
  * Calling this is what produces the XML output. The stream filters run
  * first, each one receiving and replacing the complete token list. Then
  * every token is passed through the filters registered for its type,
  * escaped unless it is already flagged as escaped, and wrapped in a tag
  * named after its type (tokens whose type is null are emitted bare).
  *
  * @return An XML-string which represents the tokens recorded by the scanner.
  */
 public function tagged()
 {
     // stream filters operate on the token list as a whole
     foreach ($this->streamFilters as $streamFilter) {
         $this->tokens = call_user_func($streamFilter[1], $this->tokens);
     }

     $xml = '';
     foreach ($this->tokens as $token) {
         // Per-type filters are looked up by the type the token had when it
         // was recorded, even if a filter goes on to change it. Running them
         // inside this loop is roughly 10% faster than a separate pass.
         $originalType = $token[0];
         $typeFilters = isset($this->filters[$originalType]) ? $this->filters[$originalType] : array();
         foreach ($typeFilters as $filter) {
             $token = call_user_func($filter[1], $token);
         }

         list($name, $text, $isEscaped) = $token;
         if (!$isEscaped) {
             $text = Utils::escapeString($text);
         }
         $xml .= $name !== null ? Utils::tagBlock($name, $text) : $text;
     }
     return $xml;
 }
Esempio n. 10
0
 /**
  * Main scanning loop: consumes the input until the end of the string (or
  * an early exit) and records one token per iteration.
  *
  * Early exits from the loop:
  *  - in interpolation mode, when a "}" brings the brace counter to zero or
  *    below,
  *  - when $this->rails is set and the input continues with "%>" or "-%>",
  *  - when the identifier __END__ is read.
  *
  * After the loop, if $this->state still has an entry at index 0, the text
  * from that entry's stored position up to the current position is recorded
  * with that entry's type and the scanner is terminated.
  */
 public function main()
 {
     while (!$this->eos()) {
         // pending heredocs are handled once the scanner sits at the
         // beginning of a line
         if ($this->bol() && !empty($this->heredocs)) {
             $this->doHeredoc();
         }
         if ($this->interpolation) {
             // Track brace nesting; the brace itself is only peeked at here,
             // not consumed.
             // NOTE(review): presumably this instance is scanning the inside
             // of an interpolated section and has to stop at the matching
             // closing brace - confirm against the caller.
             $c = $this->peek();
             if ($c === '{') {
                 $this->curleyBraces++;
             } elseif ($c === '}') {
                 $this->curleyBraces--;
                 if ($this->curleyBraces <= 0) {
                     break;
                 }
             }
         }
         // NOTE(review): looks like the closing tag of an embedded template
         // block - confirm.
         if ($this->rails && $this->check('/-?%>/')) {
             break;
         }
         // $c is the next character; it is used as a cheap pre-filter in
         // front of several of the scan() calls below
         $c = $this->peek();
         // "$" followed by a single special character, by "-" and one
         // letter/digit from a fixed set, or by one of a few fixed names
         $variableRegex = '/\\$
                 (?:
                     (?:[!@`\'\\+1~=\\/\\\\,;\\._0\\*\\$\\?:"&<>])
                     |
                     (?: -[0adFiIlpvw])
                     |
                     (?:DEBUG|FILENAME|LOAD_PATH|stderr|stdin|stdout|VERBOSE)
                 )
             /x';
         // Opening of a string: a quote or backtick, or "%" optionally
         // followed by one of q Q r s w W x. The lookaheads reject the match
         // when the character that would be the delimiter is alphanumeric
         // (or, for a bare "%", whitespace) or when the line ends there.
         $stringRegex = '/[\'"`]|%( [qQrswWx](?![[:alnum:]]|$) | (?![[:alnum:]\\s]|$))/xm';
         if ($c === '=' && $this->scan('/^=begin .*? (^=end|\\z)/msx')) {
             // block comment, running up to "=end" at the start of a line
             // or to the end of the input
             $this->record($this->match(), 'DOCCOMMENT');
         } elseif ($c === '#' && $this->scan($this->commentRegex)) {
             $this->record($this->match(), 'COMMENT');
         } elseif ($this->scan($this->numeric) !== null) {
             $this->record($this->match(), 'NUMERIC');
         } elseif ($c === '$' && $this->scan($variableRegex) || $this->scan('/(\\$|@@?)\\w+/')) {
             // && binds tighter than ||, so the second scan() is attempted
             // whenever the special-variable scan did not succeed
             $this->record($this->match(), 'VARIABLE');
         } elseif ($this->scan('/:\\w+/')) {
             $this->record($this->match(), 'VALUE');
         } elseif ($c === '<' && $this->scan('/(<<(-?))([\'"`]?)([A-Z_]\\w*)(\\3)/i')) {
             // Heredoc opener: only the opener is recorded here; the body is
             // left to doHeredoc(). The queued entry holds the identifier,
             // whether the "<<-" form was used, and whether the identifier
             // was quoted with anything other than a single quote.
             $m = $this->matchGroups();
             $this->record($m[0], 'DELIMITER');
             $hdoc = array($m[4], $m[2] === '-', $m[3] !== "'");
             $this->heredocs[] = $hdoc;
         } elseif (strspn($c, '"\'`%') === 1 && $this->scan($stringRegex) || $c === '/' && $this->isRegex()) {
             // TODO: "% hello " is I think a valid string, using whitespace as
             // delimiters. We're going to disallow this for now because
             // we're not disambiguating between that and modulus
             $interpolation = false;
             $type = 'STRING';
             // the next two lines are no-op expression statements; they do
             // not declare or initialise anything
             $delimiter;
             $pos;
             $fancyDelim = false;
             $split = false;
             if ($c === '/') {
                 // regex literal: nothing has been scanned yet (isRegex() was
                 // the deciding check), so the slash is consumed here
                 $interpolation = true;
                 $type = 'REGEX';
                 $delimiter = $c;
                 $pos = $this->pos();
                 $this->get();
             } else {
                 $pos = $this->matchPos();
                 $delimiter = $this->match();
                 if ($delimiter === '"') {
                     $interpolation = true;
                 } elseif ($delimiter === "'") {
                     // single-quoted: the defaults (STRING, no interpolation)
                     // stay as they are
                 } elseif ($delimiter === '`') {
                     $type = 'FUNCTION';
                 } else {
                     // "%"-style string: the match so far is "%" plus the
                     // optional kind letter (group 1); the actual delimiter
                     // is the character after it
                     $delimiter = $this->get();
                     $m1 = $this->matchGroup(1);
                     if ($m1 === 'Q' || $m1 === 'r' || $m1 === 'W' || $m1 === 'x') {
                         $interpolation = true;
                     }
                     if ($m1 === 'w' || $m1 === 'W') {
                         $split = true;
                     }
                     if ($m1 === 'x') {
                         $type = 'FUNCTION';
                     } elseif ($m1 === 'r') {
                         $type = 'REGEX';
                     }
                     $fancyDelim = true;
                     // here the opener is recorded as its own DELIMITER token
                     // and the string's start position is taken after it
                     $this->record($this->match() . $delimiter, 'DELIMITER');
                     $pos = $this->pos();
                 }
             }
             // descriptor handed to doString(): type, opening delimiter, the
             // result of Utils::balanceDelimiter() for it, start position,
             // and the interpolation / fancy-delimiter / split flags
             $data = array($type, $delimiter, Utils::balanceDelimiter($delimiter), $pos, $interpolation, $fancyDelim, $split);
             $this->doString($data);
         } elseif ((ctype_alpha($c) || $c === '_') && ($m = $this->scan('/[_a-zA-Z]\\w*[!?]?/')) !== null) {
             // identifiers starting with an upper-case letter are recorded
             // as CONSTANT, all others as IDENT
             $this->record($m, ctype_upper($m[0]) ? 'CONSTANT' : 'IDENT');
             if ($m === '__END__') {
                 // Outside interpolation mode the rest of the input is
                 // recorded untyped and the scanner is terminated; either
                 // way the loop ends here.
                 if (!$this->interpolation) {
                     $this->record($this->rest(), null);
                     $this->terminate();
                 }
                 break;
             }
         } elseif ($this->scan($this->operatorRegex)) {
             $this->record($this->match(), 'OPERATOR');
         } elseif ($this->scan("/[ \t]+/")) {
             $this->record($this->match(), null);
         } else {
             // nothing matched: consume a single character untyped so the
             // loop always makes progress
             $this->record($this->get(), null);
         }
     }
     // In case not everything was popped
     if (isset($this->state[0])) {
         $this->record(substr($this->string(), $this->state[0][3], $this->pos() - $this->state[0][3]), $this->state[0][0]);
         $this->terminate();
     }
 }
Esempio n. 11
0
 /**
  * Token filter for preprocessor tokens: hands strings and comments found
  * inside the token's text to CppScanner::preprocessorFilterCb.
  *
  * After escaping, the callback is invoked for each of:
  *  - a double-quoted string (closed by a quote or by the end of the text),
  *  - an "&lt; ... &gt;" span,
  *  - a "//" comment,
  *  - a slash-star comment (closed normally or by the end of the text).
  * What each match is replaced with is decided by the callback.
  *
  * @param $token the token array; index 1 holds the token text
  * @return the escaped token with the callback's replacements applied
  */
 public static function preprocessorFilter($token)
 {
     $pattern = "@\n                (?:\" (?> [^\\\\\n\"]+ | \\\\. )* (?: \"|\$) | (?: &lt; (.*?) &gt;))\n                | // .*\n                | /\\* (?s:.*?) (\\*/ | \$)\n            @x";
     $callback = array('Luminous\\Scanners\\CppScanner', 'preprocessorFilterCb');

     $escaped = Utils::escapeToken($token);
     $escaped[1] = preg_replace_callback($pattern, $callback, $escaped[1]);
     return $escaped;
 }
Esempio n. 12
0
 /**
  * Token filter which wraps "$name" and "${...}" sequences inside a token's
  * text in a VARIABLE tag. The braced form may not contain "}" or a line
  * break.
  *
  * @param $token the token array; index 1 holds the token text
  * @return the escaped token with the mark-up applied to its text
  */
 public static function stringFilter($token)
 {
     $escaped = Utils::escapeToken($token);
     $text = $escaped[1];
     $escaped[1] = preg_replace("/\\\$(?:\\w+|\\{[^}\n]+\\})/", '<VARIABLE>$0</VARIABLE>', $text);
     return $escaped;
 }
Esempio n. 13
0
 /**
  * Recursively flattens a node of the token tree into an XML string.
  *
  * String children are escaped, node children are collapsed recursively,
  * and the pieces are concatenated in order. The result is then treated as
  * a single, already escaped token: it goes through ruleMapperFilter() and
  * through the filters registered for the node's original token name, and
  * is finally wrapped in a tag (or returned bare if its name ended up null).
  *
  * @param $node a tree node with 'children' and 'token_name' entries
  * @return the XML string for the node and everything below it
  * @internal
  */
 protected function collapseTokenTree($node)
 {
     $inner = '';
     foreach ($node['children'] as $child) {
         $inner .= is_string($child)
             ? Utils::escapeString($child)
             : $this->collapseTokenTree($child);
     }

     // Per-name filters are selected by the name the node carries in the
     // tree, not by whatever the rule mapper may rename the token to.
     $originalName = $node['token_name'];
     $mapped = $this->ruleMapperFilter(array(array($originalName, $inner, true)));
     $token = $mapped[0];

     $nameFilters = isset($this->filters[$originalName]) ? $this->filters[$originalName] : array();
     foreach ($nameFilters as $filter) {
         $token = call_user_func($filter[1], $token);
     }

     list($name, $body) = $token;
     if ($name === null) {
         return $body;
     }
     return Utils::tagBlock($name, $body);
 }