Example #1
0
/**
 * Checks Utils::balanceDelimiter() against every printable ASCII character
 * (codes 32 through 126).
 *
 * Opening brackets are expected to map to their closing counterpart; every
 * other character is expected to map to itself. Each mismatch is printed and
 * followed by a failing assertion.
 */
function testBalance()
{
    // The only characters whose closing delimiter differs from the opener.
    $pairs = array('{' => '}', '[' => ']', '(' => ')', '<' => '>');

    foreach (range(32, 126) as $code) {
        $chr = chr($code);
        $expected = array_key_exists($chr, $pairs) ? $pairs[$chr] : $chr;
        $out = Utils::balanceDelimiter($chr);
        if ($out === $expected) {
            continue;
        }
        echo "balanceDelimiter({$chr}) = {$out} (expected {$expected})\n";
        assert(0);
    }
}
Example #2
0
 /**
  * Tokenises a quote-like operator (m, qr, s, tr, y, qw, ...) from a regex match.
  *
  * @param array $matches Match groups: [0] is the full matched text (recorded
  *                       as the opening delimiter), [1] is the operator name
  *                       and [3] is the opening delimiter character.
  */
 public function strOverride($matches)
 {
     $opener = $matches[0];
     $operator = $matches[1];
     $delim = $matches[3];

     // Move the scan position past the whole match and emit it as a delimiter.
     $this->pos($this->pos() + strlen($opener));
     $this->record($opener, 'DELIMITER');

     // Pick the token type of the (first) operand from the operator name.
     if ($operator === 'qw') {
         $kind = 'SPLIT_STRING';
     } elseif (in_array($operator, array('m', 'qr', 's', 'tr', 'y'), true)) {
         $kind = 'REGEX';
     } else {
         $kind = 'STRING';
     }
     $this->consumeString($delim, $kind);

     // s, tr and y take two operands, e.g. s/something/somethingelse/, so a
     // second string has to be consumed as well.
     if (in_array($operator, array('s', 'tr', 'y'), true)) {
         // NOTE(review): whitespace is skipped for both delimiter styles; for
         // unbalanced delimiters this may strip leading whitespace that
         // belongs to the second operand - confirm against skipWhitespace().
         $this->skipWhitespace();
         if (Utils::balanceDelimiter($delim) === $delim) {
             // Unbalanced delimiters: the closing delimiter of the first
             // operand has already been consumed and doubles as the opener
             // of the second one.
             $this->consumeString($delim, 'STRING');
         } else {
             // Balanced delimiters: a fresh opening delimiter is expected
             // here, e.g. s[something][somethingelse]
             $second = $this->scan('/[^a-zA-Z0-9]/');
             if ($second !== null) {
                 $this->record($second, 'DELIMITER');
                 $this->consumeString($second, 'STRING');
             }
         }
     }

     // Only regex-like operators can be followed by modifier letters.
     if ($kind !== 'REGEX') {
         return;
     }
     if ($this->scan('/[cgimosxpe]+/')) {
         $this->record($this->match(), 'KEYWORD');
     }
 }
Example #3
0
 /**
  * Main tokenising loop.
  *
  * Scans from the current position until end of input, recording one token
  * per iteration. The loop stops early when:
  *  - scanning inside an interpolation and the closing '}' that brings the
  *    brace count to zero (or below) is reached,
  *  - the rails flag is set and a closing "%>" / "-%>" tag is next, or
  *  - the identifier __END__ is encountered.
  *
  * Pending heredocs are handled at the beginning of a line via doHeredoc().
  * String-like literals are handed to doString() with a descriptor array of
  * (type, opening delimiter, closing delimiter, start position,
  * interpolation flag, fancy-delimiter flag, split flag).
  */
 public function main()
 {
     // Both patterns are constant, so they are built once rather than on
     // every pass through the loop.
     $variableRegex = '/\\$
                 (?:
                     (?:[!@`\'\\+1~=\\/\\\\,;\\._0\\*\\$\\?:"&<>])
                     |
                     (?: -[0adFiIlpvw])
                     |
                     (?:DEBUG|FILENAME|LOAD_PATH|stderr|stdin|stdout|VERBOSE)
                 )
             /x';
     $stringRegex = '/[\'"`]|%( [qQrswWx](?![[:alnum:]]|$) | (?![[:alnum:]\\s]|$))/xm';

     while (!$this->eos()) {
         if ($this->bol() && !empty($this->heredocs)) {
             $this->doHeredoc();
         }
         if ($this->interpolation) {
             // Track brace nesting so the '}' that closes the interpolation
             // can be told apart from braces belonging to the inner code.
             $c = $this->peek();
             if ($c === '{') {
                 $this->curleyBraces++;
             } elseif ($c === '}') {
                 $this->curleyBraces--;
                 if ($this->curleyBraces <= 0) {
                     break;
                 }
             }
         }
         if ($this->rails && $this->check('/-?%>/')) {
             break;
         }
         $c = $this->peek();
         if ($c === '=' && $this->scan('/^=begin .*? (^=end|\\z)/msx')) {
             $this->record($this->match(), 'DOCCOMMENT');
         } elseif ($c === '#' && $this->scan($this->commentRegex)) {
             $this->record($this->match(), 'COMMENT');
         } elseif ($this->scan($this->numeric) !== null) {
             $this->record($this->match(), 'NUMERIC');
         } elseif (($c === '$' && $this->scan($variableRegex)) || $this->scan('/(\\$|@@?)\\w+/')) {
             // && binds tighter than ||; the parentheses only make the
             // existing grouping explicit.
             $this->record($this->match(), 'VARIABLE');
         } elseif ($this->scan('/:\\w+/')) {
             $this->record($this->match(), 'VALUE');
         } elseif ($c === '<' && $this->scan('/(<<(-?))([\'"`]?)([A-Z_]\\w*)(\\3)/i')) {
             // Heredoc opener: queue (identifier, uses the "<<-" form,
             // is not single-quoted) for doHeredoc() to pick up later.
             $m = $this->matchGroups();
             $this->record($m[0], 'DELIMITER');
             $hdoc = array($m[4], $m[2] === '-', $m[3] !== "'");
             $this->heredocs[] = $hdoc;
         } elseif ((strspn($c, '"\'`%') === 1 && $this->scan($stringRegex)) || ($c === '/' && $this->isRegex())) {
             // TODO: "% hello " is I think a valid string, using whitespace as
             // delimiters. We're going to disallow this for now because
             // we're not disambiguating between that and modulus
             $interpolation = false;
             $type = 'STRING';
             $fancyDelim = false;
             $split = false;
             if ($c === '/') {
                 // Regex literal: the slash has not been consumed yet.
                 $interpolation = true;
                 $type = 'REGEX';
                 $delimiter = $c;
                 $pos = $this->pos();
                 $this->get();
             } else {
                 $pos = $this->matchPos();
                 $delimiter = $this->match();
                 if ($delimiter === '"') {
                     $interpolation = true;
                 } elseif ($delimiter === '`') {
                     $type = 'FUNCTION';
                 } elseif ($delimiter !== "'") {
                     // %-literal: the real delimiter is the next character,
                     // and group 1 holds the optional type letter. A plain
                     // single-quoted string keeps the defaults set above.
                     $delimiter = $this->get();
                     $m1 = $this->matchGroup(1);
                     if ($m1 === 'Q' || $m1 === 'r' || $m1 === 'W' || $m1 === 'x') {
                         $interpolation = true;
                     }
                     if ($m1 === 'w' || $m1 === 'W') {
                         $split = true;
                     }
                     if ($m1 === 'x') {
                         $type = 'FUNCTION';
                     } elseif ($m1 === 'r') {
                         $type = 'REGEX';
                     }
                     $fancyDelim = true;
                     $this->record($this->match() . $delimiter, 'DELIMITER');
                     $pos = $this->pos();
                 }
             }
             $data = array($type, $delimiter, Utils::balanceDelimiter($delimiter), $pos, $interpolation, $fancyDelim, $split);
             $this->doString($data);
         } elseif ((ctype_alpha($c) || $c === '_') && ($m = $this->scan('/[_a-zA-Z]\\w*[!?]?/')) !== null) {
             // Identifiers starting with an upper-case letter are constants.
             $this->record($m, ctype_upper($m[0]) ? 'CONSTANT' : 'IDENT');
             if ($m === '__END__') {
                 // Outside an interpolation, everything after __END__ is
                 // recorded as untyped text and scanning is terminated.
                 if (!$this->interpolation) {
                     $this->record($this->rest(), null);
                     $this->terminate();
                 }
                 break;
             }
         } elseif ($this->scan($this->operatorRegex)) {
             $this->record($this->match(), 'OPERATOR');
         } elseif ($this->scan("/[ \t]+/")) {
             $this->record($this->match(), null);
         } else {
             // Nothing matched: emit a single untyped character and move on.
             $this->record($this->get(), null);
         }
     }
     // In case not everything was popped
     if (isset($this->state[0])) {
         $this->record(substr($this->string(), $this->state[0][3], $this->pos() - $this->state[0][3]), $this->state[0][0]);
         $this->terminate();
     }
 }