Exemple #1
0
/**
 * Collect the distinct keys found in a regular expression.
 *
 * A Lexer is built from the pattern, advanced once with moveNext(), and
 * then handed to readKeysFromLexer(), which performs the extraction.
 *
 * @param mixed $regexp pattern handed unchanged to the Lexer constructor
 * @return array result of readKeysFromLexer() with duplicate values removed
 *               (array_unique() keeps the index of each first occurrence)
 */
function getKeysFromRegexp($regexp)
{
    $tokenStream = new Lexer($regexp);
    // Advance once so the reader starts with a loaded lookahead.
    $tokenStream->moveNext();

    return array_unique(readKeysFromLexer($tokenStream));
}
Exemple #2
0
 /**
  *  Rebuild the text of every token from the start of the input up to,
  *  and including, the token the lexer is currently looking at.
  *
  *  The lexer is reset and replayed from the beginning, so its position
  *  after the call is wherever the replay loop stopped.
  *
  *  @access public
  *  @return string the concatenated token values
  */
 public function compress()
 {
     # remember where we are before rewinding
     $limit = $this->lexer->lookahead['position'];
     $this->lexer->reset();

     $pieces = array();
     while ($this->lexer->moveNext()) {
         # stop on the first token that lies beyond the remembered position
         if (!($this->lexer->lookahead['position'] <= $limit)) {
             break;
         }
         $pieces[] = $this->lexer->lookahead['value'];
     }

     return implode('', $pieces);
 }
 /**
  *  Evaluate a hexadecimal escape into the character it denotes.
  *
  *  The lookahead token selects the behaviour:
  *   - Lexer::T_SHORT_P         always rejected, unicode properties are not supported
  *   - Lexer::T_SHORT_UNICODE_X must be followed by `{` hex digits `}`; single
  *                              space tokens inside the braces are tolerated
  *   - Lexer::T_SHORT_X         consumes up to the next two tokens as the hex
  *                              value; a following `{` is rejected
  *
  *  On success the lexer lookahead is left on the last token consumed.
  *
  *  @access public
  *  @param ReverseRegex\Lexer $lexer lexer whose lookahead is the escape token
  *  @return mixed whatever Utf8::chr() returns for the parsed code point
  *  @throws ParserException on unsupported, malformed or unterminated input
  */
 public function evaluate(Lexer $lexer)
 {
     if ($lexer->isNextToken(Lexer::T_SHORT_P)) {
         throw new ParserException('Property \\p (Unicode Property) not supported use \\x to specify unicode character or range');
     }

     if ($lexer->isNextToken(Lexer::T_SHORT_UNICODE_X)) {
         $lexer->moveNext();
         # lookahead is null once the input is exhausted, guard before indexing it
         if ($lexer->lookahead === null || $lexer->lookahead['value'] !== '{') {
             throw new ParserException('Expecting character { after \\X none found');
         }

         $tokens = array();
         while ($lexer->moveNext() && $lexer->lookahead['value'] !== '}') {
             # check if we nested eg.{ddd{d}
             if ($lexer->lookahead['value'] === '{') {
                 throw new ParserException('Nesting hex value ranges is not allowed');
             }
             if ($lexer->lookahead['value'] !== " " && ctype_xdigit($lexer->lookahead['value']) === false) {
                 throw new ParserException(sprintf('Character %s is not a hexdeciaml digit', $lexer->lookahead['value']));
             }
             $tokens[] = $lexer->lookahead['value'];
         }

         # the loop also ends when the input runs out, in which case lookahead is null
         if ($lexer->lookahead === null || $lexer->lookahead['value'] !== '}') {
             throw new ParserException('Closing quantifier token `}` not found');
         }
         if (count($tokens) === 0) {
             throw new ParserException('No hex number found inside the range');
         }

         # drop the tolerated spaces so hexdec() only ever sees hex digits;
         # the numeric result is unchanged because hexdec() ignored them anyway
         $number = trim(str_replace(' ', '', implode('', $tokens)));

         return Utf8::chr(hexdec($number));
     }

     if ($lexer->isNextToken(Lexer::T_SHORT_X)) {
         // only allow another 2 hex characters
         $glimpse = $lexer->glimpse();
         if ($glimpse !== null && $glimpse['value'] === '{') {
             throw new ParserException('Braces not supported here');
         }

         $tokens = array();
         for ($remaining = 2; $remaining > 0 && $lexer->moveNext(); --$remaining) {
             $tokens[] = $lexer->lookahead['value'];
         }
         $value = trim(implode('', $tokens));

         return Utf8::chr(hexdec($value));
     }

     throw new ParserException('No Unicode expression to evaluate');
 }
 /**
  *  Parse a character set and register its members as literals on $head.
  *
  *  The set is first rewritten by normalize() and the result is lexed
  *  again. Single characters are stored with setLiteral() under their
  *  Utf8::ord() value, `a-z` style ranges are expanded through fillRange().
  *  The collected literals are sorted before returning.
  *
  *  @access public
  *  @return ReverseRegex\Generator\Scope the $head instance that was passed in
  *  @param ReverseRegex\Generator\Scope $head scope receiving the literals
  *  @param ReverseRegex\Generator\Scope $set forwarded to normalize()
  *  @param ReverseRegex\Lexer $lexer lexer whose lookahead is the opening set token
  *  @throws ParserException when the lookahead is not a set opener or the set is negated
  */
 public function parse(Scope $head, Scope $set, Lexer $lexer)
 {
     # lookahead is null when the input is exhausted, guard before indexing it
     if ($lexer->lookahead === null || $lexer->lookahead['type'] !== Lexer::T_SET_OPEN) {
         throw new ParserException('Opening character set token not found');
     }

     $peek = $lexer->glimpse();
     if ($peek !== null && $peek['type'] === Lexer::T_SET_NEGATED) {
         throw new ParserException('Negated Character Set ranges not supported at this time');
     }

     $normal_lexer = new Lexer($this->normalize($head, $set, $lexer));

     while ($normal_lexer->moveNext() && !$normal_lexer->isNextToken(Lexer::T_SET_CLOSE)) {
         $glimpse = $normal_lexer->glimpse();
         if ($glimpse !== null && $glimpse['type'] === Lexer::T_SET_RANGE) {
             # the current value opens a range; it is picked up as the range
             # start on the next pass, once the `-` becomes the lookahead
             continue;
         }

         if ($normal_lexer->isNextToken(Lexer::T_SET_RANGE)) {
             # token holds the value that was skipped on the previous pass
             $range_start = $normal_lexer->token['value'];
             $normal_lexer->moveNext();
             if ($normal_lexer->isNextToken(Lexer::T_ESCAPE_CHAR)) {
                 # step over the escape so the range ends on the escaped value
                 $normal_lexer->moveNext();
             }
             $range_end = $normal_lexer->lookahead['value'];
             $this->fillRange($head, $range_start, $range_end);
         } elseif ($normal_lexer->isNextToken(Lexer::T_LITERAL_NUMERIC) || $normal_lexer->isNextToken(Lexer::T_LITERAL_CHAR)) {
             $index = (int) Utf8::ord($normal_lexer->lookahead['value']);
             $head->setLiteral($index, $normal_lexer->lookahead['value']);
         }
         # every other token type is ignored
     }

     $head->getLiterals()->sort();

     return $head;
 }
Exemple #5
0
 /**
  *  Runs the Short parser over the pattern `.` and checks that every
  *  literal it stored on the head scope matches the regex `/./`.
  */
 public function testDotRange()
 {
     $tokens = new Lexer('.');
     $parent = new Scope();
     $short = new Short();
     $literalScope = new LiteralScope('lit1', $parent);

     # advance the lexer twice before handing it to the parser
     for ($step = 0; $step < 2; $step++) {
         $tokens->moveNext();
     }

     $short->parse($literalScope, $parent, $tokens);

     foreach ($literalScope->getLiterals() as $literal) {
         $this->assertRegExp('/./', $literal);
     }
 }
Exemple #6
0
 /**
  *  Add the literals standing for a whitespace short-hand to $head.
  *
  *  When the lookahead is Lexer::T_SHORT_S the whitespace characters
  *  HT, LF, FF, CR and space are added. For any other lookahead the
  *  non-whitespace ASCII control characters are added instead.
  *
  *  @access public
  *  @return void
  *  @param ReverseRegex\Generator\Scope $head scope receiving the literals
  *  @param ReverseRegex\Generator\Scope $result not used by this method
  *  @param ReverseRegex\Lexer $lexer lexer whose lookahead selects the branch
  */
 public function convertWhiteSpaceToRange(Scope $head, Scope $result, Lexer $lexer)
 {
     if ($lexer->isNextToken(Lexer::T_SHORT_S)) {
         # spaces, tabs, and line breaks
         #0009 #0010 #0012 #0013 #0032
         $codes = array(9, 10, 12, 13, 32);
     } else {
         # not spaces, tabs, and line breaks
         #0000-0008  #0014 - #0031
         # 0011 (vertical tab) is left out on purpose: PCRE 8.34 and later
         # count it as whitespace, so it would not match a negated short-hand
         $codes = array_merge(range(0, 8), range(14, 31));
     }

     foreach ($codes as $code) {
         $head->addLiteral(chr($code));
     }
 }
 /**
  *  Parse a braced quantifier : {###} { ## } {##,##}
  *
  *  The tokens before the `,` (or before the closing brace) are turned
  *  into the minimum by convertInteger(). When a `,` is present the tokens
  *  after it are converted into the maximum, otherwise the maximum equals
  *  the minimum. Both values are then stored on $head.
  *
  *  @access public
  *  @return ReverseRegex\Generator\Scope the $head instance that was passed in
  *  @param ReverseRegex\Generator\Scope $head scope whose occurrence bounds are set
  *  @param ReverseRegex\Generator\Scope $result not used by this method
  *  @param ReverseRegex\Lexer $lexer lexer whose lookahead is the opening brace
  *  @throws ParserException on a nested opening brace or a missing closing brace
  */
 public function quantifyClosure(Scope $head, Scope $result, Lexer $lexer)
 {
     # move to the first token inside the quantifer.
     # parse for the minimum , move lookahead until read end of the closure or the `,`
     $tokens = array();
     while ($lexer->moveNext() === true && !$lexer->isNextToken(Lexer::T_QUANTIFIER_CLOSE) && $lexer->lookahead['value'] !== ',') {
         if ($lexer->isNextToken(Lexer::T_QUANTIFIER_OPEN)) {
             throw new ParserException('Nesting Quantifiers is not allowed');
         }
         $tokens[] = $lexer->lookahead;
     }
     $min = $this->convertInteger($tokens);

     # do we have a maximum after the comma?
     # lookahead is null when the input ran out, guard before indexing it
     if ($lexer->lookahead !== null && $lexer->lookahead['value'] === ',') {
         # move to the first token after the `,` character
         # grab the remaining numbers; an empty list ie {778,} is passed
         # to convertInteger() as is
         $tokens = array();
         while ($lexer->moveNext() && !$lexer->isNextToken(Lexer::T_QUANTIFIER_CLOSE)) {
             if ($lexer->isNextToken(Lexer::T_QUANTIFIER_OPEN)) {
                 throw new ParserException('Nesting Quantifiers is not allowed');
             }
             $tokens[] = $lexer->lookahead;
         }
         $max = $this->convertInteger($tokens);
     } else {
         # a single number means exactly that many occurrences
         $max = $min;
     }

     # maximum is set before the minimum, same order as before
     $head->setMaxOccurances($max);
     $head->setMinOccurances($min);

     # skip the lexer to the closing token
     $lexer->skipUntil(Lexer::T_QUANTIFIER_CLOSE);

     # check if the last matched token was the closing bracket
     # not going to stop errors like {#####,###{[a-z]} {#####{[a-z]}
     if (!$lexer->isNextToken(Lexer::T_QUANTIFIER_CLOSE)) {
         throw new ParserException('Closing quantifier token `}` not found');
     }

     return $head;
 }
 /**
  *  Runs the Unicode parser over the pattern `\x64` and expects the first
  *  stored literal to equal `d`.
  */
 public function testShortX()
 {
     $tokens = new Lexer('\\x64');
     $parent = new Scope();
     $unicode = new Unicode();
     $literalScope = new LiteralScope('lit1', $parent);

     # advance the lexer twice before handing it to the parser
     for ($step = 0; $step < 2; $step++) {
         $tokens->moveNext();
     }

     $unicode->parse($literalScope, $parent, $tokens);

     $literals = $literalScope->getLiterals();
     $this->assertEquals('d', $literals[0]);
 }
 /**
  *  Parsing the set `[\x{61}-\x6B-\x6E]` with the CharacterClass parser
  *  is expected to raise the exception described below.
  *
  *  @expectedException \ReverseRegex\Exception
  *  @expectedExceptionMessage Braces not supported here 
  */
 public function testParseHexShortBraceError()
 {
     $tokens = new Lexer('[\\x{61}-\\x6B-\\x6E]');
     $parent = new Scope();
     $literalScope = new LiteralScope();
     $characterClass = new CharacterClass();

     # advance once before handing the lexer to the parser
     $tokens->moveNext();

     $characterClass->parse($literalScope, $parent, $tokens);
 }
Exemple #10
0
 /**
  *  Walks the lexer over `())`, checking value and type of each token
  *  after every step, and expects the exception described below to be
  *  raised along the way.
  *
  *  @expectedException \ReverseRegex\Exception
  *  @expectedExceptionMessage Closing group char "(" has no matching opening character
  */
 public function testGroupNestingErrorClosedNotOpened()
 {
     $lexer = new Lexer('())');

     # expected value / type of the lookahead after each moveNext()
     $expectations = array(
         array('(', Lexer::T_GROUP_OPEN),
         array(')', Lexer::T_GROUP_CLOSE),
         array(')', Lexer::T_GROUP_CLOSE),
     );

     foreach ($expectations as $expected) {
         $lexer->moveNext();
         $this->assertEquals($expected[0], $lexer->lookahead['value']);
         $this->assertEquals($expected[1], $lexer->lookahead['type']);
     }
 }
 /**
  *  Runs the Quantifier parser after three lexer steps over `az?` and
  *  expects the scope to end up with 0 minimum and 1 maximum occurrences.
  */
 public function testQuestionQuantifier()
 {
     $tokens = new Lexer('az?');
     $target = new Scope();
     $quantifier = new Quantifier();

     # advance the lexer three times before handing it to the parser
     for ($step = 0; $step < 3; $step++) {
         $tokens->moveNext();
     }

     # the same scope is used as both head and result
     $quantifier->parse($target, $target, $tokens);

     $this->assertEquals(0, $target->getMinOccurances());
     $this->assertEquals(1, $target->getMaxOccurances());
 }