Example #1
0
 /**
  *  Class Constructor
  *
  *  Forwards the label to the parent constructor and then initialises the
  *  `literals` property with an empty ArrayCollection.
  *
  *  @access public
  *  @param string $label label handed to the parent constructor (defaults to 'label')
  */
 public function __construct($label = 'label')
 {
     parent::__construct($label);
     $this->literals = new ArrayCollection();
 }
 /**
  *  Register every character between two endpoints (inclusive) on a scope
  *
  *  Both endpoints are converted to their code points with Utf8::ord(). Each
  *  code point in the range is then stored on $head through setLiteral(),
  *  keyed by the code point and holding the character built by Utf8::chr().
  *
  *  @access public
  *  @param Scope $head scope that receives the literals
  *  @param mixed $start character that opens the range
  *  @param mixed $end character that closes the range
  *  @return void
  *  @throws ParserException when the end code point is lower than the start code point
  */
 public function fillRange(Scope $head, $start, $end)
 {
     $first = Utf8::ord($start);
     $last = Utf8::ord($end);

     # a range such as z-a cannot be expanded
     if ($first > $last) {
         throw new ParserException(sprintf('Character class range %s - %s is out of order', $start, $end));
     }

     $codePoint = $first;
     while ($codePoint <= $last) {
         $head->setLiteral($codePoint, Utf8::chr($codePoint));
         $codePoint++;
     }
 }
Example #3
0
 /**
  *  Will parse the regex into generator
  *
  *  Advances the lexer one token at a time and dispatches on the lookahead
  *  token: groups are parsed by a nested Parser, literals / character classes /
  *  short codes / unicode codes each produce a new LiteralScope attached to the
  *  current head, quantifiers are applied to the most recent scope (`left`) and
  *  an alternation bar starts a fresh head attached to the result scope.
  *  Tokens matching none of the cases are ignored.
  *
  *  @access public
  *  @param boolean $sub true is passed when parsing a nested group; the flag is not read inside this method
  *  @return $this the parser itself, so callers can chain getResult()
  *  @throws ParserException wrapping any ParserException raised while parsing, prefixed with the lexer position and the output of compress()
  */
 public function parse($sub = false)
 {
     try {
         while ($this->lexer->moveNext()) {
             switch (true) {
                 case $this->lexer->isNextToken(Lexer::T_GROUP_OPEN):
                     # note this is a new group create new parser instance.
                     $parser = new Parser($this->lexer, new Scope(), new Scope());
                     $this->left = $parser->parse(true)->getResult();
                     $this->head->attach($this->left);
                     break;
                 case $this->lexer->isNextToken(Lexer::T_GROUP_CLOSE):
                     # group is finished: leave both the switch and the token loop
                     break 2;
                 case $this->lexer->isNextTokenAny(array(Lexer::T_LITERAL_CHAR, Lexer::T_LITERAL_NUMERIC)):
                     # test for literal characters (abcd)
                     $this->left = new LiteralScope();
                     $this->left->addLiteral($this->lexer->lookahead['value']);
                     $this->head->attach($this->left);
                     break;
                 case $this->lexer->isNextToken(Lexer::T_SET_OPEN):
                     # character classes [a-z]
                     $this->left = new LiteralScope();
                     self::createSubParser('character')->parse($this->left, $this->head, $this->lexer);
                     $this->head->attach($this->left);
                     break;
                 case $this->lexer->isNextTokenAny(array(Lexer::T_DOT, Lexer::T_SHORT_D, Lexer::T_SHORT_NOT_D, Lexer::T_SHORT_W, Lexer::T_SHORT_NOT_W, Lexer::T_SHORT_S, Lexer::T_SHORT_NOT_S)):
                     # match short (. \d \D \w \W \s \S)
                     $this->left = new LiteralScope();
                     self::createSubParser('short')->parse($this->left, $this->head, $this->lexer);
                     $this->head->attach($this->left);
                     break;
                 case $this->lexer->isNextTokenAny(array(Lexer::T_SHORT_P, Lexer::T_SHORT_UNICODE_X, Lexer::T_SHORT_X)):
                     # match short (\p{L} \x \X  )
                     $this->left = new LiteralScope();
                     self::createSubParser('unicode')->parse($this->left, $this->head, $this->lexer);
                     $this->head->attach($this->left);
                     break;
                 case $this->lexer->isNextTokenAny(array(Lexer::T_QUANTIFIER_OPEN, Lexer::T_QUANTIFIER_PLUS, Lexer::T_QUANTIFIER_QUESTION, Lexer::T_QUANTIFIER_STAR)):
                     # match quantifiers; they apply to the scope parsed last (left)
                     # NOTE(review): the key 'quantifer' is kept exactly as the factory is called today
                     self::createSubParser('quantifer')->parse($this->left, $this->head, $this->lexer);
                     break;
                 case $this->lexer->isNextToken(Lexer::T_CHOICE_BAR):
                     # match alternations: the current head becomes the left side and
                     # a fresh head collects the next alternative
                     $this->left = $this->head;
                     $this->head = new Scope();
                     $this->result->useAlternatingStrategy();
                     $this->result->attach($this->head);
                     break;
                 default:
                     # ignore character
             }
         }
     } catch (ParserException $e) {
         # re-throw with the position and the compressed output to help locate the error
         $pos = $this->lexer->lookahead['position'];
         $compressed = $this->compress();
         throw new ParserException(sprintf('Error found STARTING at position %s after `%s` with msg %s ', $pos, $compressed, $e->getMessage()));
     }
     return $this;
 }
Example #4
0
 /**
  *  A scope holding six single-digit literal scopes (1 to 6), switched to the
  *  alternating strategy with min and max occurrences both set to 7, must
  *  generate output containing a run of seven characters from 1-6.
  */
 public function testGenerateWithAlternatingStrategy()
 {
     $scope = new Scope('scope1');
     $generator = new MersenneRandom(700);

     $scope->setMinOccurances(7);
     $scope->setMaxOccurances(7);

     # one literal scope per digit 1..6
     foreach (range(1, 6) as $digit) {
         $literal = new LiteralScope('label_' . $digit);
         $literal->addLiteral($digit);
         $scope->attach($literal);
     }

     $scope->useAlternatingStrategy();

     $output = '';
     $scope->generate($output, $generator);

     $this->assertRegExp('/[1-6]{7}/', $output);
 }
Example #5
0
 /**
  *  Add the literals for a whitespace short code to the head scope
  *
  *  When the lookahead token is T_SHORT_S the characters with codes
  *  9, 10, 12, 13 and 32 are added. For any other token the codes
  *  0-8, 11 and 14-31 are added instead.
  *
  *  @access public
  *  @return void
  *  @param Scope $head scope that receives the literals
  *  @param Scope $result not used by this method
  *  @param Lexer $lexer lexer whose lookahead token selects the character set
  */
 public function convertWhiteSpaceToRange(Scope $head, Scope $result, Lexer $lexer)
 {
     if ($lexer->isNextToken(Lexer::T_SHORT_S)) {
         # spaces, tabs, and line breaks
         $codes = array(9, 10, 12, 13, 32);
     } else {
         # not spaces, tabs, and line breaks
         $codes = array_merge(range(0, 8), array(11), range(14, 31));
     }

     foreach ($codes as $code) {
         $head->addLiteral(chr($code));
     }
 }
 /**
  *  Parse the current token for closers : {###} { ## } {##,##}
  *
  *  Reads the tokens between the quantifier braces, converts the part before
  *  an optional `,` into the minimum and the part after it into the maximum
  *  (the maximum equals the minimum when no `,` is present) and stores both
  *  on the head scope.
  *
  *  @access public
  *  @return ReverseRegex\Generator\Scope the $head that was passed in, with its occurrences updated
  *  @param ReverseRegex\Generator\Scope $head scope whose min/max occurrences are set
  *  @param ReverseRegex\Generator\Scope $result not used by this method
  *  @param ReverseRegex\Lexer $lexer lexer positioned on the opening quantifier token
  *  @throws ParserException when a quantifier is nested or the closing `}` is not found
  */
 public function quantifyClosure(Scope $head, Scope $result, Lexer $lexer)
 {
     $tokens = array();
     # move to the first token inside the quantifer.
     # parse for the minimum , move lookahead until read end of the closure or the `,`
     while ($lexer->moveNext() === true && !$lexer->isNextToken(Lexer::T_QUANTIFIER_CLOSE) && $lexer->lookahead['value'] !== ',') {
         if ($lexer->isNextToken(Lexer::T_QUANTIFIER_OPEN)) {
             throw new ParserException('Nesting Quantifiers is not allowed');
         }
         $tokens[] = $lexer->lookahead;
     }
     $min = $this->convertInteger($tokens);
     # do we have a maximum after the comma?
     # isset() keeps this quiet when the loop above stopped because input ran out
     # (presumably the lookahead is null then; verify against the Lexer)
     if (isset($lexer->lookahead['value']) && $lexer->lookahead['value'] === ',') {
         $tokens = array();
         # move to the first token after the `,` character
         # grab the remaining numbers
         while ($lexer->moveNext() && !$lexer->isNextToken(Lexer::T_QUANTIFIER_CLOSE)) {
             if ($lexer->isNextToken(Lexer::T_QUANTIFIER_OPEN)) {
                 throw new ParserException('Nesting Quantifiers is not allowed');
             }
             $tokens[] = $lexer->lookahead;
         }
         # NOTE(review): an empty maximum such as {778,} is handed to convertInteger() as an empty token list
         $max = $this->convertInteger($tokens);
     } else {
         # {n} form: exact repetition
         $max = $min;
     }
     $head->setMaxOccurances($max);
     $head->setMinOccurances($min);
     # skip the lexer to the closing token
     $lexer->skipUntil(Lexer::T_QUANTIFIER_CLOSE);
     # check if the last matched token was the closing bracket
     # not going to stop errors like {#####,###{[a-z]} {#####{[a-z]}
     if (!$lexer->isNextToken(Lexer::T_QUANTIFIER_CLOSE)) {
         throw new ParserException('Closing quantifier token `}` not found');
     }
     return $head;
 }
Example #7
0
 /**
  *  Evaluate the current token and add the outcome to the head as a literal
  *
  *  @access public
  *  @return ReverseRegex\Generator\Scope the $head that was passed in
  *  @param ReverseRegex\Generator\LiteralScope $head scope that receives the literal
  *  @param ReverseRegex\Generator\Scope $set not used by this method
  *  @param ReverseRegex\Lexer $lexer lexer handed to evaluate()
  */
 public function parse(Scope $head, Scope $set, Lexer $lexer)
 {
     $head->addLiteral($this->evaluate($lexer));

     return $head;
 }