/**
 * Class Constructor
 *
 * Hands the label to the parent constructor and afterwards initialises
 * the `literals` property with an empty ArrayCollection.
 *
 * @access public
 * @param string $label label forwarded to the parent constructor (defaults to 'label')
 */
public function __construct($label = 'label')
{
    parent::__construct($label);

    # every instance starts out with an empty literal collection
    $emptyLiterals  = new ArrayCollection();
    $this->literals = $emptyLiterals;
}
/**
 * Fill a range given starting and ending character
 *
 * Both boundary characters are converted with Utf8::ord(). Every index from
 * the start to the end (both inclusive) is then stored on the head scope via
 * setLiteral(), keyed by the index and holding the Utf8::chr() of that index.
 *
 * @access public
 * @return void
 * @param Scope  $head  scope that receives the literals
 * @param string $start first character of the range
 * @param string $end   last character of the range
 * @throws ParserException when the end of the range sorts before its start
 */
public function fillRange(Scope $head, $start, $end)
{
    $first = Utf8::ord($start);
    $last  = Utf8::ord($end);

    # a range such as z-a cannot be expanded
    if ($first > $last) {
        throw new ParserException(sprintf('Character class range %s - %s is out of order', $start, $end));
    }

    $index = $first;
    while ($index <= $last) {
        $head->setLiteral($index, Utf8::chr($index));
        $index++;
    }
}
/**
 * Will parse the regex into generator
 *
 * Advances the lexer one token at a time and dispatches on the lookahead:
 *  - group open   : a nested Parser sharing this lexer parses the group and
 *                   its result is attached to the current head
 *  - group close  : stops the token loop
 *  - literals     : a LiteralScope holding the token value is attached
 *  - sets, shorts and unicode shorts : delegated to the 'character', 'short'
 *                   and 'unicode' sub parsers, the filled LiteralScope is attached
 *  - quantifiers  : delegated to the 'quantifer' sub parser which works on
 *                   the most recent left scope
 *  - choice bar   : the current head becomes the left scope, a fresh Scope
 *                   becomes the head and is attached to the result, which is
 *                   switched to the alternating strategy
 * Any other token is ignored.
 *
 * @access public
 * @param boolean $sub not read inside this method; nested group parsers are invoked with true
 * @return $this the parser instance
 * @throws ParserException re-thrown with the lexer position and the compressed
 *                         pattern prepended to the original message
 */
public function parse($sub = false)
{
    try {
        while ($this->lexer->moveNext()) {
            switch (true) {
                case $this->lexer->isNextToken(Lexer::T_GROUP_OPEN):
                    # a group opens, hand the shared lexer to a fresh parser instance
                    $parser = new Parser($this->lexer, new Scope(), new Scope());
                    $this->left = $parser->parse(true)->getResult();
                    $this->head->attach($this->left);
                    break;

                case $this->lexer->isNextToken(Lexer::T_GROUP_CLOSE):
                    # group is finished, leave the switch and the token loop
                    break 2;

                case $this->lexer->isNextTokenAny(array(Lexer::T_LITERAL_CHAR, Lexer::T_LITERAL_NUMERIC)):
                    # literal characters (abcd)
                    $this->left = new LiteralScope();
                    $this->left->addLiteral($this->lexer->lookahead['value']);
                    $this->head->attach($this->left);
                    break;

                case $this->lexer->isNextToken(Lexer::T_SET_OPEN):
                    # character classes [a-z]
                    $this->left = new LiteralScope();
                    self::createSubParser('character')->parse($this->left, $this->head, $this->lexer);
                    $this->head->attach($this->left);
                    break;

                case $this->lexer->isNextTokenAny(array(
                    Lexer::T_DOT,
                    Lexer::T_SHORT_D,
                    Lexer::T_SHORT_NOT_D,
                    Lexer::T_SHORT_W,
                    Lexer::T_SHORT_NOT_W,
                    Lexer::T_SHORT_S,
                    Lexer::T_SHORT_NOT_S
                )):
                    # short classes (. \d \D \w \W \s \S)
                    $this->left = new LiteralScope();
                    self::createSubParser('short')->parse($this->left, $this->head, $this->lexer);
                    $this->head->attach($this->left);
                    break;

                case $this->lexer->isNextTokenAny(array(Lexer::T_SHORT_P, Lexer::T_SHORT_UNICODE_X, Lexer::T_SHORT_X)):
                    # unicode shorts (\p{L} \x \X)
                    $this->left = new LiteralScope();
                    self::createSubParser('unicode')->parse($this->left, $this->head, $this->lexer);
                    $this->head->attach($this->left);
                    break;

                case $this->lexer->isNextTokenAny(array(
                    Lexer::T_QUANTIFIER_OPEN,
                    Lexer::T_QUANTIFIER_PLUS,
                    Lexer::T_QUANTIFIER_QUESTION,
                    Lexer::T_QUANTIFIER_STAR
                )):
                    # quantifiers apply to the scope parsed last (left)
                    self::createSubParser('quantifer')->parse($this->left, $this->head, $this->lexer);
                    break;

                case $this->lexer->isNextToken(Lexer::T_CHOICE_BAR):
                    # alternation: park the current head and start a new branch on the result
                    $this->left = $this->head;
                    $this->head = new Scope();
                    $this->result->useAlternatingStrategy();
                    $this->result->attach($this->head);
                    break;

                default:
                    # ignore character
                    break;
            }
        }
    } catch (ParserException $e) {
        # report where in the pattern the failure happened
        $pos        = $this->lexer->lookahead['position'];
        $compressed = $this->compress();

        throw new ParserException(sprintf('Error found STARTING at position %s after `%s` with msg %s ', $pos, $compressed, $e->getMessage()));
    }

    return $this;
}
/**
 * A scope fixed to seven occurrences with six single literal children
 * (1 to 6) and the alternating strategy is expected to generate a string
 * made of exactly seven characters, each between 1 and 6.
 */
public function testGenerateWithAlternatingStrategy()
{
    $scope  = new Scope('scope1');
    $gen    = new MersenneRandom(700);
    $result = '';

    $scope->setMinOccurances(7);
    $scope->setMaxOccurances(7);

    for ($i = 1; $i <= 6; $i++) {
        $lit = new LiteralScope('label_' . $i);
        $lit->addLiteral($i);
        $scope->attach($lit);
    }

    $scope->useAlternatingStrategy();
    $scope->generate($result, $gen);

    # anchored on both ends, otherwise longer output or stray characters
    # around a run of seven digits would still satisfy the assertion
    $this->assertRegExp('/^[1-6]{7}$/', $result);
}
/**
 * Adds the literals for the whitespace short to the head scope
 *
 * When the lookahead is Lexer::T_SHORT_S the characters with the codes
 * 9, 10, 12, 13 and 32 are added. For any other token the codes 0 to 8,
 * 11 and 14 to 31 are added instead. Each literal is produced with chr().
 *
 * NOTE(review): the second branch only covers codes below 32, nothing from
 * 33 upwards is added. Confirm this is the intended set.
 *
 * @access public
 * @return void
 * @param Scope $head   scope that receives the literals
 * @param Scope $result not used by this method
 * @param Lexer $lexer  lexer whose lookahead selects the branch
 */
public function convertWhiteSpaceToRange(Scope $head, Scope $result, Lexer $lexer)
{
    if ($lexer->isNextToken(Lexer::T_SHORT_S)) {
        # spaces, tabs, and line breaks #0009 #0010 #0012 #0013 #0032
        $codes = array(9, 10, 12, 13, 32);
    } else {
        # not spaces, tabs, and line breaks #0000-0008 #0011 #0014 - #0031
        $codes = array_merge(range(0, 8), array(11), range(14, 31));
    }

    foreach ($codes as $code) {
        $head->addLiteral(chr($code));
    }
}
/**
 * Parse the current token for closers : {###} { ## } {##,##}
 *
 * Collects the tokens up to the `,` or the closing brace and converts them
 * into the minimum with convertInteger(). When a `,` follows, the tokens up
 * to the closing brace are converted into the maximum, otherwise the maximum
 * equals the minimum. Both values are then written to the head scope.
 *
 * @access public
 * @return ReverseRegex\Generator\Scope the head that was passed in, with its occurrences updated
 * @param ReverseRegex\Generator\Scope $head   scope the quantifier applies to
 * @param ReverseRegex\Generator\Scope $result not used by this method
 * @param ReverseRegex\Lexer $lexer            lexer positioned on the opening brace
 * @throws ParserException when another quantifier opens inside the braces or
 *                         the closing `}` can not be found
 */
public function quantifyClosure(Scope $head, Scope $result, Lexer $lexer)
{
    $tokens = array();

    # move to the first token inside the quantifer and gather the minimum,
    # stopping at the end of the closure or at the `,`
    while ($lexer->moveNext() === true
        && !$lexer->isNextToken(Lexer::T_QUANTIFIER_CLOSE)
        && $lexer->lookahead['value'] !== ','
    ) {
        if ($lexer->isNextToken(Lexer::T_QUANTIFIER_OPEN)) {
            throw new ParserException('Nesting Quantifiers is not allowed');
        }

        $tokens[] = $lexer->lookahead;
    }

    $min = $this->convertInteger($tokens);

    # do we have a maximum after the comma?
    # NOTE(review): assumes lookahead is null once moveNext() has failed, the
    # null check keeps that case from indexing into an empty lookahead - confirm
    if ($lexer->lookahead !== null && $lexer->lookahead['value'] === ',') {
        $tokens = array();

        # move to the first token after the `,` and grab the remaining numbers
        # NOTE(review): nothing here rejects an empty maximum such as {778,},
        # the outcome depends on how convertInteger() treats an empty list
        while ($lexer->moveNext() && !$lexer->isNextToken(Lexer::T_QUANTIFIER_CLOSE)) {
            if ($lexer->isNextToken(Lexer::T_QUANTIFIER_OPEN)) {
                throw new ParserException('Nesting Quantifiers is not allowed');
            }

            $tokens[] = $lexer->lookahead;
        }

        $max = $this->convertInteger($tokens);
    } else {
        $max = $min;
    }

    # maximum is written before the minimum, same order as before
    $head->setMaxOccurances($max);
    $head->setMinOccurances($min);

    # skip the lexer to the closing token
    $lexer->skipUntil(Lexer::T_QUANTIFIER_CLOSE);

    # check if the last matched token was the closing bracket
    # not going to stop errors like {#####,###{[a-z]} {#####{[a-z]}
    if (!$lexer->isNextToken(Lexer::T_QUANTIFIER_CLOSE)) {
        throw new ParserException('Closing quantifier token `}` not found');
    }

    return $head;
}
/**
 * Parse the current token into a literal on the head scope
 *
 * Whatever evaluate() returns for the lexer is added to the head as a
 * literal, the head is then handed back to the caller.
 *
 * @access public
 * @return ReverseRegex\Generator\Scope the head that was passed in
 * @param ReverseRegex\Generator\LiteralScope $head scope that receives the literal
 * @param ReverseRegex\Generator\Scope $set         not used by this method
 * @param ReverseRegex\Lexer $lexer                 lexer passed on to evaluate()
 */
public function parse(Scope $head, Scope $set, Lexer $lexer)
{
    $head->addLiteral($this->evaluate($lexer));

    return $head;
}