/**
 * Collect the distinct keys found in a regular expression.
 *
 * A Lexer is built over the expression and advanced to its first token,
 * then handed to readKeysFromLexer(). Duplicates are removed with
 * array_unique(), which keeps the first occurrence and its original index.
 *
 * @param mixed $regexp expression source passed straight to the Lexer
 * @return array de-duplicated result of readKeysFromLexer()
 */
function getKeysFromRegexp($regexp)
{
    $tokenStream = new Lexer($regexp);
    $tokenStream->moveNext();

    return array_unique(readKeysFromLexer($tokenStream));
}
/**
 * Compress the lexer into value string until current lookahead
 *
 * Remembers the `position` of the lookahead on entry, rewinds the lexer and
 * concatenates the `value` of each token for as long as the token's
 * `position` does not exceed the remembered one. The lexer is left wherever
 * the scan stopped.
 *
 * @access public
 * @return string the compressed value string
 */
public function compress()
{
    $stopAt = $this->lexer->lookahead['position'];
    $this->lexer->reset();

    $buffer = '';
    while ($this->lexer->moveNext()) {
        # stop on the first token that lies beyond the original lookahead
        if (!($this->lexer->lookahead['position'] <= $stopAt)) {
            break;
        }

        $buffer .= $this->lexer->lookahead['value'];
    }

    return $buffer;
}
/**
 * Parse a reference
 *
 * Evaluates the escape the lexer is currently positioned on and returns the
 * character it denotes:
 *  - T_SHORT_UNICODE_X : a brace wrapped hex number, blanks allowed inside
 *  - T_SHORT_X         : at most the next two tokens, read as a hex number
 *  - T_SHORT_P         : always rejected (unicode properties are unsupported)
 *
 * The lexer's lookahead is null once the input is exhausted, so every access
 * to it after a moveNext()/glimpse() is guarded; a truncated expression ends
 * in a ParserException and never in an "array offset on null" notice.
 *
 * @param Lexer $lexer lexer whose lookahead is the escape token
 * @return mixed value returned by Utf8::chr() for the parsed number
 * @throws ParserException when the escape is unsupported or malformed
 */
public function evaluate(Lexer $lexer)
{
    if ($lexer->isNextToken(Lexer::T_SHORT_P)) {
        throw new ParserException('Property \\p (Unicode Property) not supported use \\x to specify unicode character or range');
    }

    if ($lexer->isNextToken(Lexer::T_SHORT_UNICODE_X)) {
        $lexer->moveNext();

        # lookahead is null when the escape is the last token of the input
        if ($lexer->lookahead === null || $lexer->lookahead['value'] !== '{') {
            throw new ParserException('Expecting character { after \\X none found');
        }

        $tokens = array();

        while ($lexer->moveNext() && $lexer->lookahead['value'] !== '}') {
            # check if we nested eg.{ddd{d}
            if ($lexer->lookahead['value'] === '{') {
                throw new ParserException('Nesting hex value ranges is not allowed');
            }

            if ($lexer->lookahead['value'] !== " " && ctype_xdigit($lexer->lookahead['value']) === false) {
                throw new ParserException(sprintf('Character %s is not a hexdeciaml digit', $lexer->lookahead['value']));
            }

            $tokens[] = $lexer->lookahead['value'];
        }

        # the loop also ends when the input runs out (lookahead === null),
        # so make sure it really stopped on the closing character
        if ($lexer->lookahead === null || $lexer->lookahead['value'] !== '}') {
            throw new ParserException('Closing quantifier token `}` not found');
        }

        if (count($tokens) === 0) {
            throw new ParserException('No hex number found inside the range');
        }

        # only blanks and hex digits got past the checks above; drop the blanks
        # so hexdec() is never handed a character it would have to ignore
        $number = str_replace(' ', '', implode('', $tokens));

        return Utf8::chr(hexdec($number));
    }

    if ($lexer->isNextToken(Lexer::T_SHORT_X)) {
        // only allow another 2 hex characters
        $glimpse = $lexer->glimpse();

        # glimpse() yields null when nothing follows the escape
        if ($glimpse !== null && $glimpse['value'] === '{') {
            throw new ParserException('Braces not supported here');
        }

        $tokens = array();

        for ($remaining = 2; $remaining > 0 && $lexer->moveNext(); --$remaining) {
            $tokens[] = $lexer->lookahead['value'];
        }

        $value = trim(implode('', $tokens));

        return Utf8::chr(hexdec($value));
    }

    throw new ParserException('No Unicode expression to evaluate');
}
/**
 * Parse a character set and load its members into the head scope.
 *
 * The set is first passed through normalize() and the result is lexed again.
 * While walking that second lexer, every range is expanded through
 * fillRange() and every single numeric/char literal is stored on the head
 * under its Utf8::ord() index. The head's literals are sorted before the
 * head is returned.
 *
 * @access public
 * @return ReverseRegex\Generator\Scope the head that was passed in
 * @param ReverseRegex\Generator\Scope $head
 * @param ReverseRegex\Generator\Scope $set
 * @param ReverseRegex\Lexer $lexer
 * @throws ParserException when the lookahead is not a set opener or the set is negated
 */
public function parse(Scope $head, Scope $set, Lexer $lexer)
{
    if ($lexer->lookahead['type'] !== Lexer::T_SET_OPEN) {
        throw new ParserException('Opening character set token not found');
    }

    $following = $lexer->glimpse();
    if ($following['type'] === Lexer::T_SET_NEGATED) {
        throw new ParserException('Negated Character Set ranges not supported at this time');
    }

    $normalized = $this->normalize($head, $set, $lexer);
    $setLexer   = new Lexer($normalized);

    while ($setLexer->moveNext()) {
        if ($setLexer->isNextToken(Lexer::T_SET_CLOSE)) {
            break;
        }

        # a token directly followed by `-` is the start of a range; it is
        # picked up as $setLexer->token once the `-` becomes the lookahead
        $upcoming = $setLexer->glimpse();
        if ($upcoming['type'] === Lexer::T_SET_RANGE) {
            continue;
        }

        if ($setLexer->isNextToken(Lexer::T_SET_RANGE)) {
            $from = $setLexer->token['value'];

            $setLexer->moveNext();

            # step over an escape character in front of the range end
            if ($setLexer->isNextToken(Lexer::T_ESCAPE_CHAR)) {
                $setLexer->moveNext();
            }

            $to = $setLexer->lookahead['value'];
            $this->fillRange($head, $from, $to);
        } elseif ($setLexer->isNextToken(Lexer::T_LITERAL_NUMERIC) || $setLexer->isNextToken(Lexer::T_LITERAL_CHAR)) {
            $char = $setLexer->lookahead['value'];
            $head->setLiteral((int) Utf8::ord($char), $char);
        }
        # every other token type is ignored
    }

    $head->getLiterals()->sort();

    return $head;
}
/**
 * Every literal produced by the Short parser for `.` must match `/./`.
 */
public function testDotRange()
{
    $tokens = new Lexer('.');
    $root   = new Scope();
    $short  = new Short();
    $target = new LiteralScope('lit1', $root);

    $tokens->moveNext();
    $tokens->moveNext();

    $short->parse($target, $root, $tokens);

    $literals = $target->getLiterals();
    foreach ($literals as $literal) {
        $this->assertRegExp('/./', $literal);
    }
}
/**
 * Add the literals for a whitespace short-code to the head scope.
 *
 * When the lookahead is T_SHORT_S the characters 9, 10, 12, 13 and 32 are
 * added; for any other lookahead the characters 0-8, 11 and 14-31 are added.
 * Literals are added in ascending code order.
 *
 * @param Scope $head   scope receiving the literals
 * @param Scope $result not used by this method
 * @param Lexer $lexer  lexer whose lookahead selects the character list
 * @return void
 */
public function convertWhiteSpaceToRange(Scope $head, Scope $result, Lexer $lexer)
{
    if ($lexer->isNextToken(Lexer::T_SHORT_S)) {
        # spaces, tabs, and line breaks
        $codes = array(9, 10, 12, 13, 32);
    } else {
        # not spaces, tabs, and line breaks
        $codes = array_merge(range(0, 8), array(11), range(14, 31));
    }

    foreach ($codes as $code) {
        $head->addLiteral(chr($code));
    }
}
/**
 * Parse the current token for closers : {###} { ## } {##,##}
 *
 * Reads the tokens up to the `,` or the closing brace as the minimum and,
 * when a `,` is present, the tokens up to the closing brace as the maximum.
 * Without a `,` the maximum equals the minimum. Both token lists are turned
 * into numbers by convertInteger() and written to the head (max first).
 *
 * The lexer's lookahead is null once the input is exhausted, so it is checked
 * before being dereferenced; an unterminated closure ends in the
 * "Closing quantifier token" ParserException below.
 *
 * @access public
 * @return ReverseRegex\Generator\Scope a new head
 * @param ReverseRegex\Generator\Scope $head
 * @param ReverseRegex\Generator\Scope $result
 * @param ReverseRegex\Lexer $lexer
 * @throws ParserException on a nested `{` or a missing closing `}`
 */
public function quantifyClosure(Scope $head, Scope $result, Lexer $lexer)
{
    $tokens = array();

    # move to the first token inside the quantifer.
    # parse for the minimum , move lookahead until read end of the closure or the `,`
    while ($lexer->moveNext() === true
        && !$lexer->isNextToken(Lexer::T_QUANTIFIER_CLOSE)
        && $lexer->lookahead['value'] !== ','
    ) {
        if ($lexer->isNextToken(Lexer::T_QUANTIFIER_OPEN)) {
            throw new ParserException('Nesting Quantifiers is not allowed');
        }

        $tokens[] = $lexer->lookahead;
    }

    $min = $this->convertInteger($tokens);

    # do we have a maximum after the comma?
    # (the loop above may have run off the end of the input, leaving no lookahead)
    if ($lexer->lookahead !== null && $lexer->lookahead['value'] === ',') {
        # a closure such as {778,} leaves this list empty; no check is made
        # here, the list is handed to convertInteger() as it is
        $tokens = array();

        # move to the first token after the `,` character
        # grab the remaining numbers
        while ($lexer->moveNext() && !$lexer->isNextToken(Lexer::T_QUANTIFIER_CLOSE)) {
            if ($lexer->isNextToken(Lexer::T_QUANTIFIER_OPEN)) {
                throw new ParserException('Nesting Quantifiers is not allowed');
            }

            $tokens[] = $lexer->lookahead;
        }

        $max = $this->convertInteger($tokens);
    } else {
        $max = $min;
    }

    $head->setMaxOccurances($max);
    $head->setMinOccurances($min);

    # skip the lexer to the closing token
    $lexer->skipUntil(Lexer::T_QUANTIFIER_CLOSE);

    # check if the last matched token was the closing bracket
    # not going to stop errors like {#####,###{[a-z]} {#####{[a-z]}
    if (!$lexer->isNextToken(Lexer::T_QUANTIFIER_CLOSE)) {
        throw new ParserException('Closing quantifier token `}` not found');
    }

    return $head;
}
/**
 * The two digit hex escape `\x64` must be parsed into the literal `d`.
 */
public function testShortX()
{
    $tokens  = new Lexer('\\x64');
    $root    = new Scope();
    $unicode = new Unicode();
    $target  = new LiteralScope('lit1', $root);

    # two steps, as in the other parser tests, before handing the lexer over
    for ($step = 0; $step < 2; $step++) {
        $tokens->moveNext();
    }

    $unicode->parse($target, $root, $tokens);

    $literals = $target->getLiterals();
    $this->assertEquals('d', $literals[0]);
}
/**
 * A braced hex value inside a character class range must be rejected.
 *
 * @expectedException \ReverseRegex\Exception
 * @expectedExceptionMessage Braces not supported here
 */
public function testParseHexShortBraceError()
{
    $tokens = new Lexer('[\\x{61}-\\x6B-\\x6E]');
    $root   = new Scope();
    $target = new LiteralScope();
    $class  = new CharacterClass();

    $tokens->moveNext();

    $class->parse($target, $root, $tokens);
}
/**
 * Lexing `())` must fail because the second `)` closes a group that was never opened.
 *
 * @expectedException \ReverseRegex\Exception
 * @expectedExceptionMessage Closing group char "(" has no matching opening character
 */
public function testGroupNestingErrorClosedNotOpened()
{
    $lexer = new Lexer('())');

    # value and type expected in the lookahead after each successive moveNext()
    $expectedTokens = array(
        array('(', Lexer::T_GROUP_OPEN),
        array(')', Lexer::T_GROUP_CLOSE),
        array(')', Lexer::T_GROUP_CLOSE),
    );

    foreach ($expectedTokens as $expected) {
        $lexer->moveNext();
        $this->assertEquals($expected[0], $lexer->lookahead['value']);
        $this->assertEquals($expected[1], $lexer->lookahead['type']);
    }
}
/**
 * The `?` quantifier must set the scope's occurrences to min 0 / max 1.
 */
public function testQuestionQuantifier()
{
    $pattern = 'az?';

    $tokens     = new Lexer($pattern);
    $target     = new Scope();
    $quantifier = new Quantifier();

    # three steps: `a`, `z`, then the `?` quantifier becomes the lookahead
    for ($step = 0; $step < 3; $step++) {
        $tokens->moveNext();
    }

    $quantifier->parse($target, $target, $tokens);

    $this->assertEquals(0, $target->getMinOccurances());
    $this->assertEquals(1, $target->getMaxOccurances());
}