PHP REBuilder\Parser Token Examples

Programming Language: PHP

Namespace/Package Name: REBuilder\Parser

Class/Type: Token

Examples at hotexamples.com: 2

PHP REBuilder\Parser Token - 2 examples found. These are the top-rated real-world PHP examples of REBuilder\Parser\Token, extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

getType(2)

getIdentifier(1)

getSubject(1)

Example #1

Show file

File: Builder.php Project: mck89/rebuilder

 /**
  * Handles a repetition token
  *
  * Checks that a repetition is allowed after the last emitted token, then
  * builds the repetition object that matches the token identifier and sets
  * it on the current item. A "?" that follows a repetition token which does
  * not itself come after another repetition token is not a new repetition:
  * it only sets the lazy flag on the repetition of the current item.
  *
  * @param Token $token Token
  *
  * @return void
  *
  * @throws Exception\InvalidRepetition If there is nothing to repeat, if the
  *                                     repetition is not allowed after the
  *                                     last emitted token or if the token
  *                                     identifier is not a known repetition
  */
 protected function _handleRepetition(Token $token)
 {
     //If there is no current item, throw exception
     if ($this->_currentItem === null) {
         throw new Exception\InvalidRepetition("Nothing to repeat");
     }
     //Repetitions are allowed only after certain tokens, so check the last
     //emitted token
     $lastToken = $this->_tokensStack->top();
     switch ($lastToken->getType()) {
         //Handle lazy repetition
         case Token::TYPE_REPETITION:
             $prevLastToken = $this->_tokensStack->offsetGet(1);
             //Only a "?" that follows a repetition token that does not come
             //after another repetition token can set the lazy flag, every
             //other combination is an error
             if ($token->getIdentifier() !== "?" || $prevLastToken->getType() === Token::TYPE_REPETITION) {
                 throw new Exception\InvalidRepetition("Nothing to repeat");
             }
             //Check if last repetition supports the lazy flag
             $lastRepetition = $this->_currentItem->getRepetition();
             if ($lastRepetition->supportsLazy()) {
                 $lastRepetition->setLazy(true);
             }
             //The token has been handled as a flag, no repetition to create
             return;
         //Tokens that can handle the repetition
         case Token::TYPE_NON_PRINTING_CHAR:
         case Token::TYPE_GENERIC_CHAR_TYPE:
         case Token::TYPE_CONTROL_CHAR:
         case Token::TYPE_EXT_UNICODE_SEQUENCE:
         case Token::TYPE_UNICODE_CHAR_CLASS:
         case Token::TYPE_HEX_CHAR:
         case Token::TYPE_DOT:
         case Token::TYPE_BYTE:
         case Token::TYPE_SUBPATTERN_END:
         case Token::TYPE_COMMENT:
         case Token::TYPE_OCTAL_CHAR:
         case Token::TYPE_BACK_REFERENCE:
         case Token::TYPE_CHAR_CLASS_END:
         case Token::TYPE_RECURSIVE_PATTERN:
             break;
         //When simple characters are grouped, repetition is valid only
         //for the last one, so it needs to be split so that the last
         //character belongs to a different object
         case Token::TYPE_CHAR:
             $chars = $this->_currentItem->getChar();
             if (strlen($chars) > 1) {
                 //Leave everything but the last character in the current
                 //item and move the last one to a new sibling item, which
                 //becomes the current item and receives the repetition
                 $this->_currentItem->setChar(substr($chars, 0, -1));
                 $this->_currentItem = new Pattern\Char($chars[strlen($chars) - 1]);
                 $this->_containersStack->top()->addChild($this->_currentItem);
             }
             break;
         default:
             throw new Exception\InvalidRepetition("Repetition cannot be inserted at this point");
     }
     //Get the right repetition class
     switch ($token->getIdentifier()) {
         case "*":
             $repetition = new Pattern\Repetition\ZeroOrMore();
             break;
         case "+":
             $repetition = new Pattern\Repetition\OneOrMore();
             break;
         case "?":
             $repetition = new Pattern\Repetition\Optional();
             break;
         case "{":
             //Without a comma the subject is a fixed number of repetitions,
             //otherwise it is a "min,max" range where max can be omitted
             if (strpos($token->getSubject(), ",") === false) {
                 $repetition = new Pattern\Repetition\Number($token->getSubject());
             } else {
                 $limits = explode(",", $token->getSubject());
                 $repetition = new Pattern\Repetition\Range($limits[0], $limits[1] === "" ? null : $limits[1]);
             }
             break;
         default:
             //Fail explicitly instead of reaching setRepetition() with an
             //undefined variable
             throw new Exception\InvalidRepetition("Unknown repetition identifier '" . $token->getIdentifier() . "'");
     }
     //Set the repetition on the current item
     $this->_currentItem->setRepetition($repetition);
 }

Example #2

Show file

File: Tokenizer.php Project: mck89/rebuilder

 /**
  * Starts the tokenization process
  *
  * Strips delimiters and modifiers from the regex, then consumes the regex
  * one character at a time and emits a token for every construct that is
  * recognized. Once the whole regex has been consumed, the end delimiter
  * token and, if modifiers were specified, the modifiers token are emitted.
  *
  * @return void
  *
  * @throws Exception\Generic          If the regex contains an incomplete or
  *                                    invalid construct (unclosed braces,
  *                                    subpatterns or character classes,
  *                                    unmatched parenthesis, invalid
  *                                    references)
  * @throws Exception\InvalidDelimiter If an unescaped end delimiter is found
  *                                    inside the regex or if the regex ends
  *                                    while the escaped state is active
  */
 public function tokenize()
 {
     //Since delimiters are the only exception to the normal regex syntax and
     //the tokenizer needs to know regex modifiers to handle some situations,
     //parse them immediately and strip them from the regex
     list($delimiter, $endDelimiter, $rModifiers) = $this->_stripDelimitersAndModifiers();
     //The end delimiter must be checked inside the regex only when it is
     //the same character as the start delimiter
     $checkEndDelimiter = $delimiter === $endDelimiter;
     //Store regex length
     $this->_length = strlen($this->_regex);
     //Loop regex characters
     while (($char = $this->_consume()) !== null) {
         //If character is backslash and it's not escaped
         if ($char === "\\" && !$this->_escaped) {
             //Set escaped flag to true
             $this->_escaped = true;
             continue;
         } elseif (!$this->_inCharClass && !$this->_escaped && $char === ".") {
             //Emit a dot token
             $this->_emitToken(Token::TYPE_DOT, $char);
         } elseif ($this->_escaped && Rules::validateGenericCharType($char)) {
             //Emit a generic character type token
             $this->_emitToken(Token::TYPE_GENERIC_CHAR_TYPE, $char);
         } elseif (!$this->_inCharClass && $this->_escaped && Rules::validateSimpleAssertion($char)) {
             //Emit a simple assertion token
             $this->_emitToken(Token::TYPE_SIMPLE_ASSERTION, $char);
         } elseif ($this->_escaped && Rules::validateNonPrintingChar($char)) {
             //Emit a non-printing character token
             $this->_emitToken(Token::TYPE_NON_PRINTING_CHAR, $char);
         } elseif (!$this->_inCharClass && $this->_escaped && $char === "X") {
             //Emit an extended unicode sequence token
             $this->_emitToken(Token::TYPE_EXT_UNICODE_SEQUENCE, $char);
         } elseif (!$this->_inCharClass && $this->_escaped && $char === "C") {
             //Emit a single byte identifier token
             $this->_emitToken(Token::TYPE_BYTE, $char);
         } elseif (!$this->_inCharClass && !$this->_escaped && ($char === "^" || $char === "\$")) {
             //Emit an anchor token
             $this->_emitToken($char === "^" ? Token::TYPE_START_ANCHOR : Token::TYPE_END_ANCHOR, $char);
         } elseif (!$this->_inCharClass && !$this->_escaped && $char === "|") {
             //Emit an alternation identifier token
             $this->_emitToken(Token::TYPE_ALTERNATION, $char);
         } elseif (!$this->_inCharClass && $this->_escaped && ($char === "p" || $char === "P")) {
             //Take the next character
             $nextChar = $this->_consume();
             //If there are no characters left throw an exception
             if ($nextChar === null) {
                 throw new Exception\Generic("Unspecified character class form \\" . $char);
             } elseif ($nextChar !== "{") {
                 //Single character form, the character is the token subject
                 $this->_emitToken(Token::TYPE_UNICODE_CHAR_CLASS, $char, $nextChar);
             } else {
                 //Find everything until the closing bracket
                 $nextChars = $this->_consumeUntil("}", true);
                 //If the closing bracket has not been found throw an
                 //exception
                 if ($nextChars === null) {
                     throw new Exception\Generic("Unclosed \\" . $char . " character class");
                 } else {
                     $this->_emitToken(Token::TYPE_UNICODE_CHAR_CLASS, $char, $nextChar . $nextChars);
                 }
             }
         } elseif ($this->_escaped && $char === "x") {
             //Hexadecimal character: the code is written either between
             //braces or as up to 2 digits right after the "x"
             $nextChar = $this->_consume();
             $tokenSubject = "";
             if ($nextChar === "{") {
                 $nextChars = $this->_consumeUntil("}", true);
                 if ($nextChars === null) {
                     throw new Exception\Generic("Unclosed brace in hex char");
                 }
                 $tokenSubject = trim($nextChars, "}");
             } elseif ($nextChar !== null) {
                 //Put back the character so that it is tested by the loop
                 $this->_unconsume();
                 //Find following hexadecimal digits
                 for ($i = 0; $i < 2; $i++) {
                     $nextChar = $this->_consume();
                     if ($nextChar !== null && Rules::validateHexString($nextChar)) {
                         $tokenSubject .= $nextChar;
                     } else {
                         //Put back the character only if one was consumed
                         $nextChar !== null && $this->_unconsume();
                         break;
                     }
                 }
             }
             //Emit the hexadecimal character token
             $this->_emitToken(Token::TYPE_HEX_CHAR, $char, $tokenSubject);
         } elseif ($this->_escaped && $char === "c") {
             //Take the next character
             $nextChar = $this->_consumeIgnoreEscape();
             //If there are no characters left throw an exception
             if ($nextChar === null) {
                 throw new Exception\Generic("Character not specified for control character");
             }
             //Otherwise emit the control character token
             $this->_emitToken(Token::TYPE_CONTROL_CHAR, $char, $nextChar);
         } elseif (!$this->_inCharClass && !$this->_escaped && ($char === "*" || $char === "+" || $char === "?")) {
             //Emit a repetition token
             $this->_emitToken(Token::TYPE_REPETITION, $char);
         } elseif (!$this->_inCharClass && !$this->_escaped && $char === "{" && ($nextChars = $this->_consumeRegex("/^\\d+(?:,\\d*)?\\}/"))) {
             //Emit a repetition token, the subject is the content of the
             //braces without the closing one
             $this->_emitToken(Token::TYPE_REPETITION, $char, rtrim($nextChars, "}"));
         } elseif (!$this->_inCharClass && !$this->_escaped && $char === "(") {
             //Subpattern start, handled by _handleSubpattern()
             $this->_handleSubpattern();
         } elseif (!$this->_inCharClass && !$this->_escaped && $char === ")") {
             //Throw exception if there are no open subpatterns
             if (!$this->_openSubpatterns) {
                 throw new Exception\Generic("Unmatched parenthesis");
             }
             //Emit a subpattern end token
             $this->_emitToken(Token::TYPE_SUBPATTERN_END, $char);
             $this->_openSubpatterns--;
             $this->_modifiersStack->pop();
         } elseif (!$this->_inCharClass && !$this->_escaped && $char === "[") {
             //Emit a char class start token
             $this->_emitToken(Token::TYPE_CHAR_CLASS_START, $char);
             $this->_inCharClass = true;
             //Consume next char
             $char = $this->_consume();
             //If the character is a char class negation
             if ($char === "^") {
                 //Emit the char class negate token
                 $this->_emitToken(Token::TYPE_CHAR_CLASS_NEGATE, $char);
                 $char = $this->_consume();
             }
             //If the first char in a char class is a closed square bracket
             if ($char === "]") {
                 //Emit the bracket as char token
                 $this->_emitToken(Token::TYPE_CHAR, $char);
             } elseif ($char !== null) {
                 //Put back the character so that it is processed by the
                 //next iteration. Only a character that was actually
                 //consumed is put back: at the end of the regex there is
                 //nothing to put back
                 $this->_unconsume();
             }
         } elseif ($this->_inCharClass && !$this->_escaped && $char === "]") {
             //Emit a char class end token
             $this->_emitToken(Token::TYPE_CHAR_CLASS_END, $char);
             $this->_inCharClass = false;
         } elseif ($this->_inCharClass && !$this->_escaped && $char === "-" && in_array($this->_lastToken->getType(), $this->_allowedInCharClassRange)) {
             //Enable the after char class range mode
             $this->_afterCharClassRange = true;
         } elseif ($this->_inCharClass && !$this->_escaped && $char === "[" && ($nextChars = $this->_consumeRegex("/^:\\^?[a-z]+:\\]/"))) {
             //Emit a posix char class token, the subject is the class name
             //without colons and closing bracket
             $subject = str_replace(array(":", "]"), "", $nextChars);
             $this->_emitToken(Token::TYPE_POSIX_CHAR_CLASS, $char . $nextChars, $subject);
         } elseif (!$this->_inCharClass && $this->_escaped && ($char === "g" || $char === "k")) {
             //It's a back reference. Check for the reference identifier
             if ($char === "g") {
                 $testPattern = "(?|(\\d+)|\\{(-?\\d+|\\w+)\\})";
             } else {
                 $testPattern = "(<\\w+>|'\\w+'|\\{\\w+\\})";
             }
             $nextChars = $this->_consumeRegex("/^{$testPattern}/", 1);
             if ($nextChars === null) {
                 throw new Exception\Generic("Invalid backreference");
             }
             //For \k the match includes the name delimiters, strip them
             if ($char === "k") {
                 $nextChars = substr($nextChars, 1, -1);
             }
             //Check reference validity
             if (!$this->_checkValidReference($nextChars)) {
                 throw new Exception\Generic("Reference to non-existent subpattern '{$nextChars}'");
             }
             //Emit a backreference token
             $this->_emitToken(Token::TYPE_BACK_REFERENCE, $char, $nextChars);
         } elseif ($this->_escaped && is_numeric($char)) {
             //Char class does not handle back references so if the character
             //is not octal process the character again without the escape
             if ($this->_inCharClass && $char > 7) {
                 $this->_unconsume();
                 $this->_escaped = false;
                 continue;
             }
             //If the character is a 0 consume up to 2 octal digits,
             //otherwise consume all the following digits. The start anchor
             //is added when the pattern is built below
             if ($char === "0" || $this->_inCharClass) {
                 $testPattern = "[0-7]{1,2}";
             } else {
                 $testPattern = "\\d+";
             }
             //Consume following numbers
             $nextChars = $this->_consumeRegex("/^{$testPattern}/");
             if ($nextChars !== null) {
                 $char .= $nextChars;
             }
             //If the first digit is 0 or its a valid octal number and there
             //are not enough back references
             $hasReference = $this->_checkValidReference($char);
             if ($char[0] === "0" || $this->_inCharClass || (preg_match("/^[0-7]{2,3}\$/", $char) && !$hasReference)) {
                 $this->_emitToken(Token::TYPE_OCTAL_CHAR, $char);
             } elseif ($hasReference) {
                 //Emit a backreference token
                 $this->_emitToken(Token::TYPE_BACK_REFERENCE, "\\", $char);
             } else {
                 throw new Exception\Generic("Reference to non-existent subpattern '{$char}'");
             }
         } elseif (!$this->_escaped && $checkEndDelimiter && $char === $endDelimiter) {
             //Throw an exception
             throw new Exception\InvalidDelimiter("Unescaped end delimiter '{$char}' inside regex");
         } else {
             //If the character is not escaped and the "x" modifier is active
             if (!$this->_escaped && strpos($this->_modifiersStack->top(), "x") !== false) {
                 //If it is a "#"
                 if ($char === "#") {
                     //Emit a comment token. The comment ends at the first
                     //newline or, if there is none, at the end of the regex
                     $nextChars = $this->_consumeUntil("\n");
                     if ($nextChars === null) {
                         $nextChars = $this->_consumeRemaining();
                     }
                     $this->_emitToken(Token::TYPE_COMMENT, $char, $nextChars);
                     continue;
                 } elseif (preg_match("/\\s/", $char)) {
                     //Whitespaces are skipped when the "x" modifier is active
                     continue;
                 }
             }
             //Emit the character as a simple pattern token
             $this->_emitToken(Token::TYPE_CHAR, $char);
         }
         //Reset the escaped state
         $this->_escaped = false;
     }
     //If the escaped state is already active it means that no end delimiter
     //has been found, so an exception must be thrown
     if ($this->_escaped) {
         throw new Exception\InvalidDelimiter("End delimiter '{$endDelimiter}' not found");
     }
     //Throw exception if there are unclosed subpatterns
     if ($this->_openSubpatterns) {
         throw new Exception\Generic("The regex contains unclosed subpatterns");
     }
     //Throw exception if there are unclosed char classes
     if ($this->_inCharClass) {
         throw new Exception\Generic("The regex contains unclosed character classes");
     }
     //Emit the end delimiter token
     $this->_emitToken(Token::TYPE_REGEX_END_DELIMITER, $endDelimiter);
     //If regex modifiers were specified emit the token
     if ($rModifiers) {
         $this->_emitToken(Token::TYPE_REGEX_MODIFIERS, $rModifiers);
     }
 }