/**
 * {@inheritDoc}
 *
 * Normalizes line endings to "\n", then repeatedly asks extractToken()
 * for the token at the front of the remaining input until it returns
 * null. Tokens for which shouldSkipToken() returns true are consumed
 * but not emitted. An EOF token is appended to the result.
 *
 * @param string $string The input to tokenize.
 *
 * @return ArrayTokenStream The emitted tokens followed by an EOF token.
 *
 * @throws RecognitionException When tokenization stops before the whole
 *                              input has been consumed.
 */
public function lex($string)
{
    // normalize line endings
    $string = strtr($string, array("\r\n" => "\n", "\r" => "\n"));

    $tokens = array();
    $position = 0;
    $originalString = $string;
    $originalLength = Util::stringLength($string);

    // substr_count() works on byte offsets, while $position is measured
    // with Util::stringLength(); track the consumed bytes separately so
    // multibyte input does not skew the line counter.
    $totalBytes = strlen($string);
    $consumedBytes = 0;
    $newlines = 0;

    while (true) {
        $token = $this->extractToken($string);

        if ($token === null) {
            break;
        }

        $shift = Util::stringLength($token->getValue());

        if ($shift < 1) {
            // A zero-length token would never advance the input and the
            // loop would spin forever; treat it as "nothing recognized"
            // and let the length check below decide whether that is an
            // error.
            break;
        }

        if (!$this->shouldSkipToken($token)) {
            $tokens[] = $token;
        }

        $position += $shift;
        $string = Util::substring($string, $shift);

        // update line + offset: count only the newlines in the bytes that
        // were consumed by this token
        $nowConsumed = $totalBytes - strlen($string);

        if ($nowConsumed > $consumedBytes) {
            $newlines += substr_count(
                $originalString,
                "\n",
                $consumedBytes,
                $nowConsumed - $consumedBytes
            );
            $consumedBytes = $nowConsumed;
        }

        $this->line = $newlines + 1;
    }

    if ($position !== $originalLength) {
        throw new RecognitionException($this->line);
    }

    $tokens[] = new CommonToken(Parser::EOF_TOKEN_TYPE, '', $this->line);

    return new ArrayTokenStream($tokens);
}
/**
 * {@inheritDoc}
 *
 * Tries every registered recognizer against the input and keeps the
 * longest match; on a tie the recognizer that comes first wins.
 *
 * @param string $string The remaining input.
 *
 * @return CommonToken|null The longest matching token, or null when no
 *                          recognizer matches.
 */
protected function extractToken($string)
{
    $bestValue = null;
    $bestType = null;

    foreach ($this->recognizers as $candidateType => $recognizer) {
        if (!$recognizer->match($string, $candidateValue)) {
            continue;
        }

        // Only a strictly longer match replaces the current best one.
        $isBetter = $bestValue === null
            || Util::stringLength($candidateValue) > Util::stringLength($bestValue);

        if ($isBetter) {
            $bestValue = $candidateValue;
            $bestType = $candidateType;
        }
    }

    if ($bestType === null) {
        return null;
    }

    return new CommonToken($bestType, $bestValue, $this->getCurrentLine());
}
/**
 * Calculates the FIRST sets of all nonterminals.
 *
 * Works as a fixed-point iteration: every rule's contribution is
 * recomputed from the current FIRST sets and merged into the set of its
 * left-hand side, and the whole pass is repeated until a pass produces
 * no change. A symbol is treated as a nonterminal when it is a key of
 * $rules, and as a terminal otherwise.
 *
 * @param array $rules The rules grouped by the LHS.
 *
 * @return array Calculated FIRST sets.
 */
protected function calculateFirstSets(array $rules)
{
    // initialize
    $firstSets = array();

    foreach (array_keys($rules) as $lhs) {
        $firstSets[$lhs] = array();
    }

    do {
        $changes = false;

        foreach ($rules as $lhs => $ruleArray) {
            foreach ($ruleArray as $rule) {
                $components = $rule->getComponents();
                $new = array();

                if (empty($components)) {
                    // an empty right-hand side derives epsilon directly
                    $new = array(Grammar::EPSILON);
                } else {
                    $lastIndex = count($components) - 1;

                    foreach ($components as $i => $component) {
                        if (!array_key_exists($component, $rules)) {
                            // if terminal, simply add it to the FIRST set
                            // and we're done
                            $new = Util::union($new, array($component));
                            break;
                        }

                        // if nonterminal, copy its FIRST set to
                        // this rule's first set
                        $x = $firstSets[$component];

                        // Strict search: a loose comparison could match a
                        // non-string symbol against the epsilon marker.
                        $epsilonKey = array_search(Grammar::EPSILON, $x, true);

                        if ($epsilonKey === false) {
                            // if the component doesn't derive
                            // epsilon, merge the first sets and
                            // we're done
                            $new = Util::union($new, $x);
                            break;
                        }

                        // if all components derive epsilon,
                        // the rule itself derives epsilon
                        if ($i < $lastIndex) {
                            // more components ahead, remove epsilon
                            unset($x[$epsilonKey]);
                        }

                        $new = Util::union($new, $x);
                    }
                }

                // NOTE(review): Util::different() is presumably true when
                // $new holds symbols missing from the current set - confirm.
                if (Util::different($new, $firstSets[$lhs])) {
                    $firstSets[$lhs] = Util::union($firstSets[$lhs], $new);
                    $changes = true;
                }
            }
        }
    } while ($changes);

    return $firstSets;
}
/**
 * {@inheritDoc}
 *
 * Uses the recognizers of the state on top of the state stack and keeps
 * the longest match. The action registered for the winning token type is
 * then applied: a string pushes that state onto the stack, POP_STATE
 * removes the top state, anything else leaves the stack untouched.
 *
 * @param string $string The remaining input.
 *
 * @return CommonToken|null The longest matching token, or null when no
 *                          recognizer of the current state matches.
 *
 * @throws LogicException When the state stack is empty.
 */
protected function extractToken($string)
{
    if (empty($this->stateStack)) {
        throw new LogicException("You must set a starting state before lexing.");
    }

    $topIndex = count($this->stateStack) - 1;
    $currentState = $this->states[$this->stateStack[$topIndex]];

    $bestValue = null;
    $bestType = null;
    $bestAction = null;

    foreach ($currentState['recognizers'] as $candidateType => $recognizer) {
        if (!$recognizer->match($string, $candidateValue)) {
            continue;
        }

        // Only a strictly longer match replaces the current best one.
        $isBetter = $bestValue === null
            || Util::stringLength($candidateValue) > Util::stringLength($bestValue);

        if ($isBetter) {
            $bestValue = $candidateValue;
            $bestType = $candidateType;
            $bestAction = $currentState['actions'][$bestType];
        }
    }

    if ($bestType === null) {
        return null;
    }

    // Apply the state transition before the token is created.
    if (is_string($bestAction)) {
        // enter new state
        $this->stateStack[] = $bestAction;
    } elseif ($bestAction === self::POP_STATE) {
        array_pop($this->stateStack);
    }

    return new CommonToken($bestType, $bestValue, $this->getCurrentLine());
}