/**
 * Parse a text against the grammar, starting from a given rule.
 *
 * The text is lexed, then the parser alternates between unfolding rules and
 * backtracking until a derivation ends on the `EOF` token. When backtracking
 * is no longer possible, the token at the recorded error state is reported
 * together with its line, its column and the offending line of text.
 *
 * @param   string  $text    Text to parse.
 * @param   string  $rule    The axiom, i.e. root rule. When null or not a
 *                           known rule, the result of getRootRule() is used.
 * @param   bool    $tree    Whether build tree or not.
 * @return  mixed            `true` when $tree is false, else the tree
 *                           returned by _buildTree().
 * @throws  \Hoa\Compiler\Exception\UnexpectedToken  When the text cannot be
 *                                                   derived from the rule.
 * @throws  \Hoa\Compiler\Exception                  When _buildTree() does not
 *                                                   return a TreeNode.
 */
public function parse($text, $rule = null, $tree = true)
{
    $lexer                = new Lexer();
    $this->_tokenSequence = $lexer->lexMe($text, $this->_tokens);
    $this->_currentState  = 0;
    $this->_errorState    = 0;
    $this->_trace         = [];
    $this->_todo          = [];

    // No rule given (null is not a rule name) or an unknown one: start from
    // the root rule.
    if (null === $rule || false === array_key_exists($rule, $this->_rules)) {
        $rule = $this->getRootRule();
    }

    $closeRule   = new Rule\Ekzit($rule, 0);
    $openRule    = new Rule\Entry($rule, 0, [$closeRule]);
    $this->_todo = [$closeRule, $openRule];

    do {
        $out = $this->unfold();

        // Success: something was unfolded and the whole input was consumed.
        if (null !== $out && 'EOF' === $this->getCurrentToken()) {
            break;
        }

        if (false === $this->backtrack()) {
            $token  = $this->_tokenSequence[$this->_errorState];
            $offset = $token['offset'];
            $line   = 1;
            $column = 1;

            if (!empty($text)) {
                // Everything strictly before the offending token.
                $before = substr($text, 0, $offset);
                $lastNl = strrpos($before, "\n");

                // Byte offset where the offending line starts. "No newline
                // before the token" and "newline at byte 0" are kept apart
                // so the column is right in both cases.
                $lineStart = false === $lastNl ? 0 : $lastNl + 1;

                // The offending line stops at the next newline, or at the
                // end of the text when it is the last line.
                $rightnl = strpos($text, "\n", $offset);
                $lineEnd = false === $rightnl ? strlen($text) : $rightnl;

                // Both are 1-based; the column is counted in bytes.
                $line   = substr_count($before, "\n") + 1;
                $column = $offset - $lineStart + 1;

                // Only the offending line is shown in the message, so the
                // caret printed below it lines up with the token.
                $text = substr($text, $lineStart, $lineEnd - $lineStart);
            }

            throw new Compiler\Exception\UnexpectedToken(
                'Unexpected token "%s" (%s) at line %d and column %d:' .
                "\n" . '%s' . "\n" .
                str_repeat(' ', $column - 1) . '↑',
                0,
                [
                    $token['value'],
                    $token['token'],
                    $line,
                    $column,
                    $text
                ],
                $line,
                $column
            );
        }
    } while (true);

    if (false === $tree) {
        return true;
    }

    $tree = $this->_buildTree();

    if (!($tree instanceof TreeNode)) {
        throw new Compiler\Exception(
            'Parsing error: cannot build AST, the trace is corrupted.',
            1
        );
    }

    return $this->_tree = $tree;
}
/**
 * Build the analyzer of the rules (does not analyze the rules).
 *
 * Every rule is lexed with the grammar-description tokens declared below and
 * handed to rule(). The entry that rule() designates in the created rules is
 * then renamed after the rule key and given its textual representation.
 *
 * @param   array  $rules    Rules to be analyzed, as `name => description`.
 *                           A name starting with `#` also sets the default
 *                           node ID of the rule; the `#` is stripped from the
 *                           rule name.
 * @return  array            The created rules.
 * @throws  \Hoa\Compiler\Exception\Rule  When no rule is given.
 * @throws  \Hoa\Compiler\Exception       When a rule cannot be parsed.
 */
public function analyzeRules(array $rules)
{
    if (empty($rules)) {
        throw new Compiler\Exception\Rule('No rules specified!', 0);
    }

    // Lexemes of the rule description language itself.
    $tokens = [
        'default' => [
            'skip'         => '\\s',
            'or'           => '\\|',
            'zero_or_one'  => '\\?',
            'one_or_more'  => '\\+',
            'zero_or_more' => '\\*',
            'n_to_m'       => '\\{[0-9]+,[0-9]+\\}',
            'zero_to_m'    => '\\{,[0-9]+\\}',
            'n_or_more'    => '\\{[0-9]+,\\}',
            'exactly_n'    => '\\{[0-9]+\\}',
            'skipped'      => '::[a-zA-Z_][a-zA-Z0-9_]*(\\[\\d+\\])?::',
            'kept'         => '<[a-zA-Z_][a-zA-Z0-9_]*(\\[\\d+\\])?' . '>',
            'named'        => '[a-zA-Z_][a-zA-Z0-9_]*\\(\\)',
            'node'         => '#[a-zA-Z_][a-zA-Z0-9_]*(:[mM])?',
            'capturing_'   => '\\(',
            '_capturing'   => '\\)'
        ]
    ];

    $this->_createdRules = [];
    $this->_rules        = $rules;

    foreach ($rules as $key => $value) {
        $lexer                = new Compiler\Llk\Lexer();
        $this->_tokenSequence = $lexer->lexMe($value, $tokens);
        $this->_rule          = $value;
        $this->_currentState  = 0;
        $nodeId               = null;

        // Look at the first character through a string cast so that integer
        // or empty keys do not trigger an invalid offset access.
        if ('#' === substr((string) $key, 0, 1)) {
            $nodeId = $key;
            $key    = substr($key, 1);
        }

        // rule() receives a copy, so $nodeId keeps the value read from the
        // key whatever rule() does with its argument.
        $pNodeId = $nodeId;
        $rule    = $this->rule($pNodeId);

        if (null === $rule) {
            throw new Compiler\Exception(
                'Error while parsing rule %s.',
                1,
                $key
            );
        }

        $zeRule = $this->_createdRules[$rule];
        $zeRule->setName($key);
        $zeRule->setPPRepresentation($value);

        if (null !== $nodeId) {
            $zeRule->setDefaultId($nodeId);
        }

        // Re-index the rule under its public name.
        unset($this->_createdRules[$rule]);
        $this->_createdRules[$key] = $zeRule;
    }

    return $this->_createdRules;
}