/**
 * Fills $firstSets with the first set of every left-hand side found in $productions.
 *
 * The work is done in three passes over $productions:
 *  1. every left-hand side contained in $emptySet gets epsilon added to its first set;
 *  2. every production whose first symbol is a terminal contributes that terminal;
 *  3. the right-hand sides are re-evaluated through the first set calculator until a
 *     full pass adds nothing new to $firstSets.
 *
 * @param array|NormalizedProduction[] $productions the productions to analyse
 * @param ParseSets $firstSets the sets being built; modified in place
 * @param SymbolSet $emptySet the non-terminals which generate epsilon
 *
 * @return SetsGenerator this instance
 */
public function generateFirstSets(array $productions, ParseSets $firstSets, SymbolSet $emptySet)
{
    // pass 1: seed epsilon for the left-hand sides listed in the empty set
    foreach ($productions as $rule) {
        $head = $rule->getLeftHandSide();
        if (!$emptySet->contains($head)) {
            continue;
        }

        $firstSets->addEpsilon($head);
    }

    // pass 2: a production starting with a terminal trivially has it in its first set
    foreach ($productions as $rule) {
        $leadingSymbol = $rule->getFirstSymbol();
        if (Symbol::TYPE_TERMINAL == $leadingSymbol->getType()) {
            $firstSets->addTerminal($rule->getLeftHandSide(), $leadingSymbol);
        }
    }

    // pass 3: keep propagating through the right-hand sides until nothing changes
    $grew = true;
    while ($grew) {
        $grew = false;

        foreach ($productions as $rule) {
            $candidates = new ArraySet();
            $this->firstSetCalculator->processSymbolList($candidates, $rule->getRightHandSide(), $firstSets);

            // the call must happen for every production, whether or not a change was already seen
            if ($firstSets->addAllTerminals($rule->getLeftHandSide(), $candidates)) {
                $grew = true;
            }
        }
    }

    return $this;
}
/**
 * Tells whether every symbol on the right-hand side of $production is contained
 * in $alreadyAdded.
 *
 * An empty right-hand side yields false: there is no symbol to look up, and the
 * set is never queried with a non-symbol value.
 *
 * @param NormalizedProduction $production the production whose right-hand side is inspected
 * @param SymbolSet $alreadyAdded the symbols collected so far
 *
 * @return bool true when the right-hand side is not empty and all its symbols are in $alreadyAdded
 */
private function indirectlyGeneratesEpsilon(NormalizedProduction $production, SymbolSet $alreadyAdded)
{
    $rhsItems = $production->getRightHandSide();

    // reset() on an empty array returns false, which must not be handed to contains()
    if (0 === count($rhsItems)) {
        return false;
    }

    /** @var Symbol $item */
    foreach ($rhsItems as $item) {
        // one symbol outside the set is enough to decide; the rest is not examined
        if (!$alreadyAdded->contains($item)) {
            return false;
        }
    }

    return true;
}
/**
 * Calculates the first set of the symbol sequence $list and adds its elements to $newFirstSet.
 *
 * - An empty $list contributes only epsilon.
 * - A single symbol is delegated to processSymbol().
 * - Otherwise the symbols are visited from left to right. A symbol which is not a
 *   non-terminal is added as is and ends the walk. A non-terminal contributes its known
 *   first set without epsilon; the walk only moves on to the next symbol when that
 *   first set did contain epsilon. Epsilon is added to $newFirstSet only when the walk
 *   runs past the last symbol.
 *
 * $list is assumed not to contain the epsilon symbol itself.
 *
 * @param \Helstern\Nomsky\Grammar\Symbol\SymbolSet $newFirstSet the set receiving the result; modified in place
 * @param array|Symbol[] $list the symbols whose combined first set is computed
 * @param \Helstern\Nomsky\GrammarAnalysis\ParseSets\ParseSets $firstSets the first sets known so far
 *
 * @return bool if the epsilon symbol was added to $newFirstSet
 */
public function processSymbolList(SymbolSet $newFirstSet, array $list, ParseSets $firstSets)
{
    $symbolCount = count($list);

    if (0 === $symbolCount) {
        $newFirstSet->add(new EpsilonSymbol());

        return true;
    }

    if (1 === $symbolCount) {
        // reset() picks the only element whatever its key is, unlike $list[0]
        /** @var Symbol $symbol */
        $symbol = reset($list);

        return $this->processSymbol($newFirstSet, $symbol, $firstSets);
    }

    $symbolIsEpsilon = SymbolIsEpsilon::singletonInstance();
    $symbolsIsNonTerminal = SymbolTypeEquals::newInstanceMatchingNonTerminals();

    /** @var Symbol $symbol */
    foreach ($list as $symbol) {
        if (!$symbolsIsNonTerminal->matchSymbol($symbol)) {
            // anything which is not a non-terminal starts the sequence itself and hides what follows
            $newFirstSet->add($symbol);

            return false;
        }

        // a fresh counter per symbol: it records whether epsilon was seen in this symbol's
        // first set while the inverter keeps epsilon out of the copied terminals
        $epsilonCounter = new MatchCountingInterceptor($symbolIsEpsilon);
        $acceptor = Inverter::newInstance($epsilonCounter);

        $newFirstSet->addAll($firstSets->filterTerminalSet($symbol, $acceptor));

        // read the counter only after the filtered set has been consumed
        if (!($epsilonCounter->getMatchCount() > 0)) {
            return false;
        }
    }

    // every symbol was a non-terminal whose first set contains epsilon
    $newFirstSet->add(new EpsilonSymbol());

    return true;
}
/**
 * Checks that $nonTerminal is contained in the known non-terminals and that
 * $terminal is contained in the known terminals.
 *
 * @param Symbol $nonTerminal looked up in $this->nonTerminals
 * @param Symbol $terminal looked up in $this->terminals
 *
 * @throws \RuntimeException when at least one of the two symbols is not known; the message names the offender(s)
 *
 * @return bool true when both symbols are known
 */
private function assertKnownSymbols(Symbol $nonTerminal, Symbol $terminal)
{
    $knownNonTerminal = $this->nonTerminals->contains($nonTerminal);
    $knownTerminal = $this->terminals->contains($terminal);

    if ($knownNonTerminal && $knownTerminal) {
        return true;
    }

    if (!$knownNonTerminal && !$knownTerminal) {
        $assertionMessage = sprintf(
            'Unknown symbols: non-terminal %s and terminal %s',
            $nonTerminal->toString(),
            $terminal->toString()
        );
    } elseif (!$knownNonTerminal) {
        $assertionMessage = sprintf('Unknown non-terminal: %s', $nonTerminal->toString());
    } else {
        $assertionMessage = sprintf('Unknown terminal: %s', $terminal->toString());
    }

    throw new \RuntimeException($assertionMessage);
}