/**
 * Command-line driver: parse one grammar file and generate the parser.
 *
 * Reads options and the single filename argument from $_SERVER['argv'],
 * parses the grammar into a PHP_ParserGenerator_Data object, indexes the
 * symbols, and then either reprints the grammar (when rpflag is set) or
 * runs the full table-construction pipeline and emits the report and the
 * parser source.
 *
 * This method never returns to its caller: every path ends in exit().
 * The final exit status is the number of errors plus the number of
 * parsing conflicts.
 *
 * @return void
 */
function main()
{
    $lem = new PHP_ParserGenerator_Data();

    $this->OptInit($_SERVER['argv']);
    if ($this->version) {
        echo "Lemon version 1.0/PHP_ParserGenerator port version 0.1.5\n";
        exit(0);
    }
    if ($this->OptNArgs($_SERVER['argv']) != 1) {
        echo "Exactly one filename argument is required.\n";
        exit(1);
    }
    $lem->errorcnt = 0;

    /* Initialize the machine */
    $lem->argv0 = $_SERVER['argv'][0];
    $lem->filename = $this->OptArg(0, $_SERVER['argv']);
    $a = pathinfo($lem->filename);
    if (isset($a['extension'])) {
        // Strip the trailing ".ext" to obtain the base name.
        $ext = '.' . $a['extension'];
        $lem->filenosuffix = substr($lem->filename, 0, strlen($lem->filename) - strlen($ext));
    } else {
        $lem->filenosuffix = $lem->filename;
    }
    $lem->basisflag = $this->basisflag;
    $lem->has_fallback = 0;
    $lem->nconflict = 0;
    // 0 is used as the "not set" value for all of these slots.
    $lem->name = $lem->include_code = $lem->include_classcode = $lem->arg = $lem->tokentype = $lem->start = 0;
    $lem->vartype = 0;
    $lem->stacksize = 0;
    $lem->error = $lem->overflow = $lem->failure = $lem->accept = $lem->tokendest = $lem->tokenprefix = $lem->outname = $lem->extracode = 0;
    $lem->vardest = 0;
    $lem->tablesize = 0;
    PHP_ParserGenerator_Symbol::Symbol_new("\$");
    $lem->errsym = PHP_ParserGenerator_Symbol::Symbol_new("error");

    /* Parse the input file */
    $parser = new PHP_ParserGenerator_Parser($this);
    $parser->Parse($lem);
    if ($lem->errorcnt) {
        exit($lem->errorcnt);
    }
    if ($lem->rule === 0) {
        printf("Empty grammar.\n");
        exit(1);
    }

    /* Count and index the symbols of the grammar */
    $lem->nsymbol = PHP_ParserGenerator_Symbol::Symbol_count();
    PHP_ParserGenerator_Symbol::Symbol_new("{default}");
    $lem->symbols = PHP_ParserGenerator_Symbol::Symbol_arrayof();
    // The array holds nsymbol + 1 entries ("{default}" was added after the
    // count was taken), hence the inclusive upper bound.
    for ($i = 0; $i <= $lem->nsymbol; $i++) {
        $lem->symbols[$i]->index = $i;
    }
    usort($lem->symbols, array('PHP_ParserGenerator_Symbol', 'sortSymbols'));
    for ($i = 0; $i <= $lem->nsymbol; $i++) {
        $lem->symbols[$i]->index = $i;
    }
    // Find the first symbol whose name does not start in the upper-case
    // range. The comparison is inclusive so that names beginning with "Z"
    // still count as terminals, and the scan is bounded by the array size.
    // NOTE(review): relies on sortSymbols placing terminals first - confirm.
    for ($i = 1; $i <= $lem->nsymbol && ord($lem->symbols[$i]->name[0]) <= ord('Z'); $i++) {
        // scan only
    }
    $lem->nterminal = $i;

    /* Generate a reprint of the grammar, if requested on the command line */
    if ($this->rpflag) {
        $this->Reprint();
    } else {
        /* Initialize the size for all follow and first sets */
        $this->SetSize($lem->nterminal);

        /* Find the precedence for every production rule (that has one) */
        $lem->FindRulePrecedences();

        /* Compute the lambda-nonterminals and the first-sets for every
        ** nonterminal */
        $lem->FindFirstSets();

        /* Compute all LR(0) states.  Also record follow-set propagation
        ** links so that the follow-set can be computed later */
        $lem->nstate = 0;
        $lem->FindStates();
        $lem->sorted = PHP_ParserGenerator_State::State_arrayof();

        /* Tie up loose ends on the propagation links */
        $lem->FindLinks();

        /* Compute the follow set of every reducible configuration */
        $lem->FindFollowSets();

        /* Compute the action tables */
        $lem->FindActions();

        /* Compress the action tables */
        if ($this->compress === 0) {
            $lem->CompressTables();
        }

        /* Reorder and renumber the states so that states with fewer choices
        ** occur at the end. */
        $lem->ResortStates();

        /* Generate a report of the parser generated.  (the "y.output" file) */
        if (!$this->quiet) {
            $lem->ReportOutput();
        }

        /* Generate the source code for the parser */
        $lem->ReportTable($this->mhflag);

        /* Header-file generation (ReportHeader) is disabled in this port. */
    }

    if ($this->statistics) {
        printf(
            "Parser statistics: %d terminals, %d nonterminals, %d rules\n",
            $lem->nterminal,
            $lem->nsymbol - $lem->nterminal,
            $lem->nrule
        );
        printf(
            " %d states, %d parser table entries, %d conflicts\n",
            $lem->nstate,
            $lem->tablesize,
            $lem->nconflict
        );
    }
    if ($lem->nconflict) {
        printf("%d parsing conflicts.\n", $lem->nconflict);
    }
    exit($lem->errorcnt + $lem->nconflict);
}
/**
 * Shrink the action tables by introducing a default action per state.
 *
 * For every state, the rule that is the target of the largest number of
 * REDUCE actions is selected. The first REDUCE action for that rule is
 * re-keyed to the "{default}" symbol, every later REDUCE action for the
 * same rule is marked NOT_USED, and the state's action list is re-sorted.
 * States without any REDUCE action are left untouched.
 *
 * @throws Exception if no REDUCE action for the selected rule is found
 *                   and the list terminator is the integer 0
 */
function CompressTables()
{
    for ($stateIndex = 0; $stateIndex < $this->nstate; $stateIndex++) {
        $state = $this->sorted[$stateIndex]->data;
        $bestCount = 0;
        $bestRule = 0;

        // Pass 1: find the rule with the most REDUCE actions in this state.
        $action = $state->ap;
        while ($action) {
            if ($action->type == PHP_ParserGenerator_Action::REDUCE) {
                $rule = $action->x;
                if ($rule !== $bestRule) {
                    // Count this action plus every later REDUCE for the same rule.
                    $count = 1;
                    $later = $action->next;
                    while ($later) {
                        if ($later->type == PHP_ParserGenerator_Action::REDUCE) {
                            $laterRule = $later->x;
                            if ($laterRule !== $bestRule && $laterRule === $rule) {
                                $count++;
                            }
                        }
                        $later = $later->next;
                    }
                    if ($count > $bestCount) {
                        $bestCount = $count;
                        $bestRule = $rule;
                    }
                }
            }
            $action = $action->next;
        }

        /* Do not make a default if the number of rules to default
        ** is not at least 1 */
        if ($bestCount < 1) {
            continue;
        }

        // Pass 2: locate the first REDUCE action for the winning rule.
        $action = $state->ap;
        while ($action
            && !($action->type == PHP_ParserGenerator_Action::REDUCE && $action->x === $bestRule)
        ) {
            $action = $action->next;
        }
        if ($action === 0) {
            throw new Exception('$ap is not an object');
        }

        // That action becomes the default; later duplicates are retired.
        $action->sp = PHP_ParserGenerator_Symbol::Symbol_new("{default}");
        $action = $action->next;
        while ($action) {
            if ($action->type == PHP_ParserGenerator_Action::REDUCE && $action->x === $bestRule) {
                $action->type = PHP_ParserGenerator_Action::NOT_USED;
            }
            $action = $action->next;
        }

        $state->ap = PHP_ParserGenerator_Action::Action_sort($state->ap);
    }
}
/**
 * Feed one token of the grammar file into the parser state machine.
 *
 * The current state ($this->state) decides how the token is interpreted.
 * Depending on the state the token may start or finish a rule, attach a
 * code fragment or precedence mark to the previous rule, select a
 * %declaration keyword, or supply the argument of a declaration. Syntax
 * problems are reported through PHP_ParserGenerator::ErrorMsg() (a
 * printf-style formatter, so literal percent signs are written "%%"),
 * counted in $this->errorcnt, and usually move the machine into one of the
 * RESYNC_* states, which skip tokens until a "." or "%" is seen.
 *
 * @param string $token the token text; its first character selects the
 *                      branch taken in most states
 *
 * @return void
 */
function parseonetoken($token)
{
    $x = $token;
    // Scratch slot: declargslot/decllnslot are bound to it by reference in
    // WAITING_FOR_DECL_KEYWORD until a keyword rebinds them.
    $this->a = 0;
    if (PHP_ParserGenerator::DEBUG) {
        printf("%s:%d: Token=[%s] state=%d\n", $this->filename, $this->tokenlineno, $token, $this->state);
    }
    switch ($this->state) {
        case self::INITIALIZE:
            $this->prevrule = 0;
            $this->preccounter = 0;
            $this->firstrule = $this->lastrule = 0;
            $this->gp->nrule = 0;
            /* Fall thru to next case */
        case self::WAITING_FOR_DECL_OR_RULE:
            if ($x[0] == '%') {
                $this->state = self::WAITING_FOR_DECL_KEYWORD;
            } elseif (preg_match('/[a-z]/', $x[0])) {
                // A lower-case name starts a new rule: this is its LHS.
                $this->lhs = PHP_ParserGenerator_Symbol::Symbol_new($x);
                $this->nrhs = 0;
                $this->lhsalias = 0;
                $this->state = self::WAITING_FOR_ARROW;
            } elseif ($x[0] == '{') {
                // A code fragment belongs to the most recently completed rule.
                if ($this->prevrule === 0) {
                    PHP_ParserGenerator::ErrorMsg(
                        $this->filename,
                        $this->tokenlineno,
                        "There is no prior rule upon which to attach the code\n fragment which begins on this line."
                    );
                    $this->errorcnt++;
                } elseif ($this->prevrule->code !== 0) {
                    // Strict test against the 0 sentinel set when the rule was
                    // created; a loose string-to-int comparison depends on the
                    // PHP version.
                    PHP_ParserGenerator::ErrorMsg(
                        $this->filename,
                        $this->tokenlineno,
                        "Code fragment beginning on this line is not the first\n to follow the previous rule."
                    );
                    $this->errorcnt++;
                } else {
                    $this->prevrule->line = $this->tokenlineno;
                    $this->prevrule->code = substr($x, 1);
                }
            } elseif ($x[0] == '[') {
                $this->state = self::PRECEDENCE_MARK_1;
            } else {
                PHP_ParserGenerator::ErrorMsg(
                    $this->filename,
                    $this->tokenlineno,
                    "Token \"%s\" should be either \"%%\" or a nonterminal name.",
                    $x
                );
                $this->errorcnt++;
            }
            break;

        case self::PRECEDENCE_MARK_1:
            if (!preg_match('/[A-Z]/', $x[0])) {
                PHP_ParserGenerator::ErrorMsg(
                    $this->filename,
                    $this->tokenlineno,
                    "The precedence symbol must be a terminal."
                );
                $this->errorcnt++;
            } elseif ($this->prevrule === 0) {
                PHP_ParserGenerator::ErrorMsg(
                    $this->filename,
                    $this->tokenlineno,
                    "There is no prior rule to assign precedence \"[%s]\".",
                    $x
                );
                $this->errorcnt++;
            } elseif ($this->prevrule->precsym !== 0) {
                // precsym is either the 0 sentinel or a symbol object; a loose
                // comparison would try to convert the object to an integer.
                PHP_ParserGenerator::ErrorMsg(
                    $this->filename,
                    $this->tokenlineno,
                    "Precedence mark on this line is not the first to follow the previous rule."
                );
                $this->errorcnt++;
            } else {
                $this->prevrule->precsym = PHP_ParserGenerator_Symbol::Symbol_new($x);
            }
            $this->state = self::PRECEDENCE_MARK_2;
            break;

        case self::PRECEDENCE_MARK_2:
            if ($x[0] != ']') {
                PHP_ParserGenerator::ErrorMsg(
                    $this->filename,
                    $this->tokenlineno,
                    "Missing \"]\" on precedence mark."
                );
                $this->errorcnt++;
            }
            $this->state = self::WAITING_FOR_DECL_OR_RULE;
            break;

        case self::WAITING_FOR_ARROW:
            // Prefix test without indexing past the end of a short token.
            if (strncmp($x, '::=', 3) === 0) {
                $this->state = self::IN_RHS;
            } elseif ($x[0] == '(') {
                $this->state = self::LHS_ALIAS_1;
            } else {
                PHP_ParserGenerator::ErrorMsg(
                    $this->filename,
                    $this->tokenlineno,
                    "Expected to see a \":\" following the LHS symbol \"%s\".",
                    $this->lhs->name
                );
                $this->errorcnt++;
                $this->state = self::RESYNC_AFTER_RULE_ERROR;
            }
            break;

        case self::LHS_ALIAS_1:
            if (preg_match('/[A-Za-z]/', $x[0])) {
                $this->lhsalias = $x;
                $this->state = self::LHS_ALIAS_2;
            } else {
                PHP_ParserGenerator::ErrorMsg(
                    $this->filename,
                    $this->tokenlineno,
                    "\"%s\" is not a valid alias for the LHS \"%s\"\n",
                    $x,
                    $this->lhs->name
                );
                $this->errorcnt++;
                $this->state = self::RESYNC_AFTER_RULE_ERROR;
            }
            break;

        case self::LHS_ALIAS_2:
            if ($x[0] == ')') {
                $this->state = self::LHS_ALIAS_3;
            } else {
                PHP_ParserGenerator::ErrorMsg(
                    $this->filename,
                    $this->tokenlineno,
                    "Missing \")\" following LHS alias name \"%s\".",
                    $this->lhsalias
                );
                $this->errorcnt++;
                $this->state = self::RESYNC_AFTER_RULE_ERROR;
            }
            break;

        case self::LHS_ALIAS_3:
            if ($x == '::=') {
                $this->state = self::IN_RHS;
            } else {
                PHP_ParserGenerator::ErrorMsg(
                    $this->filename,
                    $this->tokenlineno,
                    "Missing \"->\" following: \"%s(%s)\".",
                    $this->lhs->name,
                    $this->lhsalias
                );
                $this->errorcnt++;
                $this->state = self::RESYNC_AFTER_RULE_ERROR;
            }
            break;

        case self::IN_RHS:
            if ($x[0] == '.') {
                // End of rule: materialise it and append it to the rule list.
                $rp = new PHP_ParserGenerator_Rule();
                $rp->ruleline = $this->tokenlineno;
                for ($i = 0; $i < $this->nrhs; $i++) {
                    $rp->rhs[$i] = $this->rhs[$i];
                    $rp->rhsalias[$i] = $this->alias[$i];
                }
                $rp->lhs = $this->lhs;
                $rp->lhsalias = $this->lhsalias;
                $rp->nrhs = $this->nrhs;
                $rp->code = 0;
                $rp->precsym = 0;
                $rp->index = $this->gp->nrule++;
                // Push the rule onto the front of its LHS symbol's rule chain.
                $rp->nextlhs = $rp->lhs->rule;
                $rp->lhs->rule = $rp;
                $rp->next = 0;
                if ($this->firstrule === 0) {
                    $this->firstrule = $this->lastrule = $rp;
                } else {
                    $this->lastrule->next = $rp;
                    $this->lastrule = $rp;
                }
                $this->prevrule = $rp;
                $this->state = self::WAITING_FOR_DECL_OR_RULE;
            } elseif (preg_match('/[a-zA-Z]/', $x[0])) {
                if ($this->nrhs >= PHP_ParserGenerator::MAXRHS) {
                    PHP_ParserGenerator::ErrorMsg(
                        $this->filename,
                        $this->tokenlineno,
                        "Too many symbols on RHS of rule beginning at \"%s\".",
                        $x
                    );
                    $this->errorcnt++;
                    $this->state = self::RESYNC_AFTER_RULE_ERROR;
                } else {
                    if (isset($this->rhs[$this->nrhs - 1])) {
                        $msp = $this->rhs[$this->nrhs - 1];
                        if ($msp->type == PHP_ParserGenerator_Symbol::MULTITERMINAL) {
                            $inf = array_reduce($msp->subsym, array($this, '_printmulti'), '');
                            // The token is passed as an argument rather than
                            // spliced into the format string.
                            PHP_ParserGenerator::ErrorMsg(
                                $this->filename,
                                $this->tokenlineno,
                                'WARNING: symbol %s will not be part of previous multiterminal %s',
                                $x,
                                substr($inf, 0, strlen($inf) - 1)
                            );
                        }
                    }
                    $this->rhs[$this->nrhs] = PHP_ParserGenerator_Symbol::Symbol_new($x);
                    $this->alias[$this->nrhs] = 0;
                    $this->nrhs++;
                }
            } elseif (($x[0] == '|' || $x[0] == '/') && $this->nrhs > 0) {
                // "A|B" / "A/B": fold the new symbol into a multiterminal that
                // replaces the previous RHS entry.
                $msp = $this->rhs[$this->nrhs - 1];
                if ($msp->type != PHP_ParserGenerator_Symbol::MULTITERMINAL) {
                    $origsp = $msp;
                    $msp = new PHP_ParserGenerator_Symbol();
                    $msp->type = PHP_ParserGenerator_Symbol::MULTITERMINAL;
                    $msp->nsubsym = 1;
                    $msp->subsym = array($origsp);
                    $msp->name = $origsp->name;
                    $this->rhs[$this->nrhs - 1] = $msp;
                }
                $msp->nsubsym++;
                $msp->subsym[$msp->nsubsym - 1] = PHP_ParserGenerator_Symbol::Symbol_new(substr($x, 1));
                if (preg_match('/[a-z]/', $x[1]) || preg_match('/[a-z]/', $msp->subsym[0]->name[0])) {
                    PHP_ParserGenerator::ErrorMsg(
                        $this->filename,
                        $this->tokenlineno,
                        "Cannot form a compound containing a non-terminal"
                    );
                    $this->errorcnt++;
                }
            } elseif ($x[0] == '(' && $this->nrhs > 0) {
                $this->state = self::RHS_ALIAS_1;
            } else {
                PHP_ParserGenerator::ErrorMsg(
                    $this->filename,
                    $this->tokenlineno,
                    "Illegal character on RHS of rule: \"%s\".",
                    $x
                );
                $this->errorcnt++;
                $this->state = self::RESYNC_AFTER_RULE_ERROR;
            }
            break;

        case self::RHS_ALIAS_1:
            if (preg_match('/[A-Za-z]/', $x[0])) {
                $this->alias[$this->nrhs - 1] = $x;
                $this->state = self::RHS_ALIAS_2;
            } else {
                PHP_ParserGenerator::ErrorMsg(
                    $this->filename,
                    $this->tokenlineno,
                    "\"%s\" is not a valid alias for the RHS symbol \"%s\"\n",
                    $x,
                    $this->rhs[$this->nrhs - 1]->name
                );
                $this->errorcnt++;
                $this->state = self::RESYNC_AFTER_RULE_ERROR;
            }
            break;

        case self::RHS_ALIAS_2:
            if ($x[0] == ')') {
                $this->state = self::IN_RHS;
            } else {
                // Report the RHS alias recorded in RHS_ALIAS_1, not the LHS one.
                PHP_ParserGenerator::ErrorMsg(
                    $this->filename,
                    $this->tokenlineno,
                    "Missing \")\" following RHS alias name \"%s\".",
                    $this->alias[$this->nrhs - 1]
                );
                $this->errorcnt++;
                $this->state = self::RESYNC_AFTER_RULE_ERROR;
            }
            break;

        case self::WAITING_FOR_DECL_KEYWORD:
            if (preg_match('/[A-Za-z]/', $x[0])) {
                $this->declkeyword = $x;
                // Default both slots to the scratch property; keywords below
                // rebind them to the field that receives the argument and,
                // where one exists, the field that receives its line number.
                $this->declargslot =& $this->a;
                $this->decllnslot =& $this->a;
                $this->state = self::WAITING_FOR_DECL_ARG;
                if ('name' == $x) {
                    $this->declargslot =& $this->gp->name;
                } elseif ('include' == $x) {
                    $this->declargslot =& $this->gp->include_code;
                    $this->decllnslot =& $this->gp->includeln;
                } elseif ('include_class' == $x) {
                    $this->declargslot =& $this->gp->include_classcode;
                    $this->decllnslot =& $this->gp->include_classln;
                } elseif ('declare_class' == $x) {
                    $this->declargslot =& $this->gp->declare_classcode;
                    $this->decllnslot =& $this->gp->declare_classln;
                } elseif ('code' == $x) {
                    $this->declargslot =& $this->gp->extracode;
                    $this->decllnslot =& $this->gp->extracodeln;
                } elseif ('token_destructor' == $x) {
                    $this->declargslot =& $this->gp->tokendest;
                    $this->decllnslot =& $this->gp->tokendestln;
                } elseif ('default_destructor' == $x) {
                    $this->declargslot =& $this->gp->vardest;
                    $this->decllnslot =& $this->gp->vardestln;
                } elseif ('token_prefix' == $x) {
                    $this->declargslot =& $this->gp->tokenprefix;
                } elseif ('syntax_error' == $x) {
                    $this->declargslot =& $this->gp->error;
                    $this->decllnslot =& $this->gp->errorln;
                } elseif ('parse_accept' == $x) {
                    $this->declargslot =& $this->gp->accept;
                    $this->decllnslot =& $this->gp->acceptln;
                } elseif ('parse_failure' == $x) {
                    $this->declargslot =& $this->gp->failure;
                    $this->decllnslot =& $this->gp->failureln;
                } elseif ('stack_overflow' == $x) {
                    $this->declargslot =& $this->gp->overflow;
                    $this->decllnslot =& $this->gp->overflowln;
                } elseif ('token_type' == $x) {
                    $this->declargslot =& $this->gp->tokentype;
                } elseif ('default_type' == $x) {
                    $this->declargslot =& $this->gp->vartype;
                } elseif ('stack_size' == $x) {
                    $this->declargslot =& $this->gp->stacksize;
                } elseif ('start_symbol' == $x) {
                    $this->declargslot =& $this->gp->start;
                } elseif ('left' == $x) {
                    $this->preccounter++;
                    $this->declassoc = PHP_ParserGenerator_Symbol::LEFT;
                    $this->state = self::WAITING_FOR_PRECEDENCE_SYMBOL;
                } elseif ('right' == $x) {
                    $this->preccounter++;
                    $this->declassoc = PHP_ParserGenerator_Symbol::RIGHT;
                    $this->state = self::WAITING_FOR_PRECEDENCE_SYMBOL;
                } elseif ('nonassoc' == $x) {
                    $this->preccounter++;
                    $this->declassoc = PHP_ParserGenerator_Symbol::NONE;
                    $this->state = self::WAITING_FOR_PRECEDENCE_SYMBOL;
                } elseif ('destructor' == $x) {
                    $this->state = self::WAITING_FOR_DESTRUCTOR_SYMBOL;
                } elseif ('type' == $x) {
                    $this->state = self::WAITING_FOR_DATATYPE_SYMBOL;
                } elseif ('fallback' == $x) {
                    $this->fallback = 0;
                    $this->state = self::WAITING_FOR_FALLBACK_ID;
                } else {
                    PHP_ParserGenerator::ErrorMsg(
                        $this->filename,
                        $this->tokenlineno,
                        "Unknown declaration keyword: \"%%%s\".",
                        $x
                    );
                    $this->errorcnt++;
                    $this->state = self::RESYNC_AFTER_DECL_ERROR;
                }
            } else {
                PHP_ParserGenerator::ErrorMsg(
                    $this->filename,
                    $this->tokenlineno,
                    "Illegal declaration keyword: \"%s\".",
                    $x
                );
                $this->errorcnt++;
                $this->state = self::RESYNC_AFTER_DECL_ERROR;
            }
            break;

        case self::WAITING_FOR_DESTRUCTOR_SYMBOL:
            if (!preg_match('/[A-Za-z]/', $x[0])) {
                // "%%" keeps the keyword's percent sign literal in the
                // printf-style format.
                PHP_ParserGenerator::ErrorMsg(
                    $this->filename,
                    $this->tokenlineno,
                    "Symbol name missing after %%destructor keyword"
                );
                $this->errorcnt++;
                $this->state = self::RESYNC_AFTER_DECL_ERROR;
            } else {
                $sp = PHP_ParserGenerator_Symbol::Symbol_new($x);
                $this->declargslot =& $sp->destructor;
                $this->decllnslot =& $sp->destructorln;
                $this->state = self::WAITING_FOR_DECL_ARG;
            }
            break;

        case self::WAITING_FOR_DATATYPE_SYMBOL:
            if (!preg_match('/[A-Za-z]/', $x[0])) {
                PHP_ParserGenerator::ErrorMsg(
                    $this->filename,
                    $this->tokenlineno,
                    "Symbol name missing after %%type keyword"
                );
                $this->errorcnt++;
                $this->state = self::RESYNC_AFTER_DECL_ERROR;
            } else {
                $sp = PHP_ParserGenerator_Symbol::Symbol_new($x);
                $this->declargslot =& $sp->datatype;
                $this->state = self::WAITING_FOR_DECL_ARG;
            }
            break;

        case self::WAITING_FOR_PRECEDENCE_SYMBOL:
            if ($x[0] == '.') {
                $this->state = self::WAITING_FOR_DECL_OR_RULE;
            } elseif (preg_match('/[A-Z]/', $x[0])) {
                $sp = PHP_ParserGenerator_Symbol::Symbol_new($x);
                if ($sp->prec >= 0) {
                    PHP_ParserGenerator::ErrorMsg(
                        $this->filename,
                        $this->tokenlineno,
                        "Symbol \"%s\" has already been given a precedence.",
                        $x
                    );
                    $this->errorcnt++;
                } else {
                    $sp->prec = $this->preccounter;
                    $sp->assoc = $this->declassoc;
                }
            } else {
                PHP_ParserGenerator::ErrorMsg(
                    $this->filename,
                    $this->tokenlineno,
                    "Can't assign a precedence to \"%s\".",
                    $x
                );
                $this->errorcnt++;
            }
            break;

        case self::WAITING_FOR_DECL_ARG:
            if (preg_match('/[A-Za-z0-9{"]/', $x[0])) {
                // NOTE(review): loose comparison kept because the slot's
                // initial value is defined outside this method; its result for
                // string values differs between PHP 7 and PHP 8 - confirm.
                if ($this->declargslot != 0) {
                    PHP_ParserGenerator::ErrorMsg(
                        $this->filename,
                        $this->tokenlineno,
                        "The argument \"%s\" to declaration \"%%%s\" is not the first.",
                        $x[0] == '"' ? substr($x, 1) : $x,
                        $this->declkeyword
                    );
                    $this->errorcnt++;
                    $this->state = self::RESYNC_AFTER_DECL_ERROR;
                } else {
                    // Strip the leading quote or brace from the argument.
                    $this->declargslot = ($x[0] == '"' || $x[0] == '{') ? substr($x, 1) : $x;
                    // When decllnslot is still bound to the scratch property,
                    // this makes it truthy so no line number is written to it.
                    $this->a = 1;
                    if (!$this->decllnslot) {
                        $this->decllnslot = $this->tokenlineno;
                    }
                    $this->state = self::WAITING_FOR_DECL_OR_RULE;
                }
            } else {
                PHP_ParserGenerator::ErrorMsg(
                    $this->filename,
                    $this->tokenlineno,
                    "Illegal argument to %%%s: %s",
                    $this->declkeyword,
                    $x
                );
                $this->errorcnt++;
                $this->state = self::RESYNC_AFTER_DECL_ERROR;
            }
            break;

        case self::WAITING_FOR_FALLBACK_ID:
            if ($x[0] == '.') {
                $this->state = self::WAITING_FOR_DECL_OR_RULE;
            } elseif (!preg_match('/[A-Z]/', $x[0])) {
                PHP_ParserGenerator::ErrorMsg(
                    $this->filename,
                    $this->tokenlineno,
                    "%%fallback argument \"%s\" should be a token",
                    $x
                );
                $this->errorcnt++;
            } else {
                $sp = PHP_ParserGenerator_Symbol::Symbol_new($x);
                if ($this->fallback === 0) {
                    // The first token named is the fallback target itself.
                    $this->fallback = $sp;
                } elseif (is_object($sp->fallback)) {
                    PHP_ParserGenerator::ErrorMsg(
                        $this->filename,
                        $this->tokenlineno,
                        "More than one fallback assigned to token %s",
                        $x
                    );
                    $this->errorcnt++;
                } else {
                    $sp->fallback = $this->fallback;
                    $this->gp->has_fallback = 1;
                }
            }
            break;

        case self::RESYNC_AFTER_RULE_ERROR:
            /* Fall thru: both resync states recover the same way */
        case self::RESYNC_AFTER_DECL_ERROR:
            if ($x[0] == '.') {
                $this->state = self::WAITING_FOR_DECL_OR_RULE;
            }
            if ($x[0] == '%') {
                $this->state = self::WAITING_FOR_DECL_KEYWORD;
            }
            break;
    }
}