/**
 * Parse an RFC 2822 compliant address and return its address and personal parts.
 *
 * The pattern is assembled once from the Grammar definitions of `addr-spec`,
 * `CFWS` and `phrase`, then cached in a static variable for later calls.
 * The pattern carries no anchors, so the leftmost matching portion of
 * $address is what gets reported.
 *
 * @param string $address Either a bare addr-spec or `phrase <addr-spec>`.
 *
 * @return array|null ['address' => string, 'personal' => string]; 'personal'
 *                    is '' when no display name was matched. Null when
 *                    nothing in $address matches the pattern.
 */
public static function parse($address)
{
    static $regex = null;

    if (null === $regex) {
        $grammar = Grammar::getInstance();

        $addrSpec = $grammar->getDefinition('addr-spec');
        $cfws = $grammar->getDefinition('CFWS');
        $phrase = $grammar->getDefinition('phrase');

        $regex = '/(?<address>' . $addrSpec . ')|(?:(?<name>' . $phrase . ')(?:' . $cfws . '?)<(?<addressalt>' . $addrSpec . ')>(?:' . $cfws . '?))/';
    }

    if (!preg_match($regex, $address, $match)) {
        return null;
    }

    // The "name <addr>" alternative fills 'addressalt'; the bare form fills 'address'.
    $addr = isset($match['addressalt']) ? $match['addressalt'] : $match['address'];

    // Only trim when a display name was actually captured: handing null to
    // trim() is deprecated since PHP 8.1 and a TypeError under strict_types.
    // The result for the "no name" case stays '' exactly as before.
    $name = isset($match['name']) ? trim($match['name'], " \t\n\r\v\"") : '';

    return ['address' => $addr, 'personal' => $name];
}
/**
 * Build the raw SQL fragment that compares the date part of a column
 * against a single bound placeholder, using the function appropriate for
 * the connection's query grammar.
 *
 * The grammar is read from the builder on first use and kept on
 * $this->grammar afterwards.
 *
 * @param Builder $query    Builder the grammar is resolved from
 * @param string  $key      Column name (wrapped by the grammar before use)
 * @param string  $operator Comparison operator placed between column and `?`
 *
 * @return string
 */
private function dateFormatDb($query, $key, $operator)
{
    if (!$this->grammar) {
        $this->grammar = $query->getQuery()->getGrammar();
    }

    $column = $this->grammar->wrap($key);
    $grammar = $this->grammar;

    if ($grammar instanceof \Illuminate\Database\Query\Grammars\MySqlGrammar) {
        return sprintf("DATE(%s) %s ?", $column, $operator);
    }

    if ($grammar instanceof \Illuminate\Database\Query\Grammars\PostgresGrammar) {
        return sprintf("DATE_TRUNC('day', %s) %s ?", $column, $operator);
    }

    if ($grammar instanceof \Illuminate\Database\Query\Grammars\SQLiteGrammar) {
        // %% keeps the literal strftime format specifiers out of sprintf's hands.
        return sprintf("strftime('%%Y-%%m-%%d', %s) %s ?", $column, $operator);
    }

    if ($grammar instanceof \Illuminate\Database\Query\Grammars\SqlServerGrammar) {
        return sprintf("CAST(%s AS DATE) %s ?", $column, $operator);
    }

    // Unknown grammar: compare the wrapped column as-is.
    return sprintf("%s %s ?", $column, $operator);
}
# Binary subtraction; passed by name as a parser callback and as the
# array_reduce() fold function further down.
function minus($minuend, $subtrahend)
{
    $difference = $minuend - $subtrahend;

    return $difference;
}

# N -> number
$N = new RegexParser(
    "#^(0|[1-9][0-9]*)#",
    function ($digits) {
        return (int) $digits;
    }
);

# P -> "-" N   (the sign itself is discarded, only the number is kept)
$P = new ConcParser(
    array(new StringParser("-"), $N),
    function ($sign, $number) {
        return $number;
    }
);

# The naive formulation is left-recursive in S, so constructing the grammar
# is expected to raise a GrammarException:
#   S -> N
#   S -> S P
try {
    $grammar = new Grammar(
        "S",
        array(
            "S" => new LazyAltParser(
                array(
                    "N",
                    new ConcParser(array("S", "P"), "minus"),
                )
            ),
            "P" => $P,
            "N" => $N,
        )
    );
    var_dump(false);
} catch (GrammarException $e) {
    # Left-recursive in S
    var_dump(true);
}

# The repaired grammar removes the left recursion:
#   S -> N P*
$grammar = new Grammar(
    "S",
    array(
        "S" => new ConcParser(
            array($N, new GreedyStarParser("P")),
            function ($first, $rest) {
                # Fold the trailing operands onto the first one, left to right.
                return array_reduce($rest, "minus", $first);
            }
        ),
        "P" => $P,
        "N" => $N,
    )
);

var_dump($grammar->parse("5-4-3") === -2); # true
// Grammar that parses a JSON document whose top-level value is an object.
// Objects and arrays become PHP arrays, numbers become floats, and
// true/false/null become the matching PHP values. Every token parser that
// can be followed by insignificant whitespace consumes it itself.
$jsonGrammar = new Grammar(
    "<topobject>",
    array(
        "<topobject>" => new ConcParser(
            array("WHITESPACE", "<object>"),
            function ($whitespace, $object) {
                return $object;
            }
        ),

        "<object>" => new ConcParser(
            array("LEFT_BRACE", "WHITESPACE", "<objectcontent>", "RIGHT_BRACE", "WHITESPACE"),
            function ($left_brace, $whitespace0, $objectcontent, $right_brace, $whitespace1) {
                return $objectcontent;
            }
        ),

        // Try the non-empty form first; the empty parser always succeeds.
        "<objectcontent>" => new LazyAltParser(array("<fullobject>", "<emptyobject>")),

        "<fullobject>" => new ConcParser(
            array("<keyvalue>", "<commakeyvaluelist>"),
            function ($keyvalue, $commakeyvaluelist) {
                // The leading pair is written into the map built from the
                // remaining pairs (so it ends up last unless its key repeats).
                $commakeyvaluelist[$keyvalue[0]] = $keyvalue[1];
                return $commakeyvaluelist;
            }
        ),

        "<emptyobject>" => new EmptyParser(function () {
            return array();
        }),

        "<commakeyvaluelist>" => new GreedyStarParser(
            "<commakeyvalue>",
            function () {
                $commakeyvaluelist = array();
                foreach (func_get_args() as $commakeyvalue) {
                    $commakeyvaluelist[$commakeyvalue[0]] = $commakeyvalue[1];
                }
                return $commakeyvaluelist;
            }
        ),

        "<commakeyvalue>" => new ConcParser(
            array("COMMA", "WHITESPACE", "<keyvalue>"),
            function ($comma, $whitespace, $keyvalue) {
                return $keyvalue;
            }
        ),

        // Yields a two-element array: [key, value].
        "<keyvalue>" => new ConcParser(
            array("<string>", "COLON", "WHITESPACE", "<value>"),
            function ($string, $colon, $whitespace, $value) {
                return array($string, $value);
            }
        ),

        "<array>" => new ConcParser(
            array("LEFT_BRACKET", "WHITESPACE", "<arraycontent>", "RIGHT_BRACKET", "WHITESPACE"),
            function ($left_bracket, $whitespace0, $arraycontent, $right_bracket, $whitespace1) {
                return $arraycontent;
            }
        ),

        "<arraycontent>" => new LazyAltParser(array("<fullarray>", "<emptyarray>")),

        "<fullarray>" => new ConcParser(
            array("<value>", "<commavaluelist>"),
            function ($value, $commavaluelist) {
                array_unshift($commavaluelist, $value);
                return $commavaluelist;
            }
        ),

        "<emptyarray>" => new EmptyParser(function () {
            return array();
        }),

        "<commavaluelist>" => new GreedyStarParser("<commavalue>"),

        "<commavalue>" => new ConcParser(
            array("COMMA", "WHITESPACE", "<value>"),
            function ($comma, $whitespace, $value) {
                return $value;
            }
        ),

        "<value>" => new LazyAltParser(
            array("<string>", "<number>", "<object>", "<array>", "<true>", "<false>", "<null>")
        ),

        "<string>" => new ConcParser(
            array("DOUBLE_QUOTE", "<stringcontent>", "DOUBLE_QUOTE", "WHITESPACE"),
            function ($double_quote0, $stringcontent, $double_quote1, $whitespace) {
                return $stringcontent;
            }
        ),

        "<stringcontent>" => new GreedyStarParser(
            "<char>",
            function () {
                return implode("", func_get_args());
            }
        ),

        "<char>" => new LazyAltParser(
            array(
                "UTF8_EXCEPT",
                "ESCAPED_QUOTE",
                "ESCAPED_BACKSLASH",
                "ESCAPED_SLASH",
                "ESCAPED_B",
                "ESCAPED_F",
                "ESCAPED_N",
                "ESCAPED_R",
                "ESCAPED_T",
                "ESCAPED_UTF8",
            )
        ),

        "<number>" => new ConcParser(
            array("NUMBER", "WHITESPACE"),
            function ($number, $whitespace) {
                return $number;
            }
        ),

        "<true>" => new ConcParser(
            array("TRUE", "WHITESPACE"),
            function ($true, $whitespace) {
                return true;
            }
        ),

        "<false>" => new ConcParser(
            array("FALSE", "WHITESPACE"),
            function ($false, $whitespace) {
                return false;
            }
        ),

        "<null>" => new ConcParser(
            array("NULL", "WHITESPACE"),
            function ($null, $whitespace) {
                return null;
            }
        ),

        // ---- terminals ----

        "WHITESPACE" => new RegexParser("#^[ \n\r\t]*#"),

        "LEFT_BRACE" => new StringParser("{"),
        "RIGHT_BRACE" => new StringParser("}"),
        "LEFT_BRACKET" => new StringParser("["),
        "RIGHT_BRACKET" => new StringParser("]"),
        "COLON" => new StringParser(":"),
        "COMMA" => new StringParser(","),
        "DOUBLE_QUOTE" => new StringParser("\""),

        "NUMBER" => new RegexParser(
            "#^-?(0|[1-9][0-9]*)(\\.[0-9]+)?([eE][-+]?[0-9]+)?#",
            function ($match) {
                // (float) is the canonical spelling of the former (double) cast.
                return (float) $match;
            }
        ),

        "TRUE" => new StringParser("true"),
        "FALSE" => new StringParser("false"),
        "NULL" => new StringParser("null"),

        // Any UTF-8 character except the double quote, the backslash and the
        // characters produced from Utf8Parser::$controls.
        "UTF8_EXCEPT" => new Utf8Parser(
            array_merge(
                array("\"", "\\"),
                array_map(
                    function ($codepoint) {
                        return Utf8Parser::getBytes($codepoint);
                    },
                    Utf8Parser::$controls
                )
            )
        ),

        // For \" \\ and \/ the decoded character is simply the second byte.
        "ESCAPED_QUOTE" => new StringParser("\\\"", function ($string) {
            return substr($string, 1, 1);
        }),
        "ESCAPED_BACKSLASH" => new StringParser("\\\\", function ($string) {
            return substr($string, 1, 1);
        }),
        "ESCAPED_SLASH" => new StringParser("\\/", function ($string) {
            return substr($string, 1, 1);
        }),

        // JSON \b is the backspace character U+0008. PHP has no "\b" string
        // escape, so it is spelled out as a hex escape here.
        "ESCAPED_B" => new StringParser("\\b", function ($string) {
            return "\x08";
        }),
        "ESCAPED_F" => new StringParser("\\f", function ($string) {
            return "\f";
        }),
        "ESCAPED_N" => new StringParser("\\n", function ($string) {
            return "\n";
        }),
        "ESCAPED_R" => new StringParser("\\r", function ($string) {
            return "\r";
        }),
        "ESCAPED_T" => new StringParser("\\t", function ($string) {
            return "\t";
        }),

        // \uXXXX: the four hex digits after "\u" are turned into a code point.
        "ESCAPED_UTF8" => new RegexParser(
            "#^\\\\u[0-9a-fA-F]{4}#",
            function ($match) {
                return Utf8Parser::getBytes(hexdec(substr($match, 2, 4)));
            }
        ),
    )
);
foreach ($this->mAction as $id => $row) { $s .= "\t{$id} => array(\n"; foreach ($row as $t => $action) { if ($action[0] == 'shift') { $s .= "\t\t'{$t}' => array( 0, {$action[1]} ),\n"; } if ($action[0] == 'reduce') { $s .= "\t\t'{$t}' => array( 1, {$action[1]} ),\n"; } if ($action[0] == 'accept') { $s .= "\t\t'{$t}' => array( 2, null ),\n"; } } $s .= "\t),\n"; } $s .= ");\n\n"; $s .= "static \$goto = array(\n"; foreach ($this->mGoto as $id => $row) { $body = $this->formatAssocArray($row); $s .= "\t{$id} => {$body},\n"; } $s .= ");\n\n"; $s .= "}\n"; return $s; } } $definition = file_get_contents(dirname(__FILE__) . '/syntax.txt'); $grammar = Grammar::parse($definition); $grammar->buildLRTable(); file_put_contents('LRTableBuildReport.html', $grammar->buildHTMLDump()); file_put_contents('LRTable.php', $grammar->buildPHPFile());
// Grammar for Wirth syntax notation. Parsing a notation text does not give a
// syntax tree: each production's right-hand side is turned directly into
// Loco parser objects, and the final callback wraps them in a new Grammar
// whose start symbol is the first production's identifier.
$wirthGrammar = new Grammar(
    "SYNTAX",
    array(
        "SYNTAX" => new GreedyStarParser("PRODUCTION"),

        // IDENTIFIER "=" EXPRESSION "."
        "PRODUCTION" => new ConcParser(
            array(
                "whitespace",
                "IDENTIFIER",
                new StringParser("="),
                "whitespace",
                "EXPRESSION",
                new StringParser("."),
                "whitespace",
            ),
            function ($ws1, $name, $eq, $ws2, $body, $period, $ws3) {
                return array(
                    "identifier" => $name,
                    "expression" => $body,
                );
            }
        ),

        // TERM { "|" TERM }  becomes a LazyAltParser over all the terms.
        "EXPRESSION" => new ConcParser(
            array(
                "TERM",
                new GreedyStarParser(
                    new ConcParser(
                        array(new StringParser("|"), "whitespace", "TERM"),
                        function ($bar, $ws, $alternative) {
                            return $alternative;
                        }
                    )
                ),
            ),
            function ($head, $tail) {
                return new LazyAltParser(array_merge(array($head), $tail));
            }
        ),

        // One or more factors in sequence become a ConcParser.
        "TERM" => new GreedyMultiParser(
            "FACTOR",
            1,
            null,
            function () {
                $factors = func_get_args();

                return new ConcParser($factors);
            }
        ),

        "FACTOR" => new LazyAltParser(
            array(
                "IDENTIFIER",
                "LITERAL",

                // [ EXPRESSION ] : zero or one occurrence
                new ConcParser(
                    array(
                        new StringParser("["),
                        "whitespace",
                        "EXPRESSION",
                        new StringParser("]"),
                        "whitespace",
                    ),
                    function ($open, $ws1, $inner, $close, $ws2) {
                        return new GreedyMultiParser($inner, 0, 1);
                    }
                ),

                // ( EXPRESSION ) : plain grouping
                new ConcParser(
                    array(
                        new StringParser("("),
                        "whitespace",
                        "EXPRESSION",
                        new StringParser(")"),
                        "whitespace",
                    ),
                    function ($open, $ws1, $inner, $close, $ws2) {
                        return $inner;
                    }
                ),

                // { EXPRESSION } : zero or more occurrences
                new ConcParser(
                    array(
                        new StringParser("{"),
                        "whitespace",
                        "EXPRESSION",
                        new StringParser("}"),
                        "whitespace",
                    ),
                    function ($open, $ws1, $inner, $close, $ws2) {
                        return new GreedyStarParser($inner);
                    }
                ),
            )
        ),

        // One or more letters, returned as a single string.
        "IDENTIFIER" => new ConcParser(
            array(
                new GreedyMultiParser(
                    "letter",
                    1,
                    null,
                    function () {
                        return implode("", func_get_args());
                    }
                ),
                "whitespace",
            ),
            function ($word, $ws) {
                return $word;
            }
        ),

        // A double-quoted literal becomes a StringParser for its content.
        "LITERAL" => new ConcParser(
            array(
                new StringParser("\""),
                new GreedyMultiParser(
                    "character",
                    1,
                    null,
                    function () {
                        return implode("", func_get_args());
                    }
                ),
                new StringParser("\""),
                "whitespace",
            ),
            function ($openQuote, $text, $closeQuote, $ws) {
                return new StringParser($text);
            }
        ),

        "digit" => new RegexParser("#^[0-9]#"),

        "letter" => new RegexParser("#^[a-zA-Z]#"),

        // Inside a literal, a doubled quote stands for one quote character.
        "character" => new RegexParser(
            "#^([^\"]|\"\")#",
            function ($matched) {
                return $matched === "\"\"" ? "\"" : $matched;
            }
        ),

        "whitespace" => new RegexParser("#^[ \n\r\t]*#"),
    ),
    function ($productions) {
        $parsers = array();

        foreach ($productions as $production) {
            $name = $production["identifier"];

            // The first production seen names the start symbol.
            if (count($parsers) === 0) {
                $top = $name;
            }

            $parsers[$name] = $production["expression"];
        }

        if (count($parsers) === 0) {
            throw new Exception("No rules.");
        }

        return new Grammar($top, $parsers);
    }
);
$status = odbc_errormsg($connection); } echo "\"metadata\":["; for ($i = 1; $i <= $nCols; $i++) { echo "{\"type\":"; echo json_encode(odbc_field_type($result, $i)); echo ",\"name\":"; echo json_encode(odbc_field_name($result, $i)); echo ",\"len\":"; echo json_encode(odbc_field_len($result, $i)); echo ",\"precision\":"; echo json_encode(odbc_field_precision($result, $i)); echo ",\"scale\":"; echo json_encode(odbc_field_scale($result, $i)); if ($i < $nCols) { echo "},"; } else { echo "}"; } } echo "],"; $result = odbc_exec($connection, Grammar::count($_POST["table"])); if ($result && odbc_fetch_row($result)) { echo "\"totalRecords\":" . odbc_result($result, 1) . ","; } else { $status = "Error while trying to count records"; } $status = "ok"; } else { $status = "Table required"; }
/**
 * Debugging aid: echoes $this->table as an ASCII grid, one line per state
 * and one column per symbol name.
 *
 * Odd cell values are printed as "#<value>"; even values are resolved
 * through $Grammar->get_rule() and printed as "lhs -> rhs ...".
 *
 * @param Lex     $Lex     used to turn symbols into printable names
 * @param Grammar $Grammar used to look up rules for even cell values
 * @param int     $state   optional; when given, only the row whose key is
 *                         strictly identical to it is printed
 * @return void
 */
function dump(Lex $Lex, Grammar $Grammar, $state = null)
{
    $table = array();
    // column name => widest text seen in that column ('' is the row-header column)
    $heads = array('' => 0);

    // Pass 1: translate every cell to text and measure column widths.
    foreach ($this->table as $i => $row) {
        if (!is_null($state) && $i !== $state) {
            continue;
        }

        $label = "#{$i}";
        $cells = array('' => $label);
        $heads[''] = max($heads[''], strlen($label));

        foreach ($row as $sym => $entry) {
            $name = is_null($sym) ? 'null' : $Lex->name($sym);

            if ($entry & 1) {
                // odd value: shown as a state reference
                $text = " #{$entry} ";
            } else {
                // even value: shown as the rule it identifies
                list($nt, $rhs) = $Grammar->get_rule($entry);
                $text = ' ' . $Lex->name($nt) . ' -> ';
                foreach ($rhs as $t) {
                    $text .= $Lex->name($t) . ' ';
                }
            }

            $cells[$name] = $text;

            // A column is at least as wide as its own heading.
            $known = isset($heads[$name]) ? $heads[$name] : strlen($name);
            $heads[$name] = max($known, strlen($text));
        }

        $table[$i] = $cells;
    }

    // Pass 2: build the separator rule and the heading line.
    $titles = array();
    $dashes = array();
    foreach ($heads as $name => $width) {
        $dashes[] = str_repeat('-', $width);
        $titles[] = str_pad($name, $width, ' ', STR_PAD_BOTH);
    }
    $rule = '+' . implode('+', $dashes) . "+\n";

    echo $rule;
    echo '|', implode('|', $titles), "|\n";

    // Pass 3: one separator plus one padded line per collected row.
    foreach ($table as $cells) {
        $line = array();
        foreach ($heads as $name => $width) {
            $line[] = isset($cells[$name])
                ? str_pad($cells[$name], $width, ' ', STR_PAD_BOTH)
                : str_repeat(' ', $width);
        }
        echo $rule;
        echo '|', implode('|', $line), "|\n";
    }

    echo $rule;
}
// Grammar for a small regular-expression dialect. Parsing yields a Pattern
// made of Conc / Mult / Multiplier / Charclass objects.
$regexGrammar = new Grammar(
    "<pattern>",
    array(
        // conc ( "|" conc )*
        "<pattern>" => new ConcParser(
            array("<conc>", "<pipeconclist>"),
            function ($conc, $pipeconclist) {
                array_unshift($pipeconclist, $conc);
                return new Pattern($pipeconclist);
            }
        ),

        "<pipeconclist>" => new GreedyStarParser("<pipeconc>"),

        "<pipeconc>" => new ConcParser(
            array(new StringParser("|"), "<conc>"),
            function ($pipe, $conc) {
                return $conc;
            }
        ),

        "<conc>" => new GreedyStarParser(
            "<mult>",
            function () {
                return new Conc(func_get_args());
            }
        ),

        "<mult>" => new ConcParser(
            array("<multiplicand>", "<multiplier>"),
            function ($multiplicand, $multiplier) {
                return new Mult($multiplicand, $multiplier);
            }
        ),

        "<multiplicand>" => new LazyAltParser(array("<subpattern>", "<charclass>")),

        "<subpattern>" => new ConcParser(
            array(new StringParser("("), "<pattern>", new StringParser(")")),
            function ($left_parenthesis, $pattern, $right_parenthesis) {
                return $pattern;
            }
        ),

        // {m,n}  ?  *  +  or nothing (exactly once)
        "<multiplier>" => new LazyAltParser(
            array(
                "<bracemultiplier>",
                new StringParser("?", function ($string) {
                    return new Multiplier(0, 1);
                }),
                new StringParser("*", function ($string) {
                    return new Multiplier(0, null);
                }),
                new StringParser("+", function ($string) {
                    return new Multiplier(1, null);
                }),
                new EmptyParser(function () {
                    return new Multiplier(1, 1);
                }),
            )
        ),

        "<bracemultiplier>" => new ConcParser(
            array(new StringParser("{"), "<multiplierinterior>", new StringParser("}")),
            function ($left_brace, $multiplierinterior, $right_brace) {
                return $multiplierinterior;
            }
        ),

        // Longest forms first: "m,n" before "m," before "m".
        "<multiplierinterior>" => new LazyAltParser(array("<bothbounds>", "<unlimited>", "<onebound>")),

        "<bothbounds>" => new ConcParser(
            array("<integer>", "COMMA", "<integer>"),
            function ($integer1, $comma, $integer2) {
                return new Multiplier($integer1, $integer2);
            }
        ),

        "<unlimited>" => new ConcParser(
            array("<integer>", "COMMA"),
            function ($integer, $comma) {
                return new Multiplier($integer, null);
            }
        ),

        "<onebound>" => new ConcParser(
            array("<integer>"),
            function ($integer) {
                return new Multiplier($integer, $integer);
            }
        ),

        "COMMA" => new StringParser(","),

        "<integer>" => new RegexParser(
            "#^(0|[1-9][0-9]*)#",
            function ($match) {
                return (int) $match;
            }
        ),

        "<charclass>" => new LazyAltParser(
            array(
                // any single character that has no special meaning
                new RegexParser(
                    "#^[^|()\\[\\]?*+{}\\\\.]#",
                    function ($match) {
                        return new Charclass($match);
                    }
                ),

                "<bracketednegatedcharclass>",
                "<bracketedcharclass>",

                // backslash-escaped metacharacters stand for themselves
                new StringParser("\\|", function ($string) {
                    return new Charclass(substr($string, 1, 1));
                }),
                new StringParser("\\(", function ($string) {
                    return new Charclass(substr($string, 1, 1));
                }),
                new StringParser("\\)", function ($string) {
                    return new Charclass(substr($string, 1, 1));
                }),
                new StringParser("\\[", function ($string) {
                    return new Charclass(substr($string, 1, 1));
                }),
                new StringParser("\\]", function ($string) {
                    return new Charclass(substr($string, 1, 1));
                }),
                new StringParser("\\?", function ($string) {
                    return new Charclass(substr($string, 1, 1));
                }),
                new StringParser("\\*", function ($string) {
                    return new Charclass(substr($string, 1, 1));
                }),
                new StringParser("\\+", function ($string) {
                    return new Charclass(substr($string, 1, 1));
                }),
                new StringParser("\\{", function ($string) {
                    return new Charclass(substr($string, 1, 1));
                }),
                new StringParser("\\}", function ($string) {
                    return new Charclass(substr($string, 1, 1));
                }),
                new StringParser("\\\\", function ($string) {
                    return new Charclass(substr($string, 1, 1));
                }),
                new StringParser("\\.", function ($string) {
                    return new Charclass(substr($string, 1, 1));
                }),

                // control-character escapes
                new StringParser("\\f", function ($string) {
                    return new Charclass("\f");
                }),
                new StringParser("\\n", function ($string) {
                    return new Charclass("\n");
                }),
                new StringParser("\\r", function ($string) {
                    return new Charclass("\r");
                }),
                new StringParser("\\t", function ($string) {
                    return new Charclass("\t");
                }),
                new StringParser("\\v", function ($string) {
                    return new Charclass("\v");
                }),

                // shorthand classes and their negations
                new StringParser("\\w", function ($string) {
                    return new Charclass("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz");
                }),
                new StringParser("\\W", function ($string) {
                    return new Charclass("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz", true);
                }),
                new StringParser("\\d", function ($string) {
                    return new Charclass("0123456789");
                }),
                new StringParser("\\D", function ($string) {
                    return new Charclass("0123456789", true);
                }),
                new StringParser("\\s", function ($string) {
                    return new Charclass(" \f\n\r\t\v");
                }),
                new StringParser("\\S", function ($string) {
                    return new Charclass(" \f\n\r\t\v", true);
                }),

                // "." is the negation of the empty class
                new StringParser(".", function ($string) {
                    return new Charclass("", true);
                }),
            )
        ),

        // "[^" elems "]". The callback must accept one argument per
        // sub-parser; without the $caret parameter the "^" token was bound
        // to $elemlist and the real element list was dropped.
        "<bracketednegatedcharclass>" => new ConcParser(
            array("LEFT_BRACKET", "CARET", "<elemlist>", "RIGHT_BRACKET"),
            function ($left_bracket, $caret, $elemlist, $right_bracket) {
                return new Charclass($elemlist, true);
            }
        ),

        "<bracketedcharclass>" => new ConcParser(
            array("LEFT_BRACKET", "<elemlist>", "RIGHT_BRACKET"),
            function ($left_bracket, $elemlist, $right_bracket) {
                return new Charclass($elemlist);
            }
        ),

        "LEFT_BRACKET" => new StringParser("["),
        "RIGHT_BRACKET" => new StringParser("]"),
        "CARET" => new StringParser("^"),

        "<elemlist>" => new GreedyStarParser(
            "<elem>",
            function () {
                return implode("", func_get_args());
            }
        ),

        "<elem>" => new LazyAltParser(array("<charrange>", "<classchar>")),

        // "a-z" expands to every byte value from ord(a) to ord(z) inclusive.
        "<charrange>" => new ConcParser(
            array("<classchar>", "HYPHEN", "<classchar>"),
            function ($char1, $hyphen, $char2) {
                $char1 = ord($char1);
                $char2 = ord($char2);
                if ($char2 < $char1) {
                    throw new Exception("Disordered range");
                }
                $string = "";
                for ($ord = $char1; $ord <= $char2; $ord++) {
                    $string .= chr($ord);
                }
                return $string;
            }
        ),

        "HYPHEN" => new StringParser("-"),

        "<classchar>" => new LazyAltParser(
            array(
                new RegexParser("#^[^\\\\\\[\\]\\^\\-]#"),
                new StringParser("\\\\", function ($string) {
                    return substr($string, 1, 1);
                }),
                new StringParser("\\[", function ($string) {
                    return substr($string, 1, 1);
                }),
                new StringParser("\\]", function ($string) {
                    return substr($string, 1, 1);
                }),
                new StringParser("\\^", function ($string) {
                    return substr($string, 1, 1);
                }),
                new StringParser("\\-", function ($string) {
                    return substr($string, 1, 1);
                }),
                new StringParser("\\f", function ($string) {
                    return "\f";
                }),
                new StringParser("\\n", function ($string) {
                    return "\n";
                }),
                new StringParser("\\r", function ($string) {
                    return "\r";
                }),
                new StringParser("\\t", function ($string) {
                    return "\t";
                }),
                new StringParser("\\v", function ($string) {
                    return "\v";
                }),
            )
        ),
    )
);
var_dump($parser->match("f", 0) === array("j" => 1, "value" => array("f"))); var_dump($parser->match("ff", 0) === array("j" => 2, "value" => array("f", "f"))); var_dump($parser->match("fff", 0) === array("j" => 2, "value" => array("f", "f"))); $parser = new GreedyMultiParser(new StringParser("f"), 1, null); try { $parser->match("", 0); var_dump(false); } catch (ParseFailureException $e) { var_dump(true); } var_dump($parser->match("f", 0) === array("j" => 1, "value" => array("f"))); var_dump($parser->match("ff", 0) === array("j" => 2, "value" => array("f", "f"))); var_dump($parser->match("fff", 0) === array("j" => 3, "value" => array("f", "f", "f"))); var_dump($parser->match("ffg", 0) === array("j" => 2, "value" => array("f", "f"))); print "11\n"; $grammar = new Grammar("<A>", array("<A>" => new EmptyParser())); try { $grammar->parse("a"); var_dump(false); } catch (ParseFailureException $e) { var_dump(true); } var_dump($grammar->parse("") === null); print "12A\n"; try { $grammar = new Grammar("<S>", array("<S>" => new GreedyMultiParser("<A>", 7, null), "<A>" => new EmptyParser())); var_dump(false); } catch (GrammarException $e) { var_dump(true); } try {
// Grammar for Backus-Naur Form. Each rule's right-hand side is converted
// straight into Loco parser objects, and the final callback assembles them
// into a new Grammar whose start symbol is the first rule's name.
$bnfGrammar = new Grammar(
    "<syntax>",
    array(
        "<syntax>" => new ConcParser(
            array("<rules>", "OPT-WHITESPACE"),
            function ($ruleList, $trailing) {
                return $ruleList;
            }
        ),

        // At least one rule-or-empty-line; empty lines arrive as null and
        // are dropped from the resulting list.
        "<rules>" => new GreedyMultiParser(
            "<ruleoremptyline>",
            1,
            null,
            function () {
                $kept = array_filter(
                    func_get_args(),
                    function ($candidate) {
                        return $candidate !== null;
                    }
                );

                return array_values($kept);
            }
        ),

        "<ruleoremptyline>" => new LazyAltParser(array("<rule>", "<emptyline>")),

        "<emptyline>" => new ConcParser(
            array("OPT-WHITESPACE", "EOL"),
            function ($indent, $lineEnd) {
                return null;
            }
        ),

        // <name> ::= expression EOL
        "<rule>" => new ConcParser(
            array(
                "OPT-WHITESPACE",
                "RULE-NAME",
                "OPT-WHITESPACE",
                new StringParser("::="),
                "OPT-WHITESPACE",
                "<expression>",
                "EOL",
            ),
            function ($ws1, $name, $ws2, $defines, $ws3, $body, $lineEnd) {
                return array(
                    "rule-name" => $name,
                    "expression" => $body,
                );
            }
        ),

        // list ( "|" list )*  becomes a LazyAltParser over all the lists.
        "<expression>" => new ConcParser(
            array("<list>", "<pipelists>"),
            function ($first, $others) {
                return new LazyAltParser(array_merge(array($first), $others));
            }
        ),

        "<pipelists>" => new GreedyStarParser("<pipelist>"),

        "<pipelist>" => new ConcParser(
            array(new StringParser("|"), "OPT-WHITESPACE", "<list>"),
            function ($bar, $ws, $alternative) {
                return $alternative;
            }
        ),

        // One or more terms in sequence become a ConcParser.
        "<list>" => new GreedyMultiParser(
            "<term>",
            1,
            null,
            function () {
                $terms = func_get_args();

                return new ConcParser($terms);
            }
        ),

        "<term>" => new ConcParser(
            array("TERM", "OPT-WHITESPACE"),
            function ($item, $ws) {
                return $item;
            }
        ),

        "TERM" => new LazyAltParser(array("LITERAL", "RULE-NAME")),

        // Double- or single-quoted text. An empty literal becomes an
        // EmptyParser yielding "", anything else a StringParser.
        "LITERAL" => new LazyAltParser(
            array(
                new RegexParser(
                    '#^"([^"]*)"#',
                    function ($whole, $inner) {
                        return $inner;
                    }
                ),
                new RegexParser(
                    "#^'([^']*)'#",
                    function ($whole, $inner) {
                        return $inner;
                    }
                ),
            ),
            function ($content) {
                if ($content == "") {
                    return new EmptyParser(function () {
                        return "";
                    });
                }

                return new StringParser($content);
            }
        ),

        "RULE-NAME" => new RegexParser("#^<[A-Za-z\\-]*>#"),

        "OPT-WHITESPACE" => new RegexParser("#^[\t ]*#"),

        "EOL" => new LazyAltParser(
            array(
                new StringParser("\r"),
                new StringParser("\n"),
            )
        ),
    ),
    function ($rules) {
        $parsers = array();

        foreach ($rules as $rule) {
            $name = $rule["rule-name"];

            // The first rule seen names the start symbol.
            if (count($parsers) === 0) {
                $top = $name;
            }

            $parsers[$name] = $rule["expression"];
        }

        if (count($parsers) === 0) {
            throw new Exception("No rules.");
        }

        return new Grammar($top, $parsers);
    }
);
// Grammar for "Loco notation", a BNF-like syntax with *, +, ? multipliers,
// quoted strings, /regexes/, [^...] exclusions, "." and parenthesised
// sub-expressions. Each rule's right-hand side is converted directly into
// Loco parser objects and the final callback wraps them in a new Grammar.
$locoGrammar = new Grammar(
    "<grammar>",
    array(
        "<grammar>" => new ConcParser(
            array("<whitespace>", "<rules>"),
            function ($whitespace, $rules) {
                return $rules;
            }
        ),

        // Blank lines arrive as null and are dropped from the rule list.
        "<rules>" => new GreedyStarParser(
            "<ruleorblankline>",
            function () {
                $rules = array();
                foreach (func_get_args() as $ruleorblankline) {
                    if ($ruleorblankline === null) {
                        continue;
                    }
                    $rules[] = $ruleorblankline;
                }
                return $rules;
            }
        ),

        "<ruleorblankline>" => new LazyAltParser(array("<rule>", "<blankline>")),

        "<blankline>" => new ConcParser(
            array(new RegexParser("#^\r?\n#"), "<whitespace>"),
            function () {
                return null;
            }
        ),

        // bareword ::= lazyaltparser
        "<rule>" => new ConcParser(
            array("<bareword>", "<whitespace>", new StringParser("::="), "<whitespace>", "<lazyaltparser>"),
            function ($bareword, $whitespace1, $equals, $whitespace2, $lazyaltparser) {
                return array("name" => $bareword, "lazyaltparser" => $lazyaltparser);
            }
        ),

        "<lazyaltparser>" => new ConcParser(
            array("<concparser>", "<pipeconcparserlist>"),
            function ($concparser, $pipeconcparserlist) {
                array_unshift($pipeconcparserlist, $concparser);

                // make a basic lazyaltparser which returns whatever.
                // Since the LazyAltParser always contains 0 or more ConcParsers,
                // the value of $result is always an array
                return new LazyAltParser($pipeconcparserlist);
            }
        ),

        "<pipeconcparserlist>" => new GreedyStarParser("<pipeconcparser>"),

        "<pipeconcparser>" => new ConcParser(
            array(new StringParser("|"), "<whitespace>", "<concparser>"),
            function ($pipe, $whitespace, $concparser) {
                return $concparser;
            }
        ),

        "<concparser>" => new GreedyStarParser(
            "<bnfmultiplication>",
            function () {
                // get array key numbers where multiparsers are located
                // in reverse order so that our splicing doesn't modify the array
                //
                // instanceof resolves the class name by the same rules as the
                // "new GreedyMultiParser" expressions in this file, so it also
                // matches when the class lives in a namespace; the previous
                // is_a() call with an unqualified string name did not.
                $multiparsers = array();
                foreach (func_get_args() as $k => $internal) {
                    if ($internal instanceof GreedyMultiParser) {
                        array_unshift($multiparsers, $k);
                    }
                }

                // We do something quite advanced here. The inner multiparsers are
                // spliced out into the list of arguments proper instead of forming an
                // internal sub-array of their own
                return new ConcParser(
                    func_get_args(),
                    function () use ($multiparsers) {
                        $args = func_get_args();
                        foreach ($multiparsers as $k) {
                            array_splice($args, $k, 1, $args[$k]);
                        }
                        return $args;
                    }
                );
            }
        ),

        "<bnfmultiplication>" => new ConcParser(
            array("<bnfmultiplicand>", "<whitespace>", "<bnfmultiplier>", "<whitespace>"),
            function ($bnfmultiplicand, $whitespace1, $bnfmultiplier, $whitespace2) {
                if (is_array($bnfmultiplier)) {
                    return new GreedyMultiParser(
                        $bnfmultiplicand,
                        $bnfmultiplier["lower"],
                        $bnfmultiplier["upper"]
                    );
                }

                // otherwise assume multiplier = 1
                return $bnfmultiplicand;
            }
        ),

        "<bnfmultiplicand>" => new LazyAltParser(
            array(
                "<bareword>",
                "<dqstringparser>",
                "<sqstringparser>",
                "<regexparser>",
                "<utf8except>",
                "<utf8parser>",
                "<subparser>",
            )
        ),

        "<bnfmultiplier>" => new LazyAltParser(
            array("<asterisk>", "<plus>", "<questionmark>", "<emptymultiplier>")
        ),

        "<asterisk>" => new StringParser("*", function () {
            return array("lower" => 0, "upper" => null);
        }),

        "<plus>" => new StringParser("+", function () {
            return array("lower" => 1, "upper" => null);
        }),

        "<questionmark>" => new StringParser("?", function () {
            return array("lower" => 0, "upper" => 1);
        }),

        "<emptymultiplier>" => new EmptyParser(),

        // "..." : empty content becomes an EmptyParser, otherwise a StringParser
        "<dqstringparser>" => new ConcParser(
            array(new StringParser("\""), "<dqstring>", new StringParser("\"")),
            function ($quote1, $string, $quote2) {
                if ($string === "") {
                    return new EmptyParser();
                }
                return new StringParser($string);
            }
        ),

        // '...' : same treatment as the double-quoted form
        "<sqstringparser>" => new ConcParser(
            array(new StringParser("'"), "<sqstring>", new StringParser("'")),
            function ($apostrophe1, $string, $apostrophe2) {
                if ($string === "") {
                    return new EmptyParser();
                }
                return new StringParser($string);
            }
        ),

        "<dqstring>" => new GreedyStarParser(
            "<dqstrchar>",
            function () {
                return implode("", func_get_args());
            }
        ),

        "<sqstring>" => new GreedyStarParser(
            "<sqstrchar>",
            function () {
                return implode("", func_get_args());
            }
        ),

        "<dqstrchar>" => new LazyAltParser(
            array(
                new Utf8Parser(array("\\", "\"")),
                new StringParser("\\\\", function ($string) {
                    return "\\";
                }),
                new StringParser('\\"', function ($string) {
                    return '"';
                }),
            )
        ),

        "<sqstrchar>" => new LazyAltParser(
            array(
                new Utf8Parser(array("\\", "'")),
                new StringParser("\\\\", function ($string) {
                    return "\\";
                }),
                new StringParser("\\'", function ($string) {
                    return "'";
                }),
            )
        ),

        "<regexparser>" => new ConcParser(
            array(new StringParser("/"), "<regex>", new StringParser("/")),
            function ($slash1, $regex, $slash2) {
                if ($regex === "") {
                    return new EmptyParser();
                }

                // Add the anchor and the brackets to make sure it anchors in the
                // correct location
                $regex = "/^(" . $regex . ")/";

                return new RegexParser($regex);
            }
        ),

        "<regex>" => new GreedyStarParser(
            "<rechar>",
            function () {
                return implode("", func_get_args());
            }
        ),

        // Either an ordinary character, or a backslash followed by any
        // character (both kept verbatim for the regex engine).
        "<rechar>" => new LazyAltParser(
            array(
                new Utf8Parser(array("\\", "/")),
                new ConcParser(
                    array(new StringParser("\\"), new Utf8Parser()),
                    function ($backslash, $char) {
                        return $backslash . $char;
                    }
                ),
            )
        ),

        // [^abc] : a Utf8Parser that excludes the listed characters
        "<utf8except>" => new ConcParser(
            array(new StringParser("[^"), "<exceptions>", new StringParser("]")),
            function ($left_bracket_caret, $exceptions, $right_bracket) {
                return new Utf8Parser($exceptions);
            }
        ),

        "<exceptions>" => new GreedyStarParser("<exceptionchar>"),

        "<exceptionchar>" => new LazyAltParser(
            array(
                new Utf8Parser(array("\\", "]")),
                new StringParser("\\\\", function ($string) {
                    return "\\";
                }),
                new StringParser("\\]", function ($string) {
                    return "]";
                }),
            )
        ),

        "<utf8parser>" => new StringParser(".", function () {
            return new Utf8Parser(array());
        }),

        "<subparser>" => new ConcParser(
            array(new StringParser("("), "<whitespace>", "<lazyaltparser>", new StringParser(")")),
            function ($left_parenthesis, $whitespace1, $lazyaltparser, $right_parenthesis) {
                return $lazyaltparser;
            }
        ),

        "<whitespace>" => new RegexParser("#^[ \t]*#"),

        "<bareword>" => new RegexParser("#^[a-zA-Z_][a-zA-Z0-9_]*#"),
    ),
    function ($rules) {
        $parsers = array();

        // Defined up front so that an input without any rules no longer reads
        // an undefined variable; Grammar still receives null in that case.
        $top = null;

        foreach ($rules as $rule) {
            // The first rule seen names the start symbol.
            if (count($parsers) === 0) {
                $top = $rule["name"];
            }
            $parsers[$rule["name"]] = $rule["lazyaltparser"];
        }

        return new Grammar($top, $parsers);
    }
);
// Grammar for Extended Backus-Naur Form: rules of the shape
// "name = alternatives ;" with comma-separated terms, (...) grouping,
// {...} repetition, [...] optionals and (* ... *) comments. Each rule's
// right-hand side is converted directly into Loco parser objects and the
// final callback wraps them in a new Grammar.
$ebnfGrammar = new Grammar(
    "<syntax>",
    array(
        "<syntax>" => new ConcParser(
            array("<space>", "<rules>"),
            function ($space, $rules) {
                return $rules;
            }
        ),

        "<rules>" => new GreedyStarParser("<rule>"),

        // bareword = alt ;
        "<rule>" => new ConcParser(
            array(
                "<bareword>",
                "<space>",
                new StringParser("="),
                "<space>",
                "<alt>",
                new StringParser(";"),
                "<space>",
            ),
            function ($bareword, $space1, $equals, $space2, $alt, $semicolon, $space3) {
                return array("rule-name" => $bareword, "expression" => $alt);
            }
        ),

        // conc ( "|" conc )*  becomes a LazyAltParser
        "<alt>" => new ConcParser(
            array("<conc>", "<pipeconclist>"),
            function ($conc, $pipeconclist) {
                array_unshift($pipeconclist, $conc);
                return new LazyAltParser($pipeconclist);
            }
        ),

        "<pipeconclist>" => new GreedyStarParser("<pipeconc>"),

        "<pipeconc>" => new ConcParser(
            array(new StringParser("|"), "<space>", "<conc>"),
            function ($pipe, $space, $conc) {
                return $conc;
            }
        ),

        // term ( "," term )*  becomes a ConcParser
        "<conc>" => new ConcParser(
            array("<term>", "<commatermlist>"),
            function ($term, $commatermlist) {
                array_unshift($commatermlist, $term);

                // get array key numbers where multiparsers are located
                // in reverse order so that our splicing doesn't modify the array
                //
                // instanceof resolves the class name by the same rules as the
                // "new GreedyMultiParser" expressions in this file, so it also
                // matches when the class lives in a namespace; the previous
                // is_a() call with an unqualified string name did not.
                $multiparsers = array();
                foreach ($commatermlist as $k => $internal) {
                    if ($internal instanceof GreedyMultiParser) {
                        array_unshift($multiparsers, $k);
                    }
                }

                // We do something quite advanced here. The inner multiparsers are
                // spliced out into the list of arguments proper instead of forming an
                // internal sub-array of their own
                return new ConcParser(
                    $commatermlist,
                    function () use ($multiparsers) {
                        $args = func_get_args();
                        foreach ($multiparsers as $k) {
                            array_splice($args, $k, 1, $args[$k]);
                        }
                        return $args;
                    }
                );
            }
        ),

        "<commatermlist>" => new GreedyStarParser("<commaterm>"),

        "<commaterm>" => new ConcParser(
            array(new StringParser(","), "<space>", "<term>"),
            function ($comma, $space, $term) {
                return $term;
            }
        ),

        "<term>" => new LazyAltParser(
            array("<bareword>", "<sq>", "<dq>", "<group>", "<repetition>", "<optional>")
        ),

        // Lower-case words, possibly containing inner spaces.
        "<bareword>" => new ConcParser(
            array(
                new RegexParser(
                    "#^([a-z][a-z ]*[a-z]|[a-z])#",
                    function ($match0) {
                        return $match0;
                    }
                ),
                "<space>",
            ),
            function ($bareword, $space) {
                return $bareword;
            }
        ),

        // '...' : empty content becomes an EmptyParser, otherwise a StringParser
        "<sq>" => new ConcParser(
            array(
                new RegexParser(
                    "#^'([^']*)'#",
                    function ($match0, $match1) {
                        if ($match1 === "") {
                            return new EmptyParser();
                        }
                        return new StringParser($match1);
                    }
                ),
                "<space>",
            ),
            function ($string, $space) {
                return $string;
            }
        ),

        // "..." : same treatment as the single-quoted form
        "<dq>" => new ConcParser(
            array(
                new RegexParser(
                    '#^"([^"]*)"#',
                    function ($match0, $match1) {
                        if ($match1 === "") {
                            return new EmptyParser();
                        }
                        return new StringParser($match1);
                    }
                ),
                "<space>",
            ),
            function ($string, $space) {
                return $string;
            }
        ),

        // ( alt ) : plain grouping
        "<group>" => new ConcParser(
            array(new StringParser("("), "<space>", "<alt>", new StringParser(")"), "<space>"),
            function ($left_paren, $space1, $alt, $right_paren, $space2) {
                return $alt;
            }
        ),

        // { alt } : zero or more occurrences
        "<repetition>" => new ConcParser(
            array(new StringParser("{"), "<space>", "<alt>", new StringParser("}"), "<space>"),
            function ($left_brace, $space1, $alt, $right_brace, $space2) {
                return new GreedyStarParser($alt);
            }
        ),

        // [ alt ] : zero or one occurrence
        "<optional>" => new ConcParser(
            array(new StringParser("["), "<space>", "<alt>", new StringParser("]"), "<space>"),
            function ($left_bracket, $space1, $alt, $right_bracket, $space2) {
                return new GreedyMultiParser($alt, 0, 1);
            }
        ),

        "<space>" => new GreedyStarParser("<whitespace/comment>"),

        "<whitespace/comment>" => new LazyAltParser(array("<whitespace>", "<comment>")),

        "<whitespace>" => new RegexParser("#^[ \t\r\n]+#"),

        "<comment>" => new RegexParser("#^(\\(\\* [^*]* \\*\\)|\\(\\* \\*\\)|\\(\\*\\*\\))#"),
    ),
    function ($syntax) {
        $parsers = array();

        foreach ($syntax as $rule) {
            // The first rule seen names the start symbol.
            if (count($parsers) === 0) {
                $top = $rule["rule-name"];
            }
            $parsers[$rule["rule-name"]] = $rule["expression"];
        }

        if (count($parsers) === 0) {
            throw new Exception("No rules.");
        }

        return new Grammar($top, $parsers);
    }
);
/**
 * Recursive counterpart to collect_states().
 *
 * Marks this node with the id of the current traversal, widens the exclusion
 * list with the terminals the grammar excludes for this node's non-terminal,
 * and hands the collection over to every e-transition target.
 *
 * @param array   $states   collection being populated; passed by reference and forwarded as-is
 * @param Grammar $Grammar  grammar queried for the excluded terminals of $this->nt
 * @param array   $excluded terminals excluded so far; extended here before being forwarded
 * @param mixed   $threadId identifier of the current traversal, used as the recursion marker
 * @return void
 */
function collect_states_recursive(array &$states, Grammar $Grammar, array $excluded, $threadId)
{
    // A node already stamped with this traversal's id has been visited:
    // descending again would recurse forever, so only unvisited nodes proceed.
    if ($this->threadId !== $threadId) {
        $this->threadId = $threadId;

        // Terminals denied for this non-terminal are added to whatever was inherited.
        $denied = $Grammar->excluded_terminals($this->nt);
        $excluded = array_merge($excluded, $denied);

        // Follow e-transitions to gather all reachable states.
        foreach ($this->etransitions as $Target) {
            $Target->collect_states_passthru($states, $Grammar, $excluded, $threadId);
        }
    }
}
<?php

namespace Ferno\Loco;

use Exception;

require_once __DIR__ . '/../vendor/autoload.php';

# This code is in the public domain.
# http://qntm.org/loco

// Grammar for a small comment markup: <h5> and <p> blocks separated by
// whitespace, whose text may contain <strong>, <em> and <br/> elements.
// "<comment>" is the top-level rule. Every callback glues the matched pieces
// back together, so a successful parse yields the concatenated matched text.
//
// NOTE(review): LESS_THAN, GREATER_THAN and AMPERSAND match the very characters
// that UTF8_EXCEPT excludes. If HTML entities were intended here, confirm
// against the upstream source before relying on this grammar.
$simpleCommentGrammar = new Grammar(
    "<comment>",
    array(
        // Structural rules.
        "<comment>" => new GreedyStarParser(
            "<blockorwhitespace>",
            function () {
                $pieces = func_get_args();
                return implode("", $pieces);
            }
        ),
        "<blockorwhitespace>" => new LazyAltParser(array("<h5>", "<p>", "WHITESPACE")),
        "<p>" => new ConcParser(
            array("OPEN_P", "<text>", "CLOSE_P"),
            function ($open, $body, $close) {
                return "{$open}{$body}{$close}";
            }
        ),
        "<h5>" => new ConcParser(
            array("OPEN_H5", "<text>", "CLOSE_H5"),
            function ($open, $body, $close) {
                return "{$open}{$body}{$close}";
            }
        ),
        "<strong>" => new ConcParser(
            array("OPEN_STRONG", "<text>", "CLOSE_STRONG"),
            function ($open, $body, $close) {
                return "{$open}{$body}{$close}";
            }
        ),
        "<em>" => new ConcParser(
            array("OPEN_EM", "<text>", "CLOSE_EM"),
            function ($open, $body, $close) {
                return "{$open}{$body}{$close}";
            }
        ),
        "<text>" => new GreedyStarParser(
            "<atom>",
            function () {
                $pieces = func_get_args();
                return implode("", $pieces);
            }
        ),
        "<atom>" => new LazyAltParser(array("<char>", "<strong>", "<em>", "FULL_BR")),
        "<char>" => new LazyAltParser(array("UTF8_EXCEPT", "GREATER_THAN", "LESS_THAN", "AMPERSAND")),

        // Tokens: tags tolerate whitespace before the closing angle bracket.
        "WHITESPACE" => new RegexParser("#^[ \n\r\t]+#"),
        "OPEN_P" => new RegexParser("#^<p[ \n\r\t]*>#"),
        "CLOSE_P" => new RegexParser("#^</p[ \n\r\t]*>#"),
        "OPEN_H5" => new RegexParser("#^<h5[ \n\r\t]*>#"),
        "CLOSE_H5" => new RegexParser("#^</h5[ \n\r\t]*>#"),
        "OPEN_EM" => new RegexParser("#^<em[ \n\r\t]*>#"),
        "CLOSE_EM" => new RegexParser("#^</em[ \n\r\t]*>#"),
        "OPEN_STRONG" => new RegexParser("#^<strong[ \n\r\t]*>#"),
        "CLOSE_STRONG" => new RegexParser("#^</strong[ \n\r\t]*>#"),
        "FULL_BR" => new RegexParser("#^<br[ \n\r\t]*/>#"),

        // Single characters.
        "UTF8_EXCEPT" => new Utf8Parser(array("<", ">", "&")),
        "GREATER_THAN" => new StringParser(">"),
        "LESS_THAN" => new StringParser("<"),
        "AMPERSAND" => new StringParser("&")
    )
);

// if executing
this file directly, run unit tests if (__FILE__ !== $_SERVER["SCRIPT_FILENAME"]) { return; } $start = microtime(true); $string = $simpleCommentGrammar->parse("<h5> Title<br /><em\n><strong\n></strong>&</em></h5> \r\n\t <p ><</p >"); print "Parsing completed in " . (microtime(true) - $start) . " seconds\n"; var_dump($string === "<h5> Title<br /><em\n><strong\n></strong>&</em></h5> \r\n\t <p ><</p >"); foreach (array("<h5 style=\"\">", "&", "<", "salkhsfg>", "</p", "<br") as $string) { try {
<?php if (isset($_POST['table'])) { if (isset($_POST['selection'])) { $sql = Grammar::select($_POST['table'], $_POST['selection']); } else { $sql = Grammar::select_all($_POST['table']); } $result = odbc_exec($connection, $sql); $first = isset($_POST['first']) ? $_POST['first'] : 0; if (isset($_POST['rows'])) { $max = $_POST['rows']; } if ($result) { echo "\"data\":["; $j = 0; $data = odbc_fetch_array($result, ++$first); while ($data) { echo json_encode($data); $j++; if (!(isset($max) && $j >= $max)) { $data = odbc_fetch_array($result); } else { $data = false; } if ($data) { echo ","; } } echo "],"; }
/**
 * @override
 * Look up a follow set, building the follow sets lazily on first use.
 *
 * The sets are produced by build_follow_sets() and cached in $this->follows;
 * the lookup itself is delegated to the parent implementation.
 *
 * @param mixed $s symbol, forwarded unchanged to parent::follow_set()
 * @return mixed whatever parent::follow_set() returns for $s
 */
function follow_set($s)
{
    $alreadyBuilt = isset($this->follows);
    if (!$alreadyBuilt) {
        // First request: compute the sets once and keep them for later calls.
        $this->follows = $this->build_follow_sets();
    }

    return parent::follow_set($s);
}
/**
 * Collect sibling states, including this one.
 *
 * Registers this state in $states under its id and then lets every
 * e-transition target continue the collection. When an exclusion list is
 * supplied it is assumed that this state holds a single item.
 *
 * @param array   $states   collection being populated, keyed by state id; passed by reference
 * @param Grammar $Grammar  grammar used to test whether the current symbol is a terminal
 * @param array   $excluded symbols that a state must not begin with; may be empty
 * @param mixed   $threadId traversal identifier, forwarded to collect_states_recursive()
 * @return void
 */
function collect_states_passthru(&$states, Grammar $Grammar, array $excluded, $threadId)
{
    // Already collected: stopping here also breaks cycles.
    if (isset($states[$this->id])) {
        return;
    }

    // Exclusions should only be passed when calling from a station, so there
    // is only one item and it sits at the beginning of its rule.
    if ($excluded) {
        $symbol = $this->Item->current_symbol();

        // This state begins with an excluded symbol: leave it out entirely.
        if (in_array($symbol, $excluded, true)) {
            return;
        }

        // Exclusions do not apply to the descent beyond a terminal, so the
        // list is cleared in that case; otherwise it is passed down as-is.
        $excluded = $Grammar->is_terminal($symbol) ? array() : $excluded;
    }

    $states[$this->id] = $this;

    foreach ($this->etransitions as $Next) {
        $Next->collect_states_recursive($states, $Grammar, $excluded, $threadId);
    }
}