Example #1
0
# Subtraction helper: returns $minuend - $subtrahend.
# The argument order (running value first, next operand second) is what lets
# it be passed as the callback of array_reduce() for a left-to-right fold.
function minus($minuend, $subtrahend)
{
    $difference = $minuend - $subtrahend;

    return $difference;
}
# N -> number
# A non-negative decimal integer without leading zeros, converted to an int.
$N = new RegexParser("#^(0|[1-9][0-9]*)#", function ($digits) {
    return intval($digits);
});

# P -> "-" N
# Only the number is kept; the matched minus sign is discarded.
$P = new ConcParser(array(new StringParser("-"), $N), function ($sign, $number) {
    return $number;
});

# The naive left-recursive formulation
#   S -> N
#   S -> S P
# raises an exception when the Grammar is instantiated, so reaching the
# var_dump(false) line means the check failed.
try {
    $grammar = new Grammar("S", array(
        "S" => new LazyAltParser(array(
            "N",
            new ConcParser(array("S", "P"), "minus"),
        )),
        "P" => $P,
        "N" => $N,
    ));
    var_dump(false);
} catch (GrammarException $e) {
    # Left-recursive in S
    var_dump(true);
}

# Fix the grammar by replacing the recursion with repetition:
#   S -> N P*
$grammar = new Grammar("S", array(
    "S" => new ConcParser(
        array($N, new GreedyStarParser("P")),
        function ($first, $rest) {
            # Clever bit: fold left to right, i.e. ((first - rest[0]) - rest[1]) - ...
            return array_reduce($rest, "minus", $first);
        }
    ),
    "P" => $P,
    "N" => $N,
));

# Prints bool(true) when "5-4-3" evaluates to (5 - 4) - 3 = -2.
var_dump($grammar->parse("5-4-3") === -2);
Example #2
0
        foreach ($this->mAction as $id => $row) {
            $s .= "\t{$id} => array(\n";
            foreach ($row as $t => $action) {
                if ($action[0] == 'shift') {
                    $s .= "\t\t'{$t}' => array( 0, {$action[1]} ),\n";
                }
                if ($action[0] == 'reduce') {
                    $s .= "\t\t'{$t}' => array( 1, {$action[1]} ),\n";
                }
                if ($action[0] == 'accept') {
                    $s .= "\t\t'{$t}' => array( 2, null ),\n";
                }
            }
            $s .= "\t),\n";
        }
        $s .= ");\n\n";
        $s .= "static \$goto = array(\n";
        foreach ($this->mGoto as $id => $row) {
            $body = $this->formatAssocArray($row);
            $s .= "\t{$id} => {$body},\n";
        }
        $s .= ");\n\n";
        $s .= "}\n";
        return $s;
    }
}
// Read the grammar definition stored next to this script and build its LR table.
$syntaxFile = dirname(__FILE__) . '/syntax.txt';
$definition = file_get_contents($syntaxFile);
$grammar = Grammar::parse($definition);
$grammar->buildLRTable();

// Write each generated artefact in turn; the relative file names are resolved
// against the current working directory.
foreach (array('LRTableBuildReport.html' => 'buildHTMLDump', 'LRTable.php' => 'buildPHPFile') as $target => $builder) {
    file_put_contents($target, $grammar->{$builder}());
}
Example #3
0
File: json.php Project: ferno/loco
    return "\f";
}), "ESCAPED_N" => new StringParser("\\n", function ($string) {
    return "\n";
}), "ESCAPED_R" => new StringParser("\\r", function ($string) {
    return "\r";
}), "ESCAPED_T" => new StringParser("\\t", function ($string) {
    return "\t";
}), "ESCAPED_UTF8" => new RegexParser("#^\\\\u[0-9a-fA-F]{4}#", function ($match) {
    return Utf8Parser::getBytes(hexdec(substr($match, 2, 4)));
})));
// If executing this file directly, run unit tests; when it is merely included
// by another script, stop here.
// __FILE__ is always an absolute, symlink-resolved path, but under the CLI
// SCRIPT_FILENAME is the path exactly as typed (e.g. "json.php"), so it has to
// be canonicalised before comparing or the tests are silently skipped.
if (__FILE__ !== realpath($_SERVER["SCRIPT_FILENAME"])) {
    return;
}
// Time a parse of a small document that mixes escapes, nesting and a number
// written in exponent notation.
$start = microtime(true);
$parseTree = $jsonGrammar->parse(" { \"string\" : true, \"\\\"\" : false, \"\\u9874asdh\" : [ null, { }, -9488.44E+093 ] } ");
print "Parsing completed in " . (microtime(true) - $start) . " seconds\n";
var_dump(true);
// for successful parsing
// print_r($parseTree);
// Each check prints bool(true) on success.
var_dump(count($parseTree) === 3);
var_dump($parseTree["string"] === true);
var_dump($parseTree["\""] === false);
var_dump($parseTree["顴asdh"] === array(null, array(), -9.488439999999999E+96));
print "2\n";
// failure modes
foreach (array("{ \"string ", "{ \"\\UAAAA\" ", "{ \"\\u000i\" ", "{ \"a\" : tru ", "{ \"a\" :  +9 ", "{ \"a\" :  9. ", "{ \"a\" :  0a8.52 ", "{ \"a\" :  8E ", "{ \"a\" :  08 ", "[ \"a\" ,  8 ]", " \"a\" ", "{\"\"    :7}", "{\"Ÿ\":7}", "{\"\n\"      :7}", "{\"\r\"      :7}", "{\"\t\"      :7}") as $string) {
    try {
        $jsonGrammar->parse($string);
        var_dump(false);
    } catch (Exception $e) {
Example #4
0
// Final check for the $parser configured before this excerpt: given "fff" it is
// expected to stop after two "f"s.
var_dump($parser->match("fff", 0) === array("j" => 2, "value" => array("f", "f")));
// GreedyMultiParser over "f" with arguments (1, null) -- presumably a lower
// bound of 1 and no upper bound (TODO confirm against the class). The checks
// below expect the empty string to fail and every leading "f" to be consumed.
$parser = new GreedyMultiParser(new StringParser("f"), 1, null);
try {
    $parser->match("", 0);
    var_dump(false);
} catch (ParseFailureException $e) {
    var_dump(true);
}
// In each expected result "j" equals the number of "f"s matched and "value"
// lists them.
var_dump($parser->match("f", 0) === array("j" => 1, "value" => array("f")));
var_dump($parser->match("ff", 0) === array("j" => 2, "value" => array("f", "f")));
var_dump($parser->match("fff", 0) === array("j" => 3, "value" => array("f", "f", "f")));
// Matching stops at the first character that is not "f".
var_dump($parser->match("ffg", 0) === array("j" => 2, "value" => array("f", "f")));
print "11\n";
// A grammar consisting solely of an EmptyParser: "a" must be rejected and the
// empty string must parse to null.
$grammar = new Grammar("<A>", array("<A>" => new EmptyParser()));
try {
    $grammar->parse("a");
    var_dump(false);
} catch (ParseFailureException $e) {
    var_dump(true);
}
var_dump($grammar->parse("") === null);
print "12A\n";
// Constructing a grammar that repeats an EmptyParser is expected to raise a
// GrammarException; reaching var_dump(false) means it was wrongly accepted.
try {
    $grammar = new Grammar("<S>", array("<S>" => new GreedyMultiParser("<A>", 7, null), "<A>" => new EmptyParser()));
    var_dump(false);
} catch (GrammarException $e) {
    var_dump(true);
}
try {
    $grammar = new Grammar("<S>", array("<S>" => new GreedyStarParser("<A>"), "<A>" => new GreedyStarParser("<B>"), "<B>" => new EmptyParser()));
    var_dump(false);
Example #5
0
}), new StringParser("\""), "whitespace"), function ($quote1, $chars, $quote2, $whitespace) {
    return new StringParser($chars);
}), "digit" => new RegexParser("#^[0-9]#"), "letter" => new RegexParser("#^[a-zA-Z]#"), "character" => new RegexParser("#^([^\"]|\"\")#", function ($match0) {
    if ($match0 === "\"\"") {
        return "\"";
    }
    return $match0;
}), "whitespace" => new RegexParser("#^[ \n\r\t]*#")), function ($syntax) {
    $parsers = array();
    foreach ($syntax as $production) {
        if (count($parsers) === 0) {
            $top = $production["identifier"];
        }
        $parsers[$production["identifier"]] = $production["expression"];
    }
    if (count($parsers) === 0) {
        throw new Exception("No rules.");
    }
    return new Grammar($top, $parsers);
});
// If executing this file directly, run unit tests; when it is merely included
// by another script, stop here.
// __FILE__ is always an absolute, symlink-resolved path, but under the CLI
// SCRIPT_FILENAME is the path exactly as typed (possibly relative), so it has
// to be canonicalised before comparing or the tests are silently skipped.
if (__FILE__ !== realpath($_SERVER["SCRIPT_FILENAME"])) {
    return;
}
// This is the syntax for Wirth syntax notation except it lacks whitespace
$string = "\n\t\tSYNTAX     = { PRODUCTION } .\n\t\tPRODUCTION = IDENTIFIER \"=\" EXPRESSION \".\" .\n\t\tEXPRESSION = TERM { \"|\" TERM } .\n\t\tTERM       = FACTOR { FACTOR } .\n\t\tFACTOR     = IDENTIFIER\n\t\t\t\t\t\t\t | LITERAL\n\t\t\t\t\t\t\t | \"[\" EXPRESSION \"]\"\n\t\t\t\t\t\t\t | \"(\" EXPRESSION \")\"\n\t\t\t\t\t\t\t | \"{\" EXPRESSION \"}\" .\n\t\tIDENTIFIER = letter { letter } .\n\t\tLITERAL    = \"\"\"\" character { character } \"\"\"\" .\n\t\tdigit      = \"0\" | \"1\" | \"2\" | \"3\" | \"4\" | \"5\" | \"6\" | \"7\" | \"8\" | \"9\" .\n\t\tupper      = \"A\" | \"B\" | \"C\" | \"D\" | \"E\" | \"F\" | \"G\" | \"H\" | \"I\" | \"J\" \n\t\t           | \"K\" | \"L\" | \"M\" | \"N\" | \"O\" | \"P\" | \"Q\" | \"R\" | \"S\" | \"T\" \n\t\t           | \"U\" | \"V\" | \"W\" | \"X\" | \"Y\" | \"Z\" .\n\t\tlower      = \"a\" | \"b\" | \"c\" | \"d\" | \"e\" | \"f\" | \"g\" | \"h\" | \"i\" | \"j\" \n\t\t           | \"k\" | \"l\" | \"m\" | \"n\" | \"o\" | \"p\" | \"q\" | \"r\" | \"s\" | \"t\" \n\t\t           | \"u\" | \"v\" | \"w\" | \"x\" | \"y\" | \"z\" .\n\t\tletter     = upper | lower .\n\t\tcharacter  = letter | digit | \"=\" | \".\" | \"\"\"\"\"\" .\n\t";
// Parsing the description yields a Grammar, which is then used straight away
// to parse a whitespace-free sample; an exception from either step aborts the
// script before the success marker is printed.
$wirthGrammar->parse($string)->parse("SYNTAX={PRODUCTION}.");
var_dump(true);
# for a successful parse
?>
 
Example #6
0
File: bnf.php Project: ferno/loco
        }
        $parsers[$rule["rule-name"]] = $rule["expression"];
    }
    if (count($parsers) === 0) {
        throw new Exception("No rules.");
    }
    return new Grammar($top, $parsers);
});
// If executing this file directly, run unit tests; when it is merely included
// by another script, stop here.
// __FILE__ is always an absolute, symlink-resolved path, but under the CLI
// SCRIPT_FILENAME is the path exactly as typed (possibly relative), so it has
// to be canonicalised before comparing or the tests are silently skipped.
if (__FILE__ !== realpath($_SERVER["SCRIPT_FILENAME"])) {
    return;
}
// Full rule set
$string = "\n\t<postal-address> ::= <name-part> <street-address> <zip-part>\n\t<name-part>      ::= <personal-part> <name-part> | <personal-part> <last-name> <opt-jr-part> <EOL>\n\t<personal-part>  ::= <initial> \".\" | <first-name>\n\t<street-address> ::= <house-num> <street-name> <opt-apt-num> <EOL>\n\t<zip-part>       ::= <town-name> \",\" <state-code> <ZIP-code> <EOL>\n\t<opt-jr-part>    ::= \"Sr.\" | \"Jr.\" | <roman-numeral> | \"\"\n\n\t<last-name>     ::= 'MacLaurin '\n\t<EOL>           ::= '\n'\n\t<initial>       ::= 'b'\n\t<first-name>    ::= 'Steve '\n\t<house-num>     ::= '173 '\n\t<street-name>   ::= 'Acacia Avenue '\n\t<opt-apt-num>   ::= '7A'\n\t<town-name>     ::= 'Stevenage'\n\t<state-code>    ::= ' KY '\n\t<ZIP-code>      ::= '33445'\n\t<roman-numeral> ::= 'g'\n";
// Step 1: turn the BNF text into a Grammar, and time it.
$start = microtime(true);
$grammar2 = $bnfGrammar->parse($string);
print "Parsing completed in " . (microtime(true) - $start) . " seconds\n";
// Step 2: use the generated Grammar on a sample address, and time it.
$start = microtime(true);
$grammar2->parse("Steve MacLaurin \n173 Acacia Avenue 7A\nStevenage, KY 33445\n");
print "Parsing completed in " . (microtime(true) - $start) . " seconds\n";
// Same two steps for a BNF description of BNF rules themselves.
$string = "\n\t<syntax>         ::= <rule> | <rule> <syntax>\n\t<rule>           ::= <opt-whitespace> \"<\" <rule-name> \">\" <opt-whitespace> \"::=\" <opt-whitespace> <expression> <line-end>\n\t<opt-whitespace> ::= \" \" <opt-whitespace> | \"\"\n\t<expression>     ::= <list> | <list> \"|\" <expression>\n\t<line-end>       ::= <opt-whitespace> <EOL> <line-end> | <opt-whitespace> <EOL>\n\t<list>           ::= <term> | <term> <opt-whitespace> <list>\n\t<term>           ::= <literal> | \"<\" <rule-name> \">\"\n\t<literal>        ::= '\"' <text> '\"' | \"'\" <text> \"'\"\n\t\n\t<rule-name>      ::= 'a'\n\t<EOL>            ::= '\n'\n\t<text>           ::= 'b'\n";
$start = microtime(true);
$grammar3 = $bnfGrammar->parse($string);
print "Parsing completed in " . (microtime(true) - $start) . " seconds\n";
$start = microtime(true);
$grammar3->parse(" <a> ::= 'b' \n");
print "Parsing completed in " . (microtime(true) - $start) . " seconds\n";
// Should raise a ParseFailureException before trying to instantiate a Grammar
$string = " <incomplete ::=";
try {
    $bnfGrammar->parse($string);
Example #7
0
        }
        $parsers[$rule["name"]] = $rule["lazyaltparser"];
    }
    return new Grammar($top, $parsers);
});
// If executing this file directly, run unit tests; when it is merely included
// by another script, stop here.
// __FILE__ is always an absolute, symlink-resolved path, but under the CLI
// SCRIPT_FILENAME is the path exactly as typed (possibly relative), so it has
// to be canonicalised before comparing or the tests are silently skipped.
if (__FILE__ !== realpath($_SERVER["SCRIPT_FILENAME"])) {
    return;
}
// parentheses inside your BNF *always* force an array to exist in the output
// *, +, ? and {m,n} are not disguised parentheses; they expand into the main expression
// in the absence of a function to call, an array is built instead
print "0A\n";
// Each case first builds a Grammar from a one-rule description, then checks
// the parse result; every var_dump prints bool(true) on success.
// basic
// array("a") or new S("a")
$grammar2 = $locoGrammar->parse(" S ::= 'a' ");
var_dump($grammar2->parse("a") === array("a"));
// concatenation
// array("a", "b") or new S("a", "b")
$grammar2 = $locoGrammar->parse(" S ::= 'a' 'b' ");
var_dump($grammar2->parse("ab") === array("a", "b"));
// alternation
// array("a") or array("b") or new S("a") or new S("b")
$grammar2 = $locoGrammar->parse(" S ::= 'a' | 'b' ");
var_dump($grammar2->parse("a") === array("a"));
var_dump($grammar2->parse("b") === array("b"));
// alternation 2
// array("a") or array("b", "c") or new S("a") or new S("b", "c")
$grammar2 = $locoGrammar->parse(" S ::= 'a' | 'b' 'c' ");
var_dump($grammar2->parse("a") === array("a"));
var_dump($grammar2->parse("bc") === array("b", "c"));
Example #8
0
        if (count($parsers) === 0) {
            $top = $rule["rule-name"];
        }
        $parsers[$rule["rule-name"]] = $rule["expression"];
    }
    if (count($parsers) === 0) {
        throw new Exception("No rules.");
    }
    return new Grammar($top, $parsers);
});
// If executing this file directly, run unit tests; when it is merely included
// by another script, stop here.
// __FILE__ is always an absolute, symlink-resolved path, but under the CLI
// SCRIPT_FILENAME is the path exactly as typed (possibly relative), so it has
// to be canonicalised before comparing or the tests are silently skipped.
if (__FILE__ !== realpath($_SERVER["SCRIPT_FILENAME"])) {
    return;
}
// Smallest useful case: a one-rule grammar is built and used immediately; an
// exception from either parse aborts the script before the success marker.
$string = "a = 'PROGRAM' ;";
$ebnfGrammar->parse($string)->parse("PROGRAM");
var_dump(true);
// Should raise a ParseFailureException before trying to instantiate a Grammar
// with no rules and raising a GrammarException
// (the quoted terminal below is deliberately left unterminated)
$string = "a = 'PROGRAM ;";
try {
    $ebnfGrammar->parse($string);
    var_dump(false);
} catch (ParseFailureException $e) {
    var_dump(true);
}
// Full rule set
$string = "\n\t\t(* a simple program syntax in EBNF - Wikipedia *)\n\t\tprogram = 'PROGRAM' , white space , identifier , white space ,\n\t\t\t\t\t\t\t 'BEGIN' , white space ,\n\t\t\t\t\t\t\t { assignment , \";\" , white space } ,\n\t\t\t\t\t\t\t 'END.' ;\n\t\tidentifier = alphabetic character , { alphabetic character | digit } ;\n\t\tnumber = [ \"-\" ] , digit , { digit } ;\n\t\tstring = '\"' , { all characters } , '\"' ;\n\t\tassignment = identifier , \":=\" , ( number | identifier | string ) ;\n\t\talphabetic character = \"A\" | \"B\" | \"C\" | \"D\" | \"E\" | \"F\" | \"G\"\n\t\t\t\t\t\t\t\t\t\t\t\t | \"H\" | \"I\" | \"J\" | \"K\" | \"L\" | \"M\" | \"N\"\n\t\t\t\t\t\t\t\t\t\t\t\t | \"O\" | \"P\" | \"Q\" | \"R\" | \"S\" | \"T\" | \"U\"\n\t\t\t\t\t\t\t\t\t\t\t\t | \"V\" | \"W\" | \"X\" | \"Y\" | \"Z\" ;\n\t\tdigit = \"0\" | \"1\" | \"2\" | \"3\" | \"4\" | \"5\" | \"6\" | \"7\" | \"8\" | \"9\" ;\n\t\twhite space = ( \" \" | \"\n\" ) , { \" \" | \"\n\" } ;\n\t\tall characters = \"H\" | \"e\" | \"l\" | \"o\" | \" \" | \"w\" | \"r\" | \"d\" | \"!\" ;\n\t";
$pascalGrammar = $ebnfGrammar->parse($string);
var_dump(true);
// Sample program text written in the language described by the rule set above.
$string = "PROGRAM DEMO1\n" . "BEGIN\n" . "  A0:=3;\n" . "  B:=45;\n" . "  H:=-100023;\n" . "  C:=A;\n" . "  D123:=B34A;\n" . "  BABOON:=GIRAFFE;\n" . "  TEXT:=\"Hello world!\";\n" . "END.";
Example #9
0
# This code is in the public domain.
# http://qntm.org/loco
// Grammar for a restricted comment markup: <h5> and <p> blocks separated by
// whitespace, whose text may contain <strong>, <em>, <br/> and a few entities.
// Every callback below simply glues its matched pieces back together.
$simpleCommentGrammar = new Grammar("<comment>", array(
    // --- structure -------------------------------------------------------
    "<comment>" => new GreedyStarParser("<blockorwhitespace>", function () {
        $blocks = func_get_args();
        return implode("", $blocks);
    }),
    "<blockorwhitespace>" => new LazyAltParser(array("<h5>", "<p>", "WHITESPACE")),
    "<p>" => new ConcParser(array("OPEN_P", "<text>", "CLOSE_P"), function ($openTag, $body, $closeTag) {
        return "{$openTag}{$body}{$closeTag}";
    }),
    "<h5>" => new ConcParser(array("OPEN_H5", "<text>", "CLOSE_H5"), function ($openTag, $body, $closeTag) {
        return "{$openTag}{$body}{$closeTag}";
    }),
    "<strong>" => new ConcParser(array("OPEN_STRONG", "<text>", "CLOSE_STRONG"), function ($openTag, $body, $closeTag) {
        return "{$openTag}{$body}{$closeTag}";
    }),
    "<em>" => new ConcParser(array("OPEN_EM", "<text>", "CLOSE_EM"), function ($openTag, $body, $closeTag) {
        return "{$openTag}{$body}{$closeTag}";
    }),
    "<text>" => new GreedyStarParser("<atom>", function () {
        $atoms = func_get_args();
        return implode("", $atoms);
    }),
    "<atom>" => new LazyAltParser(array("<char>", "<strong>", "<em>", "FULL_BR")),
    "<char>" => new LazyAltParser(array("UTF8_EXCEPT", "GREATER_THAN", "LESS_THAN", "AMPERSAND")),

    // --- terminals: tags tolerate whitespace before the closing ">" --------
    "WHITESPACE" => new RegexParser("#^[ \n\r\t]+#"),
    "OPEN_P" => new RegexParser("#^<p[ \n\r\t]*>#"),
    "CLOSE_P" => new RegexParser("#^</p[ \n\r\t]*>#"),
    "OPEN_H5" => new RegexParser("#^<h5[ \n\r\t]*>#"),
    "CLOSE_H5" => new RegexParser("#^</h5[ \n\r\t]*>#"),
    "OPEN_EM" => new RegexParser("#^<em[ \n\r\t]*>#"),
    "CLOSE_EM" => new RegexParser("#^</em[ \n\r\t]*>#"),
    "OPEN_STRONG" => new RegexParser("#^<strong[ \n\r\t]*>#"),
    "CLOSE_STRONG" => new RegexParser("#^</strong[ \n\r\t]*>#"),
    "FULL_BR" => new RegexParser("#^<br[ \n\r\t]*/>#"),

    // --- terminals: characters and the entities standing in for <, >, & ----
    "UTF8_EXCEPT" => new Utf8Parser(array("<", ">", "&")),
    "GREATER_THAN" => new StringParser("&gt;"),
    "LESS_THAN" => new StringParser("&lt;"),
    "AMPERSAND" => new StringParser("&amp;"),
));
// If executing this file directly, run unit tests; when it is merely included
// by another script, stop here.
// __FILE__ is always an absolute, symlink-resolved path, but under the CLI
// SCRIPT_FILENAME is the path exactly as typed (possibly relative), so it has
// to be canonicalised before comparing or the tests are silently skipped.
if (__FILE__ !== realpath($_SERVER["SCRIPT_FILENAME"])) {
    return;
}
// Round trip: a valid comment must parse back to exactly the same text.
$start = microtime(true);
$string = $simpleCommentGrammar->parse("<h5>  Title<br /><em\n><strong\n></strong>&amp;</em></h5>   \r\n\t <p  >&lt;</p  >");
print "Parsing completed in " . (microtime(true) - $start) . " seconds\n";
var_dump($string === "<h5>  Title<br /><em\n><strong\n></strong>&amp;</em></h5>   \r\n\t <p  >&lt;</p  >");
// Failure modes: each of these inputs must be rejected with an exception;
// reaching var_dump(false) means it was wrongly accepted.
foreach (array("<h5 style=\"\">", "&", "<", "salkhsfg>", "</p", "<br") as $string) {
    try {
        $simpleCommentGrammar->parse($string);
        var_dump(false);
    } catch (Exception $e) {
        var_dump(true);
    }
}
Example #10
0
File: regEx.php Project: ferno/loco
    }
    // String form: the entries of $this->mults concatenated with no separator.
    public function __toString()
    {
        $rendered = implode("", $this->mults);

        return $rendered;
    }
}
// A Pattern is an alternation between several "Concs".
// This is the top-level Pattern object returned by the lexer.
class Pattern
{
    // The alternatives, in the order they were supplied.
    public $concs;

    // Accepts a list of Conc objects; throws an Exception naming the first
    // element that is not a Conc.
    public function __construct($concs)
    {
        foreach ($concs as $candidate) {
            if ($candidate instanceof Conc) {
                continue;
            }
            throw new Exception("Not a Conc: " . var_export($candidate, true));
        }
        $this->concs = $concs;
    }

    // Renders the alternatives separated by "|".
    public function __toString()
    {
        $alternatives = $this->concs;

        return implode("|", $alternatives);
    }
}
// Smoke test (apologies for the relative lack of exhaustive unit tests):
// every sample below must parse; the resulting pattern is echoed back on its
// own line, followed by bool(true).
foreach (array(
    "a{2}",
    "a{2,}",
    "a{2,8}",
    "[\$%\\^]{2,8}",
    "[ab]*",
    "([ab]*a)",
    "([ab]*a|[bc]*c)",
    "([ab]*a|[bc]*c)?",
    "([ab]*a|[bc]*c)?b*",
    "[a-zA-Z]",
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789",
    "[a]",
    "[abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789]",
    "[|(){},?*+\\[\\]\\^.\\\\]",
    "[\\f\\n\\r\\t\\v\\-]",
    "\\|",
    "\\(\\)\\{\\},\\?\\*\\+\\[\\]^.-\\f\\n\\r\\t\\v\\w\\d\\s\\W\\D\\S\\\\",
    "abcdef",
    "19\\d\\d-\\d\\d-\\d\\d",
    "[\$%\\^]{2,}",
    "[\$%\\^]{2}",
    "",
) as $string) {
    $pattern = $regexGrammar->parse($string);
    echo $pattern, "\n";
    var_dump(true);
}