在哪里可以找到_简单_、易于理解的 LR(1) 解析器生成器实现?

Meh*_*dad 1 c# parsing lr1 lr-grammar

在哪里可以找到 LR(1) 解析器生成器的简单(尽可能多,但不能更简单!)实现?

我不是在寻找性能,只是在寻找生成 LR(1) 状态(项目集)的能力。
C++、C#、Java 和 Python 都适合我。

Meh*_*dad 5

我用 C# 编写了一个非常简单的代码,想在这里分享。

\n\n

它基本上填充action查找表,该表告诉您要转移到哪个状态或使用哪个规则来减少。
\n如果数字非负,则表示新状态;如果它是负数,那么它的补码(即~x)表示规则索引。

\n\n

您现在需要做的就是创建一个词法分析器并使用操作表进行实际解析。

\n\n

注 1:生成现实世界语法的状态可能会非常慢,因此在生产代码中使用它之前您可能需要三思而后行!

\n\n

注 2:您可能需要仔细检查其正确性,因为我只检查了一点。

\n\n
using System;\nusing System.Collections.Generic;\nusing System.Diagnostics;\nusing System.Linq;\nusing size_t = System.UInt32;\n\npublic class LRParser\n{\n    private string[] symbols;  // index => symbol\n    private IDictionary<string, size_t> interned = new SortedDictionary<string, size_t>();  // symbol => index\n    private int[/*state*/, /*lookahead*/] actions;  // If >= 0, represents new state after shift. If < 0, represents one\'s complement (i.e. ~x) of reduction rule.\n\n    public LRParser(params KeyValuePair<string, string[]>[] grammar)\n    {\n        this.interned.Add(string.Empty, new size_t());\n        foreach (var rule in grammar)\n        {\n            if (!this.interned.ContainsKey(rule.Key))\n            { this.interned.Add(rule.Key, (size_t)this.interned.Count); }\n            foreach (var symbol in rule.Value)\n            {\n                if (!this.interned.ContainsKey(symbol))\n                { this.interned.Add(symbol, (size_t)this.interned.Count); }\n            }\n        }\n        this.symbols = this.interned.ToArray().OrderBy(p => p.Value).Select(p => p.Key).ToArray();\n        var syntax = Array.ConvertAll(grammar, r => new KeyValuePair<size_t, size_t[]>(this.interned[r.Key], Array.ConvertAll(r.Value, s => this.interned[s])));\n        var nonterminals = Array.ConvertAll(this.symbols, s => new List<size_t>());\n        for (size_t i = 0; i < syntax.Length; i++) { nonterminals[syntax[i].Key].Add(i); }\n\n        var firsts = Array.ConvertAll(Enumerable.Range(0, this.symbols.Length).ToArray(), s => nonterminals[s].Count > 0 ? new HashSet<size_t>() : new HashSet<size_t>() { (size_t)s });\n\n        int old;\n        do\n        {\n            old = firsts.Select(l => l.Count).Sum();\n            foreach (var rule in syntax)\n            {\n                foreach (var i in First(rule.Value, firsts))\n                { firsts[rule.Key].Add(i); }\n            }\n        } while (old < firsts.Select(l => l.Count).Sum());\n\n        var actions = new Dictionary<int, IDictionary<size_t, IList<int>>>();\n        var states = new Dictionary<HashSet<Item>, int>(HashSet<Item>.CreateSetComparer());\n        var todo = new Stack<HashSet<Item>>();\n        var root = new Item(0, 0, new size_t());\n        todo.Push(new HashSet<Item>());\n        Closure(root, todo.Peek(), firsts, syntax, nonterminals);\n        states.Add(new HashSet<Item>(todo.Peek()), states.Count);\n        while (todo.Count > 0)\n        {\n            var set = todo.Pop();\n            var closure = new HashSet<Item>();\n            foreach (var item in set)\n            { Closure(item, closure, firsts, syntax, nonterminals); }\n            var grouped = Array.ConvertAll(this.symbols, _ => new HashSet<Item>());\n            foreach (var item in closure)\n            {\n                if (item.Symbol >= syntax[item.Rule].Value.Length)\n                {\n                    IDictionary<size_t, IList<int>> map;\n                    if (!actions.TryGetValue(states[set], out map))\n                    { actions[states[set]] = map = new Dictionary<size_t, IList<int>>(); }\n                    IList<int> list;\n                    if (!map.TryGetValue(item.Lookahead, out list))\n                    { map[item.Lookahead] = list = new List<int>(); }\n                    list.Add(~(int)item.Rule);\n                    continue;\n                }\n                var next = item;\n                next.Symbol++;\n                grouped[syntax[item.Rule].Value[item.Symbol]].Add(next);\n            }\n            for (size_t symbol = 0; symbol < grouped.Length; symbol++)\n            {\n                var g = new HashSet<Item>();\n                foreach (var item in grouped[symbol])\n                { Closure(item, g, firsts, syntax, nonterminals); }\n                if (g.Count > 0)\n                {\n                    int state;\n                    if (!states.TryGetValue(g, out state))\n                    {\n                        state = states.Count;\n                        states.Add(g, state);\n                        todo.Push(g);\n                    }\n\n                    IDictionary<size_t, IList<int>> map;\n                    if (!actions.TryGetValue(states[set], out map))\n                    { actions[states[set]] = map = new Dictionary<size_t, IList<int>>(); }\n                    IList<int> list;\n                    if (!map.TryGetValue(symbol, out list))\n                    { map[symbol] = list = new List<int>(); }\n\n                    list.Add(state);\n                }\n            }\n        }\n\n        this.actions = new int[states.Count, this.symbols.Length];\n        for (int i = 0; i < this.actions.GetLength(0); i++)\n        {\n            for (int j = 0; j < this.actions.GetLength(1); j++)\n            { this.actions[i, j] = int.MinValue; }\n        }\n        foreach (var p in actions)\n        {\n            foreach (var q in p.Value)\n            { this.actions[p.Key, q.Key] = q.Value.Single(); }\n        }\n\n        foreach (var state in states.OrderBy(p => p.Value))\n        {\n            Console.WriteLine("State {0}:", state.Value);\n            foreach (var item in state.Key.OrderBy(i => i.Rule).ThenBy(i => i.Symbol).ThenBy(i => i.Lookahead))\n            {\n                Console.WriteLine(\n                    "\\t{0}: {1} \\xB7 {2} | {3} \xe2\x86\x92  {0}",\n                    this.symbols[syntax[item.Rule].Key],\n                    string.Join(" ", syntax[item.Rule].Value.Take((int)item.Symbol).Select(s => this.symbols[s]).ToArray()),\n                    string.Join(" ", syntax[item.Rule].Value.Skip((int)item.Symbol).Select(s => this.symbols[s]).ToArray()),\n                    this.symbols[item.Lookahead] == string.Empty ? "\\x04" : this.symbols[item.Lookahead],\n                    string.Join(\n                        ", ",\n                        Array.ConvertAll(\n                            actions[state.Value][item.Symbol < syntax[item.Rule].Value.Length ? syntax[item.Rule].Value[item.Symbol] : item.Lookahead].ToArray(),\n                            a => a >= 0 ? string.Format("state {0}", a) : string.Format("{0} (rule {1})", this.symbols[syntax[~a].Key], ~a))));\n            }\n            Console.WriteLine();\n        }\n    }\n\n    private static void Closure(Item item, HashSet<Item> closure /*output*/, HashSet<size_t>[] firsts, KeyValuePair<size_t, size_t[]>[] syntax, IList<size_t>[] nonterminals)\n    {\n        if (closure.Add(item) && item.Symbol >= syntax[item.Rule].Value.Length)\n        {\n            foreach (var r in nonterminals[syntax[item.Rule].Value[item.Symbol]])\n            {\n                foreach (var i in First(syntax[item.Rule].Value.Skip((int)(item.Symbol + 1)), firsts))\n                { Closure(new Item(r, 0, i == new size_t() ? item.Lookahead : i), closure, firsts, syntax, nonterminals); }\n            }\n        }\n    }\n\n    private struct Item : IEquatable<Item>\n    {\n        public size_t Rule;\n        public size_t Symbol;\n        public size_t Lookahead;\n\n        public Item(size_t rule, size_t symbol, size_t lookahead)\n        {\n            this.Rule = rule;\n            this.Symbol = symbol;\n            this.Lookahead = lookahead;\n        }\n\n        public override bool Equals(object obj) { return obj is Item && this.Equals((Item)obj); }\n\n        public bool Equals(Item other)\n        { return this.Rule == other.Rule && this.Symbol == other.Symbol && this.Lookahead == other.Lookahead; }\n\n        public override int GetHashCode()\n        { return this.Rule.GetHashCode() ^ this.Symbol.GetHashCode() ^ this.Lookahead.GetHashCode(); }\n    }\n\n    private static IEnumerable<size_t> First(IEnumerable<size_t> symbols, IEnumerable<size_t>[] map)\n    {\n        foreach (var symbol in symbols)\n        {\n            bool epsilon = false;\n            foreach (var s in map[symbol])\n            {\n                if (s == new size_t()) { epsilon = true; }\n                else { yield return s; }\n            }\n            if (!epsilon) { yield break; }\n        }\n        yield return new size_t();\n    }\n\n    private static KeyValuePair<K, V> MakePair<K, V>(K k, V v) { return new KeyValuePair<K, V>(k, v); }\n\n    private static void Main(string[] args)\n    {\n        var sw = Stopwatch.StartNew();\n        var parser = new LRParser(\n            MakePair("start", new string[] { "exps" }),\n            MakePair("exps", new string[] { "exps", "exp" }),\n            MakePair("exps", new string[] { }),\n            MakePair("exp", new string[] { "INTEGER" })\n        );\n        Console.WriteLine(sw.ElapsedMilliseconds);\n    }\n}\n
Run Code Online (Sandbox Code Playgroud)\n