A fast implementation of the Nix expression language
Revision | 50462950c7e62a96aae777ca2ede6eb5dd1f3244 (tree) |
---|---|
Time | 2024-06-14 11:11:02 |
Author | Corbin <cds@corb...> |
Commiter | Corbin |
Get 2 + 2 working again.
The supercombinator preparation will be the hard part. Not tricky, just
a lot of work.
@@ -1 +1,11 @@ | ||
1 | 1 | This is a basic parser for the Nix language. |
2 | + | |
3 | +## Bibliography | |
4 | + | |
5 | +### G-Machine | |
6 | + | |
7 | +These posts were helpful for disentangling some of the weirder parts: | |
8 | + | |
9 | +* https://amelia.how/posts/the-gmachine-in-detail.html | |
10 | +* https://www.moonbitlang.com/docs/examples/gmachine-1 | |
11 | +* https://www.moonbitlang.com/docs/examples/gmachine-2 |
@@ -5,18 +5,29 @@ | ||
5 | 5 | # Quirks: |
6 | 6 | # 0) The instruction at PC 0 is always UNWIND, to ease EVAL. |
7 | 7 | |
8 | -from heap import HeapApp | |
8 | +from heap import HeapAction, HeapBinary, HeapApp, HeapAttrSet, WrongType | |
9 | 9 | |
10 | -OUTER, CONST, LOCAL, ARG, PRIM, MKAP, SLIDE, COND, EVAL, UNWIND = range(10) | |
10 | +INSTRUCTIONS = [ | |
11 | + # Core G-machine operations. | |
12 | + "outer", "const", "local", "arg", "prim", "mkApp", "slide", "cond", "eval", "unwind", | |
13 | + # Nix-specific operations. | |
14 | + "selectAttr", | |
15 | +] | |
16 | +globals().update({inst.upper(): i for i, inst in enumerate(INSTRUCTIONS)}) | |
11 | 17 | |
12 | 18 | class StackUnderflow(Exception): pass |
13 | 19 | class Overapplied(Exception): pass |
14 | 20 | |
21 | +def getArg(app): | |
22 | + if not isinstance(app, HeapApp): raise Overapplied() | |
23 | + return app.x | |
24 | + | |
15 | 25 | class G(object): |
16 | - _immutable_attrs_ = "code[*]", "consts[*]" | |
17 | - def __init__(self, code, consts): | |
26 | + _immutable_attrs_ = "code[*]", "consts[*]", "attrs[*]" | |
27 | + def __init__(self, code, consts, attrs): | |
18 | 28 | self.code = code |
19 | 29 | self.consts = consts |
30 | + self.attrs = attrs | |
20 | 31 | |
21 | 32 | def run(self, outers): |
22 | 33 | stack = [] |
@@ -25,6 +36,8 @@ class G(object): | ||
25 | 36 | pc = 1 |
26 | 37 | while pc < len(self.code): |
27 | 38 | inst = self.code[pc] |
39 | + print "Stack:", stack | |
40 | + print "PC:", pc, "Inst:", INSTRUCTIONS[inst] | |
28 | 41 | pc += 1 |
29 | 42 | if inst == OUTER: |
30 | 43 | stack.append(outers[self.code[pc]]) |
@@ -39,11 +52,12 @@ class G(object): | ||
39 | 52 | elif inst == ARG: |
40 | 53 | i = len(stack) - self.code[pc] |
41 | 54 | pc += 1 |
42 | - node = stack[i] | |
43 | - if not isinstance(node, HeapApp): raise Overapplied() | |
44 | - stack.append(node.f) | |
55 | + stack.append(getArg(stack[i])) | |
45 | 56 | elif inst == PRIM: assert False |
46 | - elif inst == MKAP: stack.append(HeapApp(stack.pop(), stack.pop())) | |
57 | + elif inst == MKAPP: | |
58 | + x = stack.pop() | |
59 | + f = stack.pop() | |
60 | + stack.append(HeapApp(f, x)) | |
47 | 61 | elif inst == SLIDE: |
48 | 62 | top = stack.pop() |
49 | 63 | stop = len(stack) - self.code[pc] |
@@ -58,10 +72,37 @@ class G(object): | ||
58 | 72 | stack = [top] |
59 | 73 | pc = 0 |
60 | 74 | elif inst == UNWIND: |
61 | - while isinstance(stack[-1], HeapApp): stack.append(stack[-1].f) | |
62 | - # XXX supercombinators | |
63 | - if dump: stack, pc = dump.pop() | |
64 | - else: return stack.pop() | |
75 | + top = stack.pop() | |
76 | + print "Unwinding:", top | |
77 | + while isinstance(top, HeapApp): | |
78 | + print "App:", top, "->", top.f | |
79 | + stack.append(top) | |
80 | + top = top.f | |
81 | + print "Unwound stack:", stack | |
82 | + # XXX supercombinators too! | |
83 | + if isinstance(top, HeapAction): | |
84 | + if len(stack) < 1: return top | |
85 | + print "Unary op:", top, stack[-1] | |
86 | + x = getArg(stack.pop()) | |
87 | + top = top.action(x) | |
88 | + stack.append(top) | |
89 | + elif isinstance(top, HeapBinary): | |
90 | + if len(stack) < 2: return top | |
91 | + print "Binary op:", top, stack[-1], stack[-2] | |
92 | + x = getArg(stack.pop()) | |
93 | + y = getArg(stack.pop()) | |
94 | + top = top.action(x, y) | |
95 | + stack.append(top) | |
96 | + elif dump: | |
97 | + print "Dump pop", pc, "->", | |
98 | + stack, pc = dump.pop() | |
99 | + print pc | |
100 | + stack.append(top) | |
101 | + else: return top | |
102 | + elif inst == SELECTATTR: | |
103 | + top = stack.pop() | |
104 | + stack.append(top.getAttr(self.attrs[self.code[pc]])) | |
105 | + pc += 1 | |
65 | 106 | else: |
66 | 107 | print "Unknown bytecode:", inst |
67 | 108 | assert False |
@@ -81,10 +122,10 @@ class Compiler(object): | ||
81 | 122 | self.code = [UNWIND] |
82 | 123 | self.consts = [] |
83 | 124 | self.locals = [] |
125 | + self.attrs = [] | |
84 | 126 | |
85 | 127 | def outer(self, name): |
86 | - try: | |
87 | - i = self.scope.index(name) | |
128 | + try: i = self.scope.index(name) | |
88 | 129 | except ValueError: raise MissingVar(name) |
89 | 130 | else: |
90 | 131 | self.code.append(OUTER) |
@@ -106,11 +147,43 @@ class Compiler(object): | ||
106 | 147 | self.code.append(ARG) |
107 | 148 | self.code.append(i) |
108 | 149 | |
109 | - def mkAp(self): self.code.append(MKAP) | |
110 | - | |
111 | 150 | def slide(self, i): |
112 | 151 | assert i >= 0, "implementation error" |
113 | 152 | self.code.append(SLIDE) |
114 | 153 | self.code.append(i) |
115 | 154 | |
116 | - def finish(self): return G(self.code[:], self.consts[:]) | |
155 | + def mkApp(self): self.code.append(MKAPP) | |
156 | + def unwind(self): self.code.append(UNWIND) | |
157 | + | |
158 | + def selectAttr(self, s): | |
159 | + try: i = self.attrs.index(s) | |
160 | + except ValueError: | |
161 | + i = len(self.attrs) | |
162 | + self.attrs.append(s) | |
163 | + self.code.append(SELECTATTR) | |
164 | + self.code.append(i) | |
165 | + | |
166 | + def finish(self): | |
167 | + # Epilogue. | |
168 | + # XXX slide? | |
169 | + self.unwind() | |
170 | + return G(self.code[:], self.consts[:], self.attrs[:]) | |
171 | + | |
172 | +def disassemble(code): | |
173 | + rv = [] | |
174 | + pc = 0 | |
175 | + i = 0 | |
176 | + while pc < len(code): | |
177 | + # We don't have "%-3d" or str.rjust(). | |
178 | + istr = str(i) | |
179 | + istr = " " * (3 - len(istr)) + istr | |
180 | + inst = code[pc] | |
181 | + pc += 1 | |
182 | + if inst in (OUTER, CONST, LOCAL, ARG, SLIDE, SELECTATTR): | |
183 | + arg = code[pc] | |
184 | + pc += 1 | |
185 | + s = "%s: %s %d" % (istr, INSTRUCTIONS[inst], arg) | |
186 | + else: s = "%s: %s" % (istr, INSTRUCTIONS[inst]) | |
187 | + rv.append(s) | |
188 | + i += 1 | |
189 | + return rv |
@@ -1,6 +1,6 @@ | ||
1 | 1 | import sys |
2 | 2 | |
3 | -from gmachine import Compiler | |
3 | +from gmachine import Compiler, disassemble | |
4 | 4 | from heap import defaultScope, InfiniteLoop |
5 | 5 | from parser import parse, NotAnExpr, ParseError |
6 | 6 |
@@ -29,6 +29,8 @@ def entryPoint(argv): | ||
29 | 29 | ast.compile(compiler) |
30 | 30 | g = compiler.finish() |
31 | 31 | print "Top-level machine:", g |
32 | + print "Disassembly:" | |
33 | + for line in disassemble(g.code): print line | |
32 | 34 | except NotAnExpr as e: |
33 | 35 | print "Error while compiling" |
34 | 36 | print "Input syntax was not an expr, but:", e.cls |
@@ -153,28 +153,31 @@ class SelectBox(EBox): | ||
153 | 153 | return "%s.%s" % (self.expr.pretty(), self.path.pretty()) |
154 | 154 | else: |
155 | 155 | return "%s.%s or %s" % (self.expr.pretty(), self.path.pretty(), self.alt.pretty()) |
156 | - # def compile(self, compiler): | |
157 | - # assert self.alt is None, "can't compile yet" | |
158 | - # return heap.HeapSelect(self.expr.compile(scope), | |
159 | - # [attr2str(attr) for attr in self.path.getattrs()]) | |
156 | + def compile(self, compiler): | |
157 | + assert self.alt is None, "can't compile yet" | |
158 | + self.expr.compile(compiler) | |
159 | + for attr in self.path.getattrs(): compiler.selectAttr(attr2str(attr)) | |
160 | 160 | |
161 | 161 | class VarBox(EBox): |
162 | 162 | def __init__(self, name): self.name = name |
163 | 163 | def pretty(self): return self.name |
164 | - # def compile(self, compiler): | |
165 | - # # XXX should probably be a compiler method | |
164 | + def compile(self, compiler): | |
165 | + # XXX should probably be a compiler method | |
166 | + if self.name in compiler.scope: return compiler.outer(self.name) | |
167 | + else: raise MissingVar(self.name) | |
168 | + # XXX locals | |
166 | 169 | # if self.name in compiler.locals: |
167 | 170 | # compiler.pick(len(compiler.locals) - compiler.locals.index(self.name)) |
168 | - # elif self.name in compiler.scope: return compiler.outer(self.name) | |
169 | - # else: raise MissingVar(self.name) | |
170 | 171 | |
171 | 172 | class AppBox(EBox): |
172 | 173 | def __init__(self, func, arg): |
173 | 174 | self.func = func |
174 | 175 | self.arg = arg |
175 | 176 | def pretty(self): return "(%s) (%s)" % (self.func.pretty(), self.arg.pretty()) |
176 | - # def compile(self, compiler): | |
177 | - # return heap.HeapApp(self.func.compile(scope), self.arg.compile(scope)) | |
177 | + def compile(self, compiler): | |
178 | + self.func.compile(compiler) | |
179 | + self.arg.compile(compiler) | |
180 | + compiler.mkApp() | |
178 | 181 | |
179 | 182 | class StrQLBox(EBox): |
180 | 183 | def __init__(self, init, pairs): |
@@ -205,11 +208,13 @@ class ExprBinaryBox(EBox): | ||
205 | 208 | self.verb = verb |
206 | 209 | def pretty(self): |
207 | 210 | return "(%s %s %s)" % (self.verb, self.left.pretty(), self.right.pretty()) |
208 | - # def compile(self, compiler): | |
209 | - # return heap.HeapApp( | |
210 | - # heap.HeapApp(heap.HeapSelect(scope["builtins"], [self.verb]), | |
211 | - # self.left.compile(scope)), | |
212 | - # self.right.compile(scope)) | |
211 | + def compile(self, compiler): | |
212 | + compiler.outer("builtins") | |
213 | + compiler.selectAttr(self.verb) | |
214 | + self.left.compile(compiler) | |
215 | + compiler.mkApp() | |
216 | + self.right.compile(compiler) | |
217 | + compiler.mkApp() | |
213 | 218 | |
214 | 219 | class BindBox(RegiuxBox): |
215 | 220 | "A box for attrset attributes." |
@@ -31,6 +31,9 @@ | ||
31 | 31 | * Gonna use the recipe developed in cammy-sampler, bf |
32 | 32 | * AST nodes are instances of subclasses of BaseBox |
33 | 33 | * Any katamorphisms on ASTs are given as methods |
34 | + * Considering implementing an IR AST | |
35 | + * Includes most expansions | |
36 | + * Goal: Ease supercombinator recognition and compilation | |
34 | 37 | * Optimizations? Consider Frances' list: |
35 | 38 | * CSE |
36 | 39 | * Basically shouldn't ever happen |
@@ -57,13 +60,10 @@ | ||
57 | 60 | * Evaluation |
58 | 61 | * Will need to transform parser AST to something lower-level |
59 | 62 | * The ancient question: AST or bytecode? |
63 | + * Bytecode! Maybe an AST first | |
60 | 64 | * How to be lazy? |
61 | - * Graph reduction? | |
62 | - * Bytecode might be a good fit here! Push-enter, etc. | |
63 | - * How to builtins? | |
64 | - * Either set up a special instruction for them or add `builtins` to scope | |
65 | - * Probably the latter, given that `builtins.derivation` is actually Nix code | |
66 | - * This is the case in both Nix and Tvix | |
65 | + * G-machine! | |
66 | + * Needs effort to properly share work | |
67 | 67 | * Store operations |
68 | 68 | * Might not be our problem at first |
69 | 69 | * Because if we can emit derivations, then the system daemon can take over? |