Nix flake for RPython interpreters
Revision | 3db301521ecd174f759d748e32507c3d3fd2fc0e (tree) |
---|---|
Time | 2024-04-28 14:35:39 |
Author | Corbin <cds@corb...> |
Committer | Corbin |
reguix: More of the parser.
The remaining parts have to do with lexing and parsing of quasiliteral
strings. Parsing these is always...involved. It seems rply doesn't
support the CppNix approach of stateful lexing, so instead we'll have
to carefully lex quasiliterals into pieces and parse the pieces.
@@ -9,19 +9,7 @@ from rply.token import BaseBox | ||
9 | 9 | |
10 | 10 | lg = rply.LexerGenerator() |
11 | 11 | |
12 | -PATH_CHAR = "[a-zA-Z0-9\.\_\-\+]" | |
13 | -lg.add("URI", "[a-zA-Z][a-zA-Z0-9\+\-\.]*\:[a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']+") | |
14 | -lg.add("ID", "[a-zA-Z\_][a-zA-Z0-9\_\'\-]*") | |
15 | -lg.add("INT", "[0-9]+") | |
16 | -lg.add("FLOAT", "(([1-9][0-9]*\.[0-9]*)|(0?\.[0-9]+))([Ee][+-]?[0-9]+)?") | |
17 | -lg.add("PATH_CHAR", PATH_CHAR) | |
18 | -lg.add("PATH", "{0}*(\/{0}+)+\/?".format(PATH_CHAR)) | |
19 | -lg.add("PATH_SEG", "{0}*\/".format(PATH_CHAR)) | |
20 | -lg.add("HPATH", "\~(\/{0}+)+\/?".format(PATH_CHAR)) | |
21 | -lg.add("HPATH_START", "\~\/") | |
22 | -lg.add("SPATH", "\<{0}+(\/{0}+)*\>".format(PATH_CHAR)) | |
23 | - | |
24 | -KEYWORDS = "IF THEN ELSE ASSERT WITH LET IN REC INHERIT OR".split() | |
12 | +KEYWORDS = "IF THEN ELSE ASSERT WITH LET REC INHERIT OR IN".split() | |
25 | 13 | for kw in KEYWORDS: lg.add(kw, kw.lower()) |
26 | 14 | |
27 | 15 | lg.add("ELLIPSIS", "\.\.\.") |
@@ -45,11 +33,28 @@ lg.add("HAS", "\?") | ||
45 | 33 | |
46 | 34 | lg.add("COLON", ":") |
47 | 35 | lg.add("SEMI", ";") |
36 | +lg.add("OPEN_BRACE", "\{") | |
37 | +lg.add("CLOSE_BRACE", "\}") | |
48 | 38 | lg.add("OPEN_BRACK", "\[") |
49 | 39 | lg.add("CLOSE_BRACK", "\]") |
50 | 40 | lg.add("OPEN_PAREN", "\(") |
51 | 41 | lg.add("CLOSE_PAREN", "\)") |
52 | 42 | lg.add("DOT", "\.") |
43 | +lg.add("COMMA", ",") | |
44 | +lg.add("AT", "@") | |
45 | +lg.add("EQUALS", "\=") | |
46 | + | |
47 | +PATH_CHAR = "[a-zA-Z0-9\.\_\-\+]" | |
48 | +lg.add("URI", "[a-zA-Z][a-zA-Z0-9\+\-\.]*\:[a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']+") | |
49 | +lg.add("ID", "[a-zA-Z\_][a-zA-Z0-9\_\'\-]*") | |
50 | +lg.add("INT", "[0-9]+") | |
51 | +lg.add("FLOAT", "(([1-9][0-9]*\.[0-9]*)|(0?\.[0-9]+))([Ee][+-]?[0-9]+)?") | |
52 | +lg.add("PATH_CHAR", PATH_CHAR) | |
53 | +lg.add("PATH", "{0}*(\/{0}+)+\/?".format(PATH_CHAR)) | |
54 | +lg.add("PATH_SEG", "{0}*\/".format(PATH_CHAR)) | |
55 | +lg.add("HPATH", "\~(\/{0}+)+\/?".format(PATH_CHAR)) | |
56 | +lg.add("HPATH_START", "\~\/") | |
57 | +lg.add("SPATH", "\<{0}+(\/{0}+)*\>".format(PATH_CHAR)) | |
53 | 58 | |
54 | 59 | lg.ignore("[ \t\r\n]+") |
55 | 60 | lg.ignore("#[^\r\n]*") |
@@ -57,6 +62,54 @@ lg.ignore("\/\*([^*]|\*+[^*/])*\*+\/") | ||
57 | 62 | |
58 | 63 | lexer = lg.build() |
59 | 64 | |
65 | +# Syntactic classes: | |
66 | +# * exprs | |
67 | +# * formal params | |
68 | +# * attrs | |
69 | + | |
70 | +class FormalBox(BaseBox): | |
71 | + def __init__(self, name, default): | |
72 | + self.name = name | |
73 | + self.default = default | |
74 | + def pretty(self): | |
75 | + if self.default is None: | |
76 | + return self.name | |
77 | + else: | |
78 | + return "%s ? %s" % (self.name, self.default.pretty()) | |
79 | + | |
80 | +class BindsBox(BaseBox): | |
81 | + def __init__(self, binds): self.binds = binds | |
82 | + def pretty(self): | |
83 | + binds = [bind.pretty() for bind in self.binds] | |
84 | + return "{ %s }" % " ".join(binds) | |
85 | + def getbinds(self): return self.binds | |
86 | + | |
87 | +class BindExprBox(BaseBox): | |
88 | + def __init__(self, path, expr): | |
89 | + self.path = path | |
90 | + self.expr = expr | |
91 | + def pretty(self): return "%s = %s;" % (self.path.pretty(), self.expr.pretty()) | |
92 | + | |
93 | +class BindInheritBox(BaseBox): | |
94 | + def __init__(self, attrs, scope): | |
95 | + self.attrs = attrs | |
96 | + self.scope = scope | |
97 | + def pretty(self): | |
98 | + if self.scope: | |
99 | + return "inherit (%s) %s;" % (self.scope.pretty(), self.attrs.pretty()) | |
100 | + else: return "inherit %s;" % self.attrs.pretty() | |
101 | + | |
102 | +class FormalsBox(BaseBox): | |
103 | + def __init__(self, params, hasEllipsis): | |
104 | + self.params = params | |
105 | + self.hasEllipsis = hasEllipsis | |
106 | + def pretty(self): | |
107 | + params = [param.pretty() for param in self.params] | |
108 | + if self.hasEllipsis: params.append("...") | |
109 | + return "{ %s }" % ", ".join(params) | |
110 | + def getparams(self): return self.params | |
111 | + def getellipsis(self): return self.hasEllipsis | |
112 | + | |
60 | 113 | class VarBox(BaseBox): |
61 | 114 | def __init__(self, name): self.name = name |
62 | 115 | def pretty(self): return self.name |
@@ -66,6 +119,11 @@ class AttrPathBox(BaseBox): | ||
66 | 119 | def pretty(self): return ".".join([attr.pretty() for attr in self.attrs]) |
67 | 120 | def getattrs(self): return self.attrs |
68 | 121 | |
122 | +class AttrsBox(BaseBox): | |
123 | + def __init__(self, attrs): self.attrs = attrs | |
124 | + def pretty(self): return " ".join([attr.pretty() for attr in self.attrs]) | |
125 | + def getattrs(self): return self.attrs | |
126 | + | |
69 | 127 | class ListBox(BaseBox): |
70 | 128 | def __init__(self, exprs): self.exprs = exprs |
71 | 129 | def pretty(self): |
@@ -126,10 +184,17 @@ class WithBox(BaseBox): | ||
126 | 184 | def pretty(self): return "with %s; %s" % (self.scope.pretty(), self.expr.pretty()) |
127 | 185 | |
128 | 186 | class LambdaBox(BaseBox): |
129 | - def __init__(self, binding, body): | |
187 | + def __init__(self, binding, params, body): | |
130 | 188 | self.binding = binding |
189 | + self.params = params | |
131 | 190 | self.body = body |
132 | - def pretty(self): return "%s: %s" % (self.binding.pretty(), self.body.pretty()) | |
191 | + def pretty(self): | |
192 | + body = self.body.pretty() | |
193 | + if self.binding and self.params: | |
194 | + return "%s@%s: %s" % (self.binding.pretty(), self.params.pretty(), body) | |
195 | + elif self.binding: return "%s: %s" % (self.binding.pretty(), body) | |
196 | + elif self.params: return "%s: %s" % (self.params.pretty(), body) | |
197 | + else: return "_: " + body | |
133 | 198 | |
134 | 199 | class AppBox(BaseBox): |
135 | 200 | def __init__(self, func, arg): |
@@ -153,7 +218,8 @@ pg = rply.ParserGenerator(KEYWORDS + [ | ||
153 | 218 | "CONCAT", "UPDATE", |
154 | 219 | "DIV", "MINUS", "MUL", "PLUS", |
155 | 220 | "NEGATE", "NOT", |
156 | - "COLON", "SEMI", "OPEN_BRACK", "CLOSE_BRACK", "OPEN_PAREN", "CLOSE_PAREN", "DOT", | |
221 | + "COLON", "SEMI", "DOT", "COMMA", "AT", "EQUALS", "ELLIPSIS", | |
222 | + "OPEN_BRACE", "CLOSE_BRACE", "OPEN_BRACK", "CLOSE_BRACK", "OPEN_PAREN", "CLOSE_PAREN", | |
157 | 223 | ], precedence=[ |
158 | 224 | ("right", ["IMPL"]), |
159 | 225 | ("left", ["OR_OP"]), |
@@ -175,13 +241,23 @@ class ParseError(Exception): | ||
175 | 241 | @pg.error |
176 | 242 | def parseError(token): raise ParseError(token) |
177 | 243 | |
178 | -def precRule(sup, sub): | |
179 | - pg.production("expr%s : expr%s" % (sup, sub))(lambda p: p[0]) | |
244 | +def constRule(rule, pb): pg.production(rule)(lambda _: pb) | |
245 | +def enclosedRule(rule, i): pg.production(rule)(lambda p: p[i]) | |
246 | +def precRule(sup, sub): enclosedRule("expr%s : expr%s" % (sup, sub), 0) | |
180 | 247 | SPINE = "", "_function", "_if", "_op", "_app", "_select", "_simple" |
181 | 248 | for sup, sub in zip(SPINE, SPINE[1:]): precRule(sup, sub) |
182 | 249 | |
183 | 250 | @pg.production("expr_function : ID COLON expr_function") |
184 | -def exprLambda(p): return LambdaBox(VarBox(p[0].getstr()), p[2]) | |
251 | +def exprLambda(p): return LambdaBox(VarBox(p[0].getstr()), None, p[2]) | |
252 | + | |
253 | +@pg.production("expr_function : OPEN_BRACE formals CLOSE_BRACE COLON expr_function") | |
254 | +def exprLambda(p): return LambdaBox(None, p[1], p[4]) | |
255 | + | |
256 | +@pg.production("expr_function : OPEN_BRACE formals CLOSE_BRACE AT ID COLON expr_function") | |
257 | +def exprLambda(p): return LambdaBox(VarBox(p[4].getstr()), p[1], p[6]) | |
258 | + | |
259 | +@pg.production("expr_function : ID AT OPEN_BRACE formals CLOSE_BRACE COLON expr_function") | |
260 | +def exprLambda(p): return LambdaBox(VarBox(p[0].getstr()), p[3], p[6]) | |
185 | 261 | |
186 | 262 | @pg.production("expr_function : ASSERT expr SEMI expr_function") |
187 | 263 | def exprAssert(p): return AssertBox(p[1], p[3]) |
@@ -192,8 +268,7 @@ def exprWith(p): return WithBox(p[1], p[3]) | ||
192 | 268 | @pg.production("expr_if : IF expr THEN expr ELSE expr") |
193 | 269 | def exprIf(p): return IfBox(p[1], p[3], p[5]) |
194 | 270 | |
195 | -@pg.production("expr_op : NEGATE expr_op") | |
196 | -@pg.production("expr_op : NOT expr_op") | |
271 | +@pg.production("expr_op : NEGATE expr_op | NOT expr_op") | |
197 | 272 | def exprUnary(p): return ExprUnaryBox(p[1], p[0]) |
198 | 273 | |
199 | 274 | @pg.production("expr_op : expr_op AND expr_op") |
@@ -222,9 +297,6 @@ def exprApp(p): return AppBox(p[0], p[1]) | ||
222 | 297 | @pg.production("expr_select : expr_simple DOT attrpath | expr_simple DOT attrpath OR expr_select") |
223 | 298 | def exprSelect(p): return SelectBox(p[0], p[2], p[4] if len(p) == 5 else None) |
224 | 299 | |
225 | -@pg.production("expr_simple : OPEN_PAREN expr CLOSE_PAREN") | |
226 | -def exprParens(p): return p[1] | |
227 | - | |
228 | 300 | @pg.production("expr_simple : ID") |
229 | 301 | def exprSimpleId(p): return VarBox(p[0].getstr()) |
230 | 302 |
@@ -234,17 +306,47 @@ def exprSimpleInt(p): return IntBox(int(p[0].getstr())) | ||
234 | 306 | @pg.production("expr_simple : URI") |
235 | 307 | def exprURI(p): return StrBox(p[0].getstr()) |
236 | 308 | |
237 | -@pg.production("expr_simple : OPEN_BRACK expr_list CLOSE_BRACK") | |
238 | -def exprList(p): return p[1] | |
239 | - | |
240 | -@pg.production("expr_list :") | |
241 | -def exprListNil(p): return ListBox([]) | |
309 | +enclosedRule("expr_simple : OPEN_BRACE binds CLOSE_BRACE", 1) | |
310 | +enclosedRule("expr_simple : OPEN_PAREN expr CLOSE_PAREN", 1) | |
311 | +enclosedRule("expr_simple : OPEN_BRACK expr_list CLOSE_BRACK", 1) | |
312 | +constRule("expr_list :", ListBox([])) | |
242 | 313 | |
243 | 314 | @pg.production("expr_list : expr_list expr_select") |
244 | 315 | def exprListCons(p): return ListBox(p[0].getexprs() + [p[1]]) |
245 | 316 | |
317 | +@pg.production("binds : binds attrpath EQUALS expr SEMI") | |
318 | +def bindsExpr(p): return BindsBox(p[0].getbinds() + [BindExprBox(p[1], p[3])]) | |
319 | + | |
320 | +@pg.production("binds : binds INHERIT attrs SEMI") | |
321 | +def bindsInherit(p): | |
322 | + return BindsBox(p[0].getbinds() + [BindInheritBox(p[2], None)]) | |
323 | + | |
324 | +@pg.production("binds : binds INHERIT OPEN_PAREN expr CLOSE_PAREN attrs SEMI") | |
325 | +def bindsInherit(p): | |
326 | + return BindsBox(p[0].getbinds() + [BindInheritBox(p[5], p[3])]) | |
327 | + | |
328 | +constRule("binds :", BindsBox([])) | |
329 | + | |
330 | +@pg.production("formals : formal COMMA formals") | |
331 | +def formalsComma(p): | |
332 | + return FormalsBox([p[0]] + p[2].getparams(), p[2].getellipsis()) | |
333 | + | |
334 | +@pg.production("formals : formal") | |
335 | +def formalsFormal(p): return FormalsBox([p[0]], False) | |
336 | + | |
337 | +constRule("formals :", FormalsBox([], False)) | |
338 | +constRule("formals : ELLIPSIS", FormalsBox([], True)) | |
339 | + | |
340 | +@pg.production("formal : ID | ID HAS expr") | |
341 | +def formalId(p): return FormalBox(p[0].getstr(), p[2] if len(p) > 1 else None) | |
342 | + | |
343 | +@pg.production("attrs : attrs attr") | |
344 | +def attrsAttr(p): return AttrsBox(p[0].getattrs() + [p[1]]) | |
345 | + | |
346 | +constRule("attrs :", AttrsBox([])) | |
347 | + | |
246 | 348 | @pg.production("attrpath : attrpath DOT attr") |
247 | -def attrpathNil(p): return AttrPathBox(p[0].getattrs() + [p[1]]) | |
349 | +def attrpathNil(p): return AttrPathBox(p[0].getattrs() + [p[2]]) | |
248 | 350 | |
249 | 351 | @pg.production("attrpath : attr") |
250 | 352 | def attrpathCons(p): return AttrPathBox(p) |
@@ -43,6 +43,7 @@ | ||
43 | 43 | * Negation: -x => builtins.sub 0 x |
44 | 44 | * Arithmetic: x * y => builtins.mul x y, etc. |
45 | 45 | * Scalar comparison: x > y => y < x, x >= y => !(x < y), etc. |
46 | + * Attrset bindings | |
46 | 47 | * My ideas |
47 | 48 | * Eliminate `with` |
48 | 49 | * Motivation: It was bugging somebody (Irenes? Xe? Infinisil?) |