• R/O
  • HTTP
  • SSH
  • HTTPS

Commit

Tags
No Tags

Frequently used words (click to add to your profile)

java, c++, android, linux, c#, windows, objective-c, cocoa, 誰得, qt, python, php, ruby, game, gui, bathyscaphe, c, 計画中 (planning stage), 翻訳, omegat, framework, twitter, dom, test, vb.net, directx, ゲームエンジン, btron, arduino, previewer

Nix flake for RPython interpreters


Commit MetaInfo

Revision: 3db301521ecd174f759d748e32507c3d3fd2fc0e (tree)
Time: 2024-04-28 14:35:39
Author: Corbin <cds@corb...>
Committer: Corbin

Log Message

regiux: More of the parser.

The remaining parts have to do with lexing and parsing of quasiliteral
strings. Parsing these is always...involved. It seems rply doesn't
support the CppNix approach of stateful lexing, so instead we'll have
to carefully lex quasiliterals into pieces and parse the pieces.

Change Summary

Incremental Difference

--- a/regiux/main.py
+++ b/regiux/main.py
@@ -9,19 +9,7 @@ from rply.token import BaseBox
99
1010 lg = rply.LexerGenerator()
1111
12-PATH_CHAR = "[a-zA-Z0-9\.\_\-\+]"
13-lg.add("URI", "[a-zA-Z][a-zA-Z0-9\+\-\.]*\:[a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']+")
14-lg.add("ID", "[a-zA-Z\_][a-zA-Z0-9\_\'\-]*")
15-lg.add("INT", "[0-9]+")
16-lg.add("FLOAT", "(([1-9][0-9]*\.[0-9]*)|(0?\.[0-9]+))([Ee][+-]?[0-9]+)?")
17-lg.add("PATH_CHAR", PATH_CHAR)
18-lg.add("PATH", "{0}*(\/{0}+)+\/?".format(PATH_CHAR))
19-lg.add("PATH_SEG", "{0}*\/".format(PATH_CHAR))
20-lg.add("HPATH", "\~(\/{0}+)+\/?".format(PATH_CHAR))
21-lg.add("HPATH_START", "\~\/")
22-lg.add("SPATH", "\<{0}+(\/{0}+)*\>".format(PATH_CHAR))
23-
24-KEYWORDS = "IF THEN ELSE ASSERT WITH LET IN REC INHERIT OR".split()
12+KEYWORDS = "IF THEN ELSE ASSERT WITH LET REC INHERIT OR IN".split()
2513 for kw in KEYWORDS: lg.add(kw, kw.lower())
2614
2715 lg.add("ELLIPSIS", "\.\.\.")
@@ -45,11 +33,28 @@ lg.add("HAS", "\?")
4533
4634 lg.add("COLON", ":")
4735 lg.add("SEMI", ";")
36+lg.add("OPEN_BRACE", "\{")
37+lg.add("CLOSE_BRACE", "\}")
4838 lg.add("OPEN_BRACK", "\[")
4939 lg.add("CLOSE_BRACK", "\]")
5040 lg.add("OPEN_PAREN", "\(")
5141 lg.add("CLOSE_PAREN", "\)")
5242 lg.add("DOT", "\.")
43+lg.add("COMMA", ",")
44+lg.add("AT", "@")
45+lg.add("EQUALS", "\=")
46+
47+PATH_CHAR = "[a-zA-Z0-9\.\_\-\+]"
48+lg.add("URI", "[a-zA-Z][a-zA-Z0-9\+\-\.]*\:[a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']+")
49+lg.add("ID", "[a-zA-Z\_][a-zA-Z0-9\_\'\-]*")
50+lg.add("INT", "[0-9]+")
51+lg.add("FLOAT", "(([1-9][0-9]*\.[0-9]*)|(0?\.[0-9]+))([Ee][+-]?[0-9]+)?")
52+lg.add("PATH_CHAR", PATH_CHAR)
53+lg.add("PATH", "{0}*(\/{0}+)+\/?".format(PATH_CHAR))
54+lg.add("PATH_SEG", "{0}*\/".format(PATH_CHAR))
55+lg.add("HPATH", "\~(\/{0}+)+\/?".format(PATH_CHAR))
56+lg.add("HPATH_START", "\~\/")
57+lg.add("SPATH", "\<{0}+(\/{0}+)*\>".format(PATH_CHAR))
5358
5459 lg.ignore("[ \t\r\n]+")
5560 lg.ignore("#[^\r\n]*")
@@ -57,6 +62,54 @@ lg.ignore("\/\*([^*]|\*+[^*/])*\*+\/")
5762
5863 lexer = lg.build()
5964
65+# Syntactic classes:
66+# * exprs
67+# * formal params
68+# * attrs
69+
70+class FormalBox(BaseBox):
71+ def __init__(self, name, default):
72+ self.name = name
73+ self.default = default
74+ def pretty(self):
75+ if self.default is None:
76+ return self.name
77+ else:
78+ return "%s ? %s" % (self.name, self.default.pretty())
79+
80+class BindsBox(BaseBox):
81+ def __init__(self, binds): self.binds = binds
82+ def pretty(self):
83+ binds = [bind.pretty() for bind in self.binds]
84+ return "{ %s }" % " ".join(binds)
85+ def getbinds(self): return self.binds
86+
87+class BindExprBox(BaseBox):
88+ def __init__(self, path, expr):
89+ self.path = path
90+ self.expr = expr
91+ def pretty(self): return "%s = %s;" % (self.path.pretty(), self.expr.pretty())
92+
93+class BindInheritBox(BaseBox):
94+ def __init__(self, attrs, scope):
95+ self.attrs = attrs
96+ self.scope = scope
97+ def pretty(self):
98+ if self.scope:
99+ return "inherit (%s) %s;" % (self.scope.pretty(), self.attrs.pretty())
100+ else: return "inherit %s;" % self.attrs.pretty()
101+
102+class FormalsBox(BaseBox):
103+ def __init__(self, params, hasEllipsis):
104+ self.params = params
105+ self.hasEllipsis = hasEllipsis
106+ def pretty(self):
107+ params = [param.pretty() for param in self.params]
108+ if self.hasEllipsis: params.append("...")
109+ return "{ %s }" % ", ".join(params)
110+ def getparams(self): return self.params
111+ def getellipsis(self): return self.hasEllipsis
112+
60113 class VarBox(BaseBox):
61114 def __init__(self, name): self.name = name
62115 def pretty(self): return self.name
@@ -66,6 +119,11 @@ class AttrPathBox(BaseBox):
66119 def pretty(self): return ".".join([attr.pretty() for attr in self.attrs])
67120 def getattrs(self): return self.attrs
68121
122+class AttrsBox(BaseBox):
123+ def __init__(self, attrs): self.attrs = attrs
124+ def pretty(self): return " ".join([attr.pretty() for attr in self.attrs])
125+ def getattrs(self): return self.attrs
126+
69127 class ListBox(BaseBox):
70128 def __init__(self, exprs): self.exprs = exprs
71129 def pretty(self):
@@ -126,10 +184,17 @@ class WithBox(BaseBox):
126184 def pretty(self): return "with %s; %s" % (self.scope.pretty(), self.expr.pretty())
127185
128186 class LambdaBox(BaseBox):
129- def __init__(self, binding, body):
187+ def __init__(self, binding, params, body):
130188 self.binding = binding
189+ self.params = params
131190 self.body = body
132- def pretty(self): return "%s: %s" % (self.binding.pretty(), self.body.pretty())
191+ def pretty(self):
192+ body = self.body.pretty()
193+ if self.binding and self.params:
194+ return "%s@%s: %s" % (self.binding.pretty(), self.params.pretty(), body)
195+ elif self.binding: return "%s: %s" % (self.binding.pretty(), body)
196+ elif self.params: return "%s: %s" % (self.params.pretty(), body)
197+ else: return "_: " + body
133198
134199 class AppBox(BaseBox):
135200 def __init__(self, func, arg):
@@ -153,7 +218,8 @@ pg = rply.ParserGenerator(KEYWORDS + [
153218 "CONCAT", "UPDATE",
154219 "DIV", "MINUS", "MUL", "PLUS",
155220 "NEGATE", "NOT",
156- "COLON", "SEMI", "OPEN_BRACK", "CLOSE_BRACK", "OPEN_PAREN", "CLOSE_PAREN", "DOT",
221+ "COLON", "SEMI", "DOT", "COMMA", "AT", "EQUALS", "ELLIPSIS",
222+ "OPEN_BRACE", "CLOSE_BRACE", "OPEN_BRACK", "CLOSE_BRACK", "OPEN_PAREN", "CLOSE_PAREN",
157223 ], precedence=[
158224 ("right", ["IMPL"]),
159225 ("left", ["OR_OP"]),
@@ -175,13 +241,23 @@ class ParseError(Exception):
175241 @pg.error
176242 def parseError(token): raise ParseError(token)
177243
178-def precRule(sup, sub):
179- pg.production("expr%s : expr%s" % (sup, sub))(lambda p: p[0])
244+def constRule(rule, pb): pg.production(rule)(lambda _: pb)
245+def enclosedRule(rule, i): pg.production(rule)(lambda p: p[i])
246+def precRule(sup, sub): enclosedRule("expr%s : expr%s" % (sup, sub), 0)
180247 SPINE = "", "_function", "_if", "_op", "_app", "_select", "_simple"
181248 for sup, sub in zip(SPINE, SPINE[1:]): precRule(sup, sub)
182249
183250 @pg.production("expr_function : ID COLON expr_function")
184-def exprLambda(p): return LambdaBox(VarBox(p[0].getstr()), p[2])
251+def exprLambda(p): return LambdaBox(VarBox(p[0].getstr()), None, p[2])
252+
253+@pg.production("expr_function : OPEN_BRACE formals CLOSE_BRACE COLON expr_function")
254+def exprLambda(p): return LambdaBox(None, p[1], p[4])
255+
256+@pg.production("expr_function : OPEN_BRACE formals CLOSE_BRACE AT ID COLON expr_function")
257+def exprLambda(p): return LambdaBox(VarBox(p[4].getstr()), p[1], p[6])
258+
259+@pg.production("expr_function : ID AT OPEN_BRACE formals CLOSE_BRACE COLON expr_function")
260+def exprLambda(p): return LambdaBox(VarBox(p[0].getstr()), p[3], p[6])
185261
186262 @pg.production("expr_function : ASSERT expr SEMI expr_function")
187263 def exprAssert(p): return AssertBox(p[1], p[3])
@@ -192,8 +268,7 @@ def exprWith(p): return WithBox(p[1], p[3])
192268 @pg.production("expr_if : IF expr THEN expr ELSE expr")
193269 def exprIf(p): return IfBox(p[1], p[3], p[5])
194270
195-@pg.production("expr_op : NEGATE expr_op")
196-@pg.production("expr_op : NOT expr_op")
271+@pg.production("expr_op : NEGATE expr_op | NOT expr_op")
197272 def exprUnary(p): return ExprUnaryBox(p[1], p[0])
198273
199274 @pg.production("expr_op : expr_op AND expr_op")
@@ -222,9 +297,6 @@ def exprApp(p): return AppBox(p[0], p[1])
222297 @pg.production("expr_select : expr_simple DOT attrpath | expr_simple DOT attrpath OR expr_select")
223298 def exprSelect(p): return SelectBox(p[0], p[2], p[4] if len(p) == 5 else None)
224299
225-@pg.production("expr_simple : OPEN_PAREN expr CLOSE_PAREN")
226-def exprParens(p): return p[1]
227-
228300 @pg.production("expr_simple : ID")
229301 def exprSimpleId(p): return VarBox(p[0].getstr())
230302
@@ -234,17 +306,47 @@ def exprSimpleInt(p): return IntBox(int(p[0].getstr()))
234306 @pg.production("expr_simple : URI")
235307 def exprURI(p): return StrBox(p[0].getstr())
236308
237-@pg.production("expr_simple : OPEN_BRACK expr_list CLOSE_BRACK")
238-def exprList(p): return p[1]
239-
240-@pg.production("expr_list :")
241-def exprListNil(p): return ListBox([])
309+enclosedRule("expr_simple : OPEN_BRACE binds CLOSE_BRACE", 1)
310+enclosedRule("expr_simple : OPEN_PAREN expr CLOSE_PAREN", 1)
311+enclosedRule("expr_simple : OPEN_BRACK expr_list CLOSE_BRACK", 1)
312+constRule("expr_list :", ListBox([]))
242313
243314 @pg.production("expr_list : expr_list expr_select")
244315 def exprListCons(p): return ListBox(p[0].getexprs() + [p[1]])
245316
317+@pg.production("binds : binds attrpath EQUALS expr SEMI")
318+def bindsExpr(p): return BindsBox(p[0].getbinds() + [BindExprBox(p[1], p[3])])
319+
320+@pg.production("binds : binds INHERIT attrs SEMI")
321+def bindsInherit(p):
322+ return BindsBox(p[0].getbinds() + [BindInheritBox(p[2], None)])
323+
324+@pg.production("binds : binds INHERIT OPEN_PAREN expr CLOSE_PAREN attrs SEMI")
325+def bindsInherit(p):
326+ return BindsBox(p[0].getbinds() + [BindInheritBox(p[5], p[3])])
327+
328+constRule("binds :", BindsBox([]))
329+
330+@pg.production("formals : formal COMMA formals")
331+def formalsComma(p):
332+ return FormalsBox([p[0]] + p[2].getparams(), p[2].getellipsis())
333+
334+@pg.production("formals : formal")
335+def formalsFormal(p): return FormalsBox([p[0]], False)
336+
337+constRule("formals :", FormalsBox([], False))
338+constRule("formals : ELLIPSIS", FormalsBox([], True))
339+
340+@pg.production("formal : ID | ID HAS expr")
341+def formalId(p): return FormalBox(p[0].getstr(), p[2] if len(p) > 1 else None)
342+
343+@pg.production("attrs : attrs attr")
344+def attrsAttr(p): return AttrsBox(p[0].getattrs() + [p[1]])
345+
346+constRule("attrs :", AttrsBox([]))
347+
246348 @pg.production("attrpath : attrpath DOT attr")
247-def attrpathNil(p): return AttrPathBox(p[0].getattrs() + [p[1]])
349+def attrpathNil(p): return AttrPathBox(p[0].getattrs() + [p[2]])
248350
249351 @pg.production("attrpath : attr")
250352 def attrpathCons(p): return AttrPathBox(p)
--- a/regiux/todo.txt
+++ b/regiux/todo.txt
@@ -43,6 +43,7 @@
4343 * Negation: -x => builtins.sub 0 x
4444 * Arithmetic: x * y => builtins.mul x y, etc.
4545 * Scalar comparison: x > y => y < x, x >= y => !(x < y), etc.
46+ * Attrset bindings
4647 * My ideas
4748 * Eliminate `with`
4849 * Motivation: It was bugging somebody (Irenes? Xe? Infinisil?)