• R/O
  • HTTP
  • SSH
  • HTTPS

Commit

Tags
No Tags

Frequently used words (click to add to your profile)

javac++androidlinuxc#windowsobjective-ccocoa誰得qtpythonphprubygameguibathyscaphec計画中(planning stage)翻訳omegatframeworktwitterdomtestvb.netdirectxゲームエンジンbtronarduinopreviewer

Commit MetaInfo

Revision376428dbc496a40f77de92078a24ef954d49832f (tree)
Time2023-04-03 02:42:49
Authordyknon <dyknon@user...>
Commiterdyknon

Log Message

Optimized

Change Summary

Incremental Difference

--- /dev/null
+++ b/Makefile
@@ -0,0 +1,4 @@
1+test:
2+ perl -mlib=lib t/empty_root_object.t
3+ perl -mlib=lib t/additional_string.t
4+ perl -mlib=lib t/hjson_official.t
--- a/lib/Text/Hjson.pm
+++ b/lib/Text/Hjson.pm
@@ -4,6 +4,9 @@ use warnings;
44 use strict;
55 use feature qw(postderef);
66 use Carp;
7+use Text::Hjson::Grammar;
8+
9+our $grammar = "Text::Hjson::Grammar";
710
811 our $ws = qr/[\t\r\n ]/;
912 our $inline_ws = qr/[\t\r ]/;
@@ -29,11 +32,11 @@ sub null($){
2932 my ($self) = @_;
3033 undef
3134 }
32-sub array(@){
35+sub array($@){
3336 my ($self, @inner) = @_;
3437 \@inner
3538 }
36-sub object(@){
39+sub object($@){
3740 my ($self, @inner) = @_;
3841 my %ret;
3942 for(@inner){
@@ -41,176 +44,72 @@ sub object(@){
4144 }
4245 \%ret
4346 }
47+sub string($$){
48+ my ($self, $str) = @_;
49+ $str
50+}
51+sub number($$){
52+ my ($self, $num) = @_;
53+ $num
54+}
55+sub decode_quoted_string($$){
56+ my ($self, $str) = @_;
4457
45-our %element = (
46- literal => {
47- re => qr/$wsc(true|false|null)$litend/,
48- act => sub{
49- my ($self, $match) = @_;
50- {
51- true => $self->true,
52- false => $self->false,
53- null => $self->null,
54- }->{$match->[0]}
55- }
56- },
57- separator => {re => qr/$separator/},
58- optional_sep => {re => qr/$separator?/},
59- # XXX: not docuemnted, but tests say quoteless string
60- # starting with quote is not allowed.
61- quoteless_guard => {re => qr/$wsc(?!["'])/},
62- value => {
63- any => [qw(literal object array number string)],
64- act => sub{$_[2]}
65- },
66- root => {
67- any => [qw(object_member_arr value)],
68- act => sub{$_[2]}
69- },
70- object => {
71- seq => [qw(begin_object object_inner end_object)],
72- act => sub{$_[2]}
73- },
74- begin_object => {re => qr/$wsc\{/},
75- end_object => {re => qr/$wsc\}/},
76- object_inner => {
77- opt => "object_member_arr",
78- act => sub{
79- my ($self, $match) = @_;
80- $match // $self->object;
81- }
82- },
83- object_member_arr => {
84- seq => [qw(object_member object_members optional_sep)],
85- act => sub{
86- my ($self, $first, $left) = @_;
87- $self->object($first, @$left)
88- }
89- },
90- object_member => {
91- seq => [qw(object_key object_key_sep value)],
92- act => sub{
93- my ($self, $key, $sep, $val) = @_;
94- [$key, $val]
95- }
96- },
97- object_members => {
98- many => "separated_object_member",
99- act => sub{shift; [@_]}
100- },
101- separated_object_member => {
102- seq => [qw(separator object_member)],
103- act => sub{$_[2]}
104- },
105- object_key => {
106- any => [qw(string_quoted object_key_noquote)],
107- act => sub{$_[2]}
108- },
109- object_key_noquote => {
110- seq => [qw(quoteless_guard object_key_noquote_body)],
111- act => sub{$_[2]}
112- },
113- object_key_noquote_body => {
114- re => qr/($nonpunct+)/,
115- act => sub{$_[1][0]}
116- },
117- object_key_sep => {re => qr/$wsc:/},
118- array => {
119- seq => [qw(begin_array array_inner end_array)],
120- act => sub{$_[2]}
121- },
122- begin_array => {re => qr/$wsc\[/},
123- end_array => {re => qr/$wsc\]/},
124- array_inner => {
125- opt => "array_member_arr",
126- act => sub{
127- my ($self, $match) = @_;
128- $match // $self->array;
129- }
130- },
131- array_member_arr => {
132- seq => [qw(value array_members optional_sep)],
133- act => sub{
134- my ($self, $first, $left) = @_;
135- $self->array($first, @$left)
136- }
137- },
138- array_members => {
139- many => "separated_array_member",
140- act => sub{shift; [@_]}
141- },
142- separated_array_member => {
143- seq => [qw(separator value)],
144- act => sub{$_[2]}
145- },
146- number => {
147- re => qr/$wsc(-)?+((?:0|[1-9][0-9]*+)(?:\.[0-9]++)?+)(?:[eE]([-+]?+[0-9]++))?+$litend/,
148- act => sub{
149- my ($self, $match) = @_;
150- my $v = $match->[1] * 10**($match->[2] // 0);
151- $match->[0] ? -$v : $v
152- }
153- },
154- string => {
155- any => [qw(string_multiline string_quoted string_noquote)],
156- act => sub{$_[2]}
157- },
158- # XXX: not in RFC but quote by ' is allowed.
159- string_quoted => {
160- re => qr/$wsc(["'])((?:(?!\g{1})[^\0-\x1f\\]|\\["'\\\/bfnrt]|\\u[0-9A-Fa-f]{4})*+)\g{1}/,
161- act => sub{
162- my ($self, $match) = @_;
163- $match->[1] =~ s{\\(u(.{4})|.)}{
164- {
165- '"' => '"',
166- "'" => "'",
167- "\\" => "\\",
168- "/" => "/",
169- "b" => "\b",
170- "f" => "\f",
171- "n" => "\n",
172- "r" => "\r",
173- "t" => "\t",
174- }->{$1} // chr(hex($2))
175- }ger;
176- }
177- },
178- string_noquote => {
179- seq => [qw(quoteless_guard string_noquote_body)],
180- act => sub{$_[2]}
181- },
182- string_noquote_body => {
183- re => qr/(${nonpunct}(?:\t|[^\0-\x1f])*)/,
184- act => sub{
185- my ($self, $match) = @_;
186- $match->[0] =~ s/$inline_ws*+\z//r;
58+ # process \uxxxx sequences
59+ my $newstr = "";
60+ my $pos = 0;
61+ while($str =~ /\\u/g){
62+ $newstr .= substr($str, $pos, $-[0] - $pos);
63+ my $start_hex = $+[0];
64+ my $hex = substr($str, $start_hex, 4);
65+ die "imcomplete \\u sequence" if(4 != length $hex);
66+ die "non-hex char in \\u sequence" if($hex =~ /[^0-9A-Fa-f]/);
67+ my $code = hex($hex);
68+ die "low surrogate prefixed by no high surrogate in \\u sequence"
69+ if(0xdc00 <= $code < 0xe000);
70+ if(0xd800 <= $code < 0xdc00){
71+ die "high surrogate suffixed by no \\u in \\u sequence"
72+ if(substr($str, $start_hex + 4, 2) ne "\\u");
73+ my $hex = substr($str, $start_hex + 6, 4);
74+ die "imcomplete \\u sequence" if(4 != length $hex);
75+ die "non-hex char in \\u sequence" if($hex =~ /[^0-9A-Fa-f]/);
76+ my $lowcode = hex($hex);
77+ die "high surrogate suffixed by no low surrogate in \\u sequence"
78+ unless(0xdc00 <= $lowcode < 0xe000);
79+ $newstr .= chr((($code & 0x3ff) + 0x40) << 10 | $lowcode & 0x3ff);
80+ pos($str) = $pos = $start_hex + 10;
81+ }else{
82+ $newstr .= chr($code);
83+ pos($str) = $pos = $start_hex + 4;
18784 }
188- },
189- string_multiline => {
190- re => qr/($wsc)'''((?:(?!''')$any)*+)'''/,
191- act => sub{
192- my ($self, $match) = @_;
193- my $indent = length($match->[0] =~ s/.*\n//r);
194- my $s = $match->[1] =~ s/\r//r;
85+ }
86+ $str = $newstr . substr($str, $pos) if(length $newstr);
19587
196- # XXX: What is "column"
197- $s =~ s/(?<=\n)${inline_ws}{0,$indent}//g;
198- # XXX: RFC says: Whitespace on the first line is ignored.
199- # my understanding: first line is ignored if it contains only ws.
200- $s =~ s/^$inline_ws*\n//;
88+ # process other escape sequences
89+ my %escape = (
90+ '"' => '"',
91+ "'" => "'",
92+ "\\" => "\\",
93+ "/" => "/",
94+ "b" => "\b",
95+ "f" => "\f",
96+ "n" => "\n",
97+ "r" => "\r",
98+ "t" => "\t",
99+ );
100+ $str =~ s{\\(.)}{$escape{$1} // die "invalid escape sequence \\$1";}eg;
201101
202- $s =~ s/\n\z//r
203- }
204- },
205-);
102+ $str
103+}
206104
207105 sub consume($$$$){
208106 my ($self, $what, $in, $at) = @_;
209- my $act = sub{scalar ($element{$what}{act} // sub{})->($self, @_)};
210- croak "unknown consume type: $what" if(!$element{$what});
211- if($element{$what}{re}){
107+ croak "unknown grammar: $what" if(!$grammar->can($what));
108+ my %g = $grammar->$what();
109+ my $act = sub{scalar ($g{act} // sub{})->($self, @_)};
110+ if($g{re}){
212111 pos($in) = $at;
213- if($in =~ /\G$element{$what}{re}/g){
112+ if($in =~ /\G$g{re}/g){
214113 (
215114 pos($in),
216115 $act->([@{^CAPTURE}])
@@ -218,47 +117,266 @@ sub consume($$$$){
218117 }else{
219118 ()
220119 }
221- }elsif($element{$what}{seq}){
120+ }elsif($g{seq}){
222121 my @got;
223- for($element{$what}{seq}->@*){
122+ for($g{seq}->@*){
224123 ($at, my $read) = $self->consume($_, $in, $at);
225124 return () if(!defined $at);
226125 push @got, $read;
227126 }
228127 ($at, $act->(@got))
229- }elsif($element{$what}{any}){
128+ }elsif($g{any}){
230129 my ($i, $bt, $read);
231- for $i(0 .. $element{$what}{any}->$#*){
232- ($bt, $read) = $self->consume($element{$what}{any}[$i], $in, $at);
130+ for $i(0 .. $g{any}->$#*){
131+ ($bt, $read) = $self->consume($g{any}[$i], $in, $at);
233132 if(defined $bt){
234133 return ($bt, $act->($i, $read));
235134 }
236135 }
237136 ()
238- }elsif($element{$what}{many}){
137+ }elsif($g{many}){
239138 my $bt;
240139 my @got;
241140 while(1){
242- ($bt, my $read) = $self->consume($element{$what}{many}, $in, $at);
141+ ($bt, my $read) = $self->consume($g{many}, $in, $at);
243142 last if(!defined $bt);
244143 die "infinite loop detected in $what" if($at == $bt);
245144 $at = $bt;
246145 push @got, $read;
247146 }
248147 ($at, $act->(@got))
249- }elsif($element{$what}{opt}){
250- my ($bt, $read) = $self->consume($element{$what}{opt}, $in, $at);
148+ }elsif($g{opt}){
149+ my ($bt, $read) = $self->consume($g{opt}, $in, $at);
251150 ($bt//$at, $act->($read))
252151 }else{
253- die "what is $what";
152+ die "broken grammar";
153+ }
154+}
155+
156+sub consume_value($$$){
157+ my ($self, $in, $at) = @_;
158+ my $s = substr($in, $at, 1);
159+ my @try;
160+ if($s eq "{"){
161+ @try = qw(consume_object);
162+ }elsif($s eq "["){
163+ @try = qw(consume_array);
164+ }elsif(ord(0) <= ord($s) <= ord(9) || $s eq "-"){
165+ @try = qw(consume_number consume_string);
166+ }elsif($s eq "t" || $s eq "f" || $s eq "n"){
167+ @try = qw(consume_literal consume_string);
168+ }else{
169+ @try = qw(consume_string);
170+ }
171+ for(@try){
172+ my ($bt, $value) = $self->$_($in, $at);
173+ return ($bt, $value) if(defined $bt);
174+ }
175+ ()
176+}
177+
178+sub consume_root($$$){
179+ my ($self, $in, $at) = @_;
180+ my $s = substr($in, $at, 1);
181+ my @try;
182+ if($s eq "{"){
183+ @try = qw(consume_object);
184+ }elsif($s eq "["){
185+ @try = qw(consume_array);
186+ }elsif(ord(0) <= ord($s) <= ord(1) || $s eq "-"){
187+ @try = qw(consume_root_object consume_number consume_string);
188+ }elsif($s eq "t" || $s eq "f" || $s eq "n"){
189+ @try = qw(consume_root_object consume_literal consume_string);
190+ }else{
191+ @try = qw(consume_root_object consume_string);
192+ }
193+ for(@try){
194+ my ($bt, $value) = $self->$_($in, $at);
195+ return ($bt, $value) if(defined $bt);
196+ }
197+ ()
198+}
199+
200+sub consume_literal($$$){
201+ my ($self, $in, $at) = @_;
202+ $self->{cre_literal} //= qr/\G(true|false|null)$litend/;
203+ pos($in) = $at;
204+ if($in =~ /$self->{cre_literal}/g){
205+ (pos($in), {
206+ true => $self->true,
207+ false => $self->false,
208+ null => $self->null,
209+ }->{$1})
210+ }else{()}
211+}
212+
213+sub consume_number($$$){
214+ my ($self, $in, $at) = @_;
215+ $self->{cre_number} //= qr/\G(-)?+((?:0|[1-9][0-9]*+)(?:\.[0-9]++)?+)(?:[eE]([-+]?+[0-9]++))?+$litend/;
216+
217+ pos($in) = $at;
218+ if($in =~ /$self->{cre_number}/g){
219+ my $v = $2 * 10**($3 // 0);
220+ (pos($in), $self->number($1 ? -$v : $v))
221+ }else{()}
222+}
223+
224+sub consume_string($$$){
225+ my ($self, $in, $at) = @_;
226+ $self->{cre_dquoted_string} //= qr/\G"((?:[^\0-\x1f\\"]|\\.)*+)"/;
227+ $self->{cre_squoted_string} //= qr/\G'((?:[^\0-\x1f\\']|\\.)*+)'/;
228+ $self->{cre_quoteless_string} //= qr/\G([^'"\0-\x20,:\[\]{}](?:\t|[^\0-\x1f])*)/;
229+ my $start1 = substr($in, $at, 1);
230+ if($start1 eq "'"){
231+ my $start3 = substr($in, $at, 3);
232+ if($start3 eq "'''"){
233+ pos($in) = $at;
234+ die if($in !~ /($inline*)\G/);
235+ my $column = length($1);
236+ pos($in) = $at + 3;
237+ return if($in !~ /\G($any*?)'''/g);
238+ my $str = $1;
239+ $str =~ s/\r//g;
240+ # XXX: it may not compatible with js-Hjson.
241+ # but I don't understand 'column' well.
242+ $str =~ s/(?<=\n)${inline_ws}{0,$column}//g;
243+ # XXX: it is not compatible with js-Hjson. but I think its better.
244+ $str =~ s/^$inline_ws*\n//;
245+ $str =~ s/\n\z//;
246+ (pos($in), $str)
247+ }else{
248+ pos($in) = $at;
249+ return if($in !~ /$self->{cre_squoted_string}/g);
250+ (pos($in), $self->decode_quoted_string($1))
251+ }
252+ }elsif($start1 eq '"'){
253+ pos($in) = $at;
254+ return if($in !~ /$self->{cre_dquoted_string}/g);
255+ (pos($in), $self->decode_quoted_string($1))
256+ }else{
257+ pos($in) = $at;
258+ return if($in !~ /$self->{cre_quoteless_string}/g);
259+ (pos($in), $1 =~ s/$inline_ws*+\z//r)
260+ }
261+}
262+
263+sub consume_object($$$){
264+ my ($self, $in, $at) = @_;
265+ $self->{cre_wsc} //= qr/\G$wsc/;
266+
267+ return if(substr($in, $at++, 1) ne "{");
268+ pos($in) = $at;
269+ die if($in !~ /$self->{cre_wsc}/g);
270+ ($at, my @content) = $self->consume_object_inner($in, pos($in));
271+ return if(!defined $at);
272+ pos($in) = $at;
273+ die if($in !~ /$self->{cre_wsc}/g);
274+ return if(substr($in, pos($in), 1) ne "}");
275+ (pos($in)+1, $self->object(@content))
276+}
277+
278+sub consume_root_object($$$){
279+ my ($self, $in, $at) = @_;
280+ ($at, my @content) = $self->consume_object_inner($in, $at);
281+ return if(!defined $at || !@content);
282+ ($at, $self->object(@content))
283+}
284+
285+sub consume_object_inner($$$){
286+ my ($self, $in, $at) = @_;
287+ $self->{cre_wsc} //= qr/\G$wsc/;
288+ $self->{cre_quoteless_key} //= qr/\G([^'"\0-\x20,:\[\]{}][^\0-\x20,:\[\]{}]*)/;
289+ $self->{cre_dquoted_string} //= qr/\G"((?:[^\0-\x1f\\"]|\\.)*+)"/;
290+ $self->{cre_squoted_string} //= qr/\G'((?:[^\0-\x1f\\']|\\.)*+)'/;
291+ $self->{cre_value_separator} //= qr/\G$separator/;
292+
293+ my @ret;
294+ pos($in) = $at;
295+ while(1){
296+ my $keystart = substr($in, pos($in), 1);
297+ return ($at, @ret) if($keystart eq "}" || $keystart eq "");
298+ $at = pos($in);
299+ my $key;
300+ if($keystart eq '"'){
301+ return if($in !~ /$self->{cre_dquoted_string}/g); $at = pos($in);
302+ $key = $self->decode_quoted_string($1);
303+ }elsif($keystart eq "'"){
304+ return if($in !~ /$self->{cre_squoted_string}/g); $at = pos($in);
305+ $key = $self->decode_quoted_string($1);
306+ }else{
307+ return if($in !~ /$self->{cre_quoteless_key}/g); $at = pos($in);
308+ $key = $1;
309+ }
310+
311+ pos($in) = $at; die if($in !~ /$self->{cre_wsc}/g); $at = pos($in);
312+ return if(substr($in, $at++, 1) ne ":");
313+ pos($in) = $at; die if($in !~ /$self->{cre_wsc}/g); $at = pos($in);
314+
315+ ($at, my $value) = $self->consume_value($in, $at); pos($in) = $at;
316+ return if(!defined $at);
317+
318+ push(@ret, [$key, $value]);
319+ last if($in !~ /$self->{cre_value_separator}/g); $at = pos($in);
320+ die if($in !~ /$self->{cre_wsc}/g);
321+ }
322+ ($at, @ret)
323+}
324+
325+sub consume_array($$$){
326+ my ($self, $in, $at) = @_;
327+ $self->{cre_wsc} //= qr/\G$wsc/;
328+ $self->{cre_value_separator} //= qr/\G$separator/;
329+
330+ return if(substr($in, $at++, 1) ne "[");
331+ pos($in) = $at;
332+ die if($in !~ /$self->{cre_wsc}/g); $at = pos($in);
333+
334+ my @content;
335+ while(1){
336+ my $nextstart = substr($in, pos($in), 1);
337+ if($nextstart eq "]"){
338+ return (pos($in)+1, $self->array(@content));
339+ }
340+ return if($nextstart eq "");
341+ $at = pos($in);
342+
343+ ($at, my $value) = $self->consume_value($in, $at); pos($in) = $at;
344+ return if(!defined $at);
345+
346+ push(@content, $value);
347+ if($in !~ /$self->{cre_value_separator}/g){
348+ pos($in) = $at;
349+ die if($in !~ /$self->{cre_wsc}/g);
350+ return if(substr($in, pos($in), 1) ne "]");
351+ return (pos($in)+1, $self->array(@content));
352+ }
353+ $at = pos($in);
354+ die if($in !~ /$self->{cre_wsc}/g);
355+ }
356+}
357+
358+sub consume_wsc($$$){
359+ my ($self, $in, $at) = @_;
360+ $self->{cre_wsc} //= qr/\G$wsc/;
361+ pos($in) = $at;
362+ if($in =~ /$self->{cre_wsc}/g){
363+ (pos($in), undef)
364+ }else{
365+ die "bug";
254366 }
255367 }
256368
257369 sub decode($$){
258370 my ($self, $in) = @_;
259- my ($end, $value) = $self->consume("root", $in, 0);
260- croak "Invalid Hjson string" if(!defined $end);
261- pos($in) = $end;
262- croak "Garbage after Hjson value" if($in !~ /\G$wsc\z/g);
371+ $self = bless {}, $self if(!length ref $self);
372+ my ($p,) = $self->consume_wsc($in, 0);
373+ die if(!defined $p);
374+ ($p, my $value) = $self->consume_root($in, $p);
375+ croak "Invalid Hjson string" if(!defined $p);
376+ ($p,) = $self->consume_wsc($in, $p);
377+ die if(!defined $p);
378+ croak "Garbage after Hjson value" if($p != length($in));
263379 $value
264380 }
381+
382+1;
--- /dev/null
+++ b/lib/Text/Hjson/Grammar.pm
@@ -0,0 +1,181 @@
1+package Text::Hjson::Grammar;
2+
3+use warnings;
4+use strict;
5+use feature qw(postderef);
6+
7+our $ws = qr/[\t\r\n ]/;
8+our $inline_ws = qr/[\t\r ]/;
9+our $inline = qr/$inline_ws|[^\0-\x20]/;
10+our $any = qr/$ws|[^\0-\x20]/;
11+our $line_comment = qr/(?:#|\/\/)$inline*+/;
12+our $block_comment = qr/\/\*(?:[^*]|\*(?!\/))*+\*\//;
13+our $comment = qr/$line_comment|$block_comment/;
14+our $wsc = qr/(?:$ws|$comment)*+/;
15+our $litend = qr/(?![ \t]*+[^\0-\x20#,\/\[\]{}])/;
16+our $separator = qr/$wsc,|(?:$inline_ws|$comment)*+\n/;
17+our $nonpunct = qr/[^\0-\x20,:\[\]{}]/;
18+
19+sub literal{
20+ re => qr/$wsc(true|false|null)$litend/,
21+ act => sub{
22+ my ($self, $match) = @_;
23+ {
24+ true => $self->true,
25+ false => $self->false,
26+ null => $self->null,
27+ }->{$match->[0]}
28+ }
29+}
30+sub separator{re => qr/$separator/}
31+sub optional_sep{re => qr/$separator?/}
32+# XXX: not docuemnted, but tests say quoteless string
33+# starting with quote is not allowed.
34+sub quoteless_guard{re => qr/$wsc(?!["'])/}
35+sub value{
36+ any => [qw(literal object array number string)],
37+ act => sub{$_[2]}
38+}
39+sub root{
40+ any => [qw(object_member_arr value)],
41+ act => sub{$_[2]}
42+}
43+sub object{
44+ seq => [qw(begin_object object_inner end_object)],
45+ act => sub{$_[2]}
46+}
47+sub begin_object{re => qr/$wsc\{/}
48+sub end_object{re => qr/$wsc\}/}
49+sub object_inner{
50+ opt => "object_member_arr",
51+ act => sub{
52+ my ($self, $match) = @_;
53+ $match // $self->object
54+ }
55+}
56+sub object_member_arr{
57+ seq => [qw(object_member object_members optional_sep)],
58+ act => sub{
59+ my ($self, $first, $left) = @_;
60+ $self->object($first, @$left)
61+ }
62+}
63+sub object_member{
64+ seq => [qw(object_key object_key_sep value)],
65+ act => sub{
66+ my ($self, $key, $sep, $val) = @_;
67+ [$key, $val]
68+ }
69+}
70+sub object_members{
71+ many => "separated_object_member",
72+ act => sub{shift; [@_]}
73+}
74+sub separated_object_member{
75+ seq => [qw(separator object_member)],
76+ act => sub{$_[2]}
77+}
78+sub object_key{
79+ any => [qw(string_quoted object_key_noquote)],
80+ act => sub{$_[2]}
81+}
82+sub object_key_noquote{
83+ seq => [qw(quoteless_guard object_key_noquote_body)],
84+ act => sub{$_[2]}
85+}
86+sub object_key_noquote_body{
87+ re => qr/($nonpunct+)/,
88+ act => sub{$_[1][0]}
89+}
90+sub wsc{re => qr/$wsc/}
91+sub object_key_sep{re => qr/$wsc:/}
92+sub array{
93+ seq => [qw(begin_array array_inner end_array)],
94+ act => sub{$_[2]}
95+}
96+sub begin_array{re => qr/$wsc\[/}
97+sub end_array{re => qr/$wsc\]/}
98+sub array_inner{
99+ opt => "array_member_arr",
100+ act => sub{
101+ my ($self, $match) = @_;
102+ $match // $self->array;
103+ }
104+}
105+sub array_member_arr{
106+ seq => [qw(value array_members optional_sep)],
107+ act => sub{
108+ my ($self, $first, $left) = @_;
109+ $self->array($first, @$left)
110+ }
111+}
112+sub array_members{
113+ many => "separated_array_member",
114+ act => sub{shift; [@_]}
115+}
116+sub separated_array_member{
117+ seq => [qw(separator value)],
118+ act => sub{$_[2]}
119+}
120+sub number{
121+ re => qr/$wsc(-)?+((?:0|[1-9][0-9]*+)(?:\.[0-9]++)?+)(?:[eE]([-+]?+[0-9]++))?+$litend/,
122+ act => sub{
123+ my ($self, $match) = @_;
124+ my $v = $match->[1] * 10**($match->[2] // 0);
125+ $match->[0] ? -$v : $v
126+ }
127+}
128+sub string{
129+ any => [qw(string_multiline string_quoted string_noquote)],
130+ act => sub{$_[2]}
131+}
132+# XXX: not in RFC but quote by ' is allowed.
133+sub string_quoted{
134+ re => qr/$wsc(["'])((?:(?!\g{1})[^\0-\x1f\\]|\\.)*+)\g{1}/,
135+ act => sub{
136+ my ($self, $match) = @_;
137+ $self->string($self->decode_quoted_string($match->[1]))
138+ #$match->[1] =~ s{\\(u(.{4})|.)}{
139+ # {
140+ # '"' => '"',
141+ # "'" => "'",
142+ # "\\" => "\\",
143+ # "/" => "/",
144+ # "b" => "\b",
145+ # "f" => "\f",
146+ # "n" => "\n",
147+ # "r" => "\r",
148+ # "t" => "\t",
149+ # }->{$1} // chr(hex($2))
150+ #}ger;
151+ }
152+}
153+sub string_noquote{
154+ seq => [qw(quoteless_guard string_noquote_body)],
155+ act => sub{$_[2]}
156+}
157+sub string_noquote_body{
158+ re => qr/(${nonpunct}(?:\t|[^\0-\x1f])*)/,
159+ act => sub{
160+ my ($self, $match) = @_;
161+ $match->[0] =~ s/$inline_ws*+\z//r;
162+ }
163+}
164+sub string_multiline{
165+ re => qr/($wsc)'''((?:(?!''')$any)*+)'''/,
166+ act => sub{
167+ my ($self, $match) = @_;
168+ my $indent = length($match->[0] =~ s/.*\n//r);
169+ my $s = $match->[1] =~ s/\r//r;
170+
171+ # XXX: What is "column"
172+ $s =~ s/(?<=\n)${inline_ws}{0,$indent}//g;
173+ # XXX: RFC says: Whitespace on the first line is ignored.
174+ # my understanding: first line is ignored if it contains only ws.
175+ $s =~ s/^$inline_ws*\n//;
176+
177+ $s =~ s/\n\z//r
178+ }
179+}
180+
181+1;
--- /dev/null
+++ b/t/additional_string.t
@@ -0,0 +1,61 @@
1+#!/usr/bin/perl
2+
3+use warnings;
4+use strict;
5+use utf8;
6+use Test2::V0;
7+use Text::Hjson;
8+use Data::Dumper;
9+
10+my $v;
11+
12+# quoteless strings with quotation marks
13+eval{Text::Hjson->decode("{v: 'invalid\n}")};
14+ok($@, "quoteless string starting with single quote");
15+eval{Text::Hjson->decode("{v: \"invalid\n}")};
16+ok($@, "quoteless string starting with double quote");
17+$v = eval{Text::Hjson->decode("{v: valid\"\n}")};
18+is($v, {v => 'valid"'}, "quoteless string with quote at the end");
19+
20+# quoteless object keys with quotation marks
21+eval{Text::Hjson->decode("{'invalid: v\n}")};
22+ok($@, "quoteless object key starting with single quote");
23+eval{Text::Hjson->decode("{\"invalid: v\n}")};
24+ok($@, "quoteless object key starting with double quote");
25+$v = eval{Text::Hjson->decode("{valid\": v\n}")};
26+is($v, {'valid"' => "v"}, "quoteless object key with quote at the end");
27+
28+# invalid escape sequence
29+eval{Text::Hjson->decode('"\z"')};
30+ok($@, "invalid sequence \\z");
31+eval{Text::Hjson->decode('"\!"')};
32+ok($@, "invalid sequence \\!");
33+
34+# surrogates
35+eval{Text::Hjson->decode('"\ud835"')};
36+ok($@, "only high surrogate");
37+eval{Text::Hjson->decode('"\ud835!"')};
38+ok($@, "high surrogate before non-surrogate");
39+eval{Text::Hjson->decode('"\ud835\u003f"')};
40+ok($@, "high surrogate before non-surrogate \\u sequence");
41+eval{Text::Hjson->decode('"HH:\ud835\ud835"')};
42+ok($@, "repeating high surrogate");
43+eval{Text::Hjson->decode('"L:\udc2c"')};
44+ok($@, "only low surrogate");
45+eval{Text::Hjson->decode('"L:\udc2c\udc2c"')};
46+ok($@, "repeating low surrogate");
47+eval{Text::Hjson->decode('"L:\udc2c\ud835\udc2c"')};
48+ok($@, "low surrogate before high surrogate");
49+eval{Text::Hjson->decode("'\\ud835\x{dc2c}'")};
50+ok($@, "\\u encode only a half of a surrogate pair");
51+$v = eval{Text::Hjson->decode('"pairs: \ud835\udc12\ud835\udc2e\ud835\udc2b\ud835\udc2b\ud835\udc28\ud835\udc20\ud835\udc1a\ud835\udc2d\ud835\udc1e\ud835\udc2c!!"')};
52+print($@);
53+is($v, "pairs: 𝐒𝐮𝐫𝐫𝐨𝐠𝐚𝐭𝐞𝐬!!", "Correct surrogate pairs");
54+
55+# CR
56+$v = eval{Text::Hjson->decode(" '''abc\r\nde\rf\n\rghi\rjkl\n\n'''")};
57+is($v, "abc\ndef\nghijkl\n", "\\r in multiline string");
58+$v = eval{Text::Hjson->decode("abc\r\n")};
59+is($v, "abc", "\\r at the end of a quoteless string");
60+
61+done_testing
--- /dev/null
+++ b/t/empty_root_object.t
@@ -0,0 +1,15 @@
1+#!/usr/bin/perl
2+
3+use warnings;
4+use strict;
5+use utf8;
6+use Test2::V0;
7+use Text::Hjson;
8+
9+eval{Text::Hjson->decode("")};
10+ok($@, "empty toplevel object");
11+
12+eval{Text::Hjson->decode("#test\n")};
13+ok($@, "only a comment");
14+
15+done_testing;