Text-Hjson-perl
Fork

(Original repository, No fork origin)

R/O
HTTP
SSH
HTTPS

Commit

Commit MetaInfo

Revision	376428dbc496a40f77de92078a24ef954d49832f (tree)
Time	2023-04-03 02:42:49
Author	dyknon <dyknon@user...>
Commiter	dyknon

Log Message

Optimized

Change Summary

Incremental Difference

--- /dev/null

+++ b/Makefile

		@@ -0,0 +1,4 @@
	1	+test:
	2	+ perl -mlib=lib t/empty_root_object.t
	3	+ perl -mlib=lib t/additional_string.t
	4	+ perl -mlib=lib t/hjson_official.t

--- a/lib/Text/Hjson.pm

+++ b/lib/Text/Hjson.pm

		@@ -4,6 +4,9 @@ use warnings;
4	4	use strict;
5	5	use feature qw(postderef);
6	6	use Carp;
	7	+use Text::Hjson::Grammar;
	8	+
	9	+our $grammar = "Text::Hjson::Grammar";
7	10
8	11	our $ws = qr/[\t\r\n ]/;
9	12	our $inline_ws = qr/[\t\r ]/;

		@@ -29,11 +32,11 @@ sub null($){
29	32	my ($self) = @_;
30	33	undef
31	34	}
32		-sub array(@){
	35	+sub array($@){
33	36	my ($self, @inner) = @_;
34	37	\@inner
35	38	}
36		-sub object(@){
	39	+sub object($@){
37	40	my ($self, @inner) = @_;
38	41	my %ret;
39	42	for(@inner){

		@@ -41,176 +44,72 @@ sub object(@){
41	44	}
42	45	\%ret
43	46	}
	47	+sub string($$){
	48	+ my ($self, $str) = @_;
	49	+ $str
	50	+}
	51	+sub number($$){
	52	+ my ($self, $num) = @_;
	53	+ $num
	54	+}
	55	+sub decode_quoted_string($$){
	56	+ my ($self, $str) = @_;
44	57
45		-our %element = (
46		- literal => {
47		- re => qr/$wsc(true\|false\|null)$litend/,
48		- act => sub{
49		- my ($self, $match) = @_;
50		- {
51		- true => $self->true,
52		- false => $self->false,
53		- null => $self->null,
54		- }->{$match->[0]}
55		- }
56		- },
57		- separator => {re => qr/$separator/},
58		- optional_sep => {re => qr/$separator?/},
59		- # XXX: not docuemnted, but tests say quoteless string
60		- # starting with quote is not allowed.
61		- quoteless_guard => {re => qr/$wsc(?!["'])/},
62		- value => {
63		- any => [qw(literal object array number string)],
64		- act => sub{$_[2]}
65		- },
66		- root => {
67		- any => [qw(object_member_arr value)],
68		- act => sub{$_[2]}
69		- },
70		- object => {
71		- seq => [qw(begin_object object_inner end_object)],
72		- act => sub{$_[2]}
73		- },
74		- begin_object => {re => qr/$wsc\{/},
75		- end_object => {re => qr/$wsc\}/},
76		- object_inner => {
77		- opt => "object_member_arr",
78		- act => sub{
79		- my ($self, $match) = @_;
80		- $match // $self->object;
81		- }
82		- },
83		- object_member_arr => {
84		- seq => [qw(object_member object_members optional_sep)],
85		- act => sub{
86		- my ($self, $first, $left) = @_;
87		- $self->object($first, @$left)
88		- }
89		- },
90		- object_member => {
91		- seq => [qw(object_key object_key_sep value)],
92		- act => sub{
93		- my ($self, $key, $sep, $val) = @_;
94		- [$key, $val]
95		- }
96		- },
97		- object_members => {
98		- many => "separated_object_member",
99		- act => sub{shift; [@_]}
100		- },
101		- separated_object_member => {
102		- seq => [qw(separator object_member)],
103		- act => sub{$_[2]}
104		- },
105		- object_key => {
106		- any => [qw(string_quoted object_key_noquote)],
107		- act => sub{$_[2]}
108		- },
109		- object_key_noquote => {
110		- seq => [qw(quoteless_guard object_key_noquote_body)],
111		- act => sub{$_[2]}
112		- },
113		- object_key_noquote_body => {
114		- re => qr/($nonpunct+)/,
115		- act => sub{$_[1][0]}
116		- },
117		- object_key_sep => {re => qr/$wsc:/},
118		- array => {
119		- seq => [qw(begin_array array_inner end_array)],
120		- act => sub{$_[2]}
121		- },
122		- begin_array => {re => qr/$wsc\[/},
123		- end_array => {re => qr/$wsc\]/},
124		- array_inner => {
125		- opt => "array_member_arr",
126		- act => sub{
127		- my ($self, $match) = @_;
128		- $match // $self->array;
129		- }
130		- },
131		- array_member_arr => {
132		- seq => [qw(value array_members optional_sep)],
133		- act => sub{
134		- my ($self, $first, $left) = @_;
135		- $self->array($first, @$left)
136		- }
137		- },
138		- array_members => {
139		- many => "separated_array_member",
140		- act => sub{shift; [@_]}
141		- },
142		- separated_array_member => {
143		- seq => [qw(separator value)],
144		- act => sub{$_[2]}
145		- },
146		- number => {
147		- re => qr/$wsc(-)?+((?:0\|[1-9][0-9]*+)(?:\.[0-9]++)?+)(?:[eE]([-+]?+[0-9]++))?+$litend/,
148		- act => sub{
149		- my ($self, $match) = @_;
150		- my $v = $match->[1] * 10**($match->[2] // 0);
151		- $match->[0] ? -$v : $v
152		- }
153		- },
154		- string => {
155		- any => [qw(string_multiline string_quoted string_noquote)],
156		- act => sub{$_[2]}
157		- },
158		- # XXX: not in RFC but quote by ' is allowed.
159		- string_quoted => {
160		- re => qr/$wsc(["'])((?:(?!\g{1})[^\0-\x1f\\]\|\\["'\\\/bfnrt]\|\\u[0-9A-Fa-f]{4})*+)\g{1}/,
161		- act => sub{
162		- my ($self, $match) = @_;
163		- $match->[1] =~ s{\\(u(.{4})\|.)}{
164		- {
165		- '"' => '"',
166		- "'" => "'",
167		- "\\" => "\\",
168		- "/" => "/",
169		- "b" => "\b",
170		- "f" => "\f",
171		- "n" => "\n",
172		- "r" => "\r",
173		- "t" => "\t",
174		- }->{$1} // chr(hex($2))
175		- }ger;
176		- }
177		- },
178		- string_noquote => {
179		- seq => [qw(quoteless_guard string_noquote_body)],
180		- act => sub{$_[2]}
181		- },
182		- string_noquote_body => {
183		- re => qr/(${nonpunct}(?:\t\|[^\0-\x1f])*)/,
184		- act => sub{
185		- my ($self, $match) = @_;
186		- $match->[0] =~ s/$inline_ws*+\z//r;
	58	+ # process \uxxxx sequences
	59	+ my $newstr = "";
	60	+ my $pos = 0;
	61	+ while($str =~ /\\u/g){
	62	+ $newstr .= substr($str, $pos, $-[0] - $pos);
	63	+ my $start_hex = $+[0];
	64	+ my $hex = substr($str, $start_hex, 4);
	65	+ die "imcomplete \\u sequence" if(4 != length $hex);
	66	+ die "non-hex char in \\u sequence" if($hex =~ /[^0-9A-Fa-f]/);
	67	+ my $code = hex($hex);
	68	+ die "low surrogate prefixed by no high surrogate in \\u sequence"
	69	+ if(0xdc00 <= $code < 0xe000);
	70	+ if(0xd800 <= $code < 0xdc00){
	71	+ die "high surrogate suffixed by no \\u in \\u sequence"
	72	+ if(substr($str, $start_hex + 4, 2) ne "\\u");
	73	+ my $hex = substr($str, $start_hex + 6, 4);
	74	+ die "imcomplete \\u sequence" if(4 != length $hex);
	75	+ die "non-hex char in \\u sequence" if($hex =~ /[^0-9A-Fa-f]/);
	76	+ my $lowcode = hex($hex);
	77	+ die "high surrogate suffixed by no low surrogate in \\u sequence"
	78	+ unless(0xdc00 <= $lowcode < 0xe000);
	79	+ $newstr .= chr((($code & 0x3ff) + 0x40) << 10 \| $lowcode & 0x3ff);
	80	+ pos($str) = $pos = $start_hex + 10;
	81	+ }else{
	82	+ $newstr .= chr($code);
	83	+ pos($str) = $pos = $start_hex + 4;
187	84	}
188		- },
189		- string_multiline => {
190		- re => qr/($wsc)'''((?:(?!''')$any)*+)'''/,
191		- act => sub{
192		- my ($self, $match) = @_;
193		- my $indent = length($match->[0] =~ s/.*\n//r);
194		- my $s = $match->[1] =~ s/\r//r;
	85	+ }
	86	+ $str = $newstr . substr($str, $pos) if(length $newstr);
195	87
196		- # XXX: What is "column"
197		- $s =~ s/(?<=\n)${inline_ws}{0,$indent}//g;
198		- # XXX: RFC says: Whitespace on the first line is ignored.
199		- # my understanding: first line is ignored if it contains only ws.
200		- $s =~ s/^$inline_ws*\n//;
	88	+ # process other escape sequences
	89	+ my %escape = (
	90	+ '"' => '"',
	91	+ "'" => "'",
	92	+ "\\" => "\\",
	93	+ "/" => "/",
	94	+ "b" => "\b",
	95	+ "f" => "\f",
	96	+ "n" => "\n",
	97	+ "r" => "\r",
	98	+ "t" => "\t",
	99	+ );
	100	+ $str =~ s{\\(.)}{$escape{$1} // die "invalid escape sequence \\$1";}eg;
201	101
202		- $s =~ s/\n\z//r
203		- }
204		- },
205		-);
	102	+ $str
	103	+}
206	104
207	105	sub consume($$$$){
208	106	my ($self, $what, $in, $at) = @_;
209		- my $act = sub{scalar ($element{$what}{act} // sub{})->($self, @_)};
210		- croak "unknown consume type: $what" if(!$element{$what});
211		- if($element{$what}{re}){
	107	+ croak "unknown grammar: $what" if(!$grammar->can($what));
	108	+ my %g = $grammar->$what();
	109	+ my $act = sub{scalar ($g{act} // sub{})->($self, @_)};
	110	+ if($g{re}){
212	111	pos($in) = $at;
213		- if($in =~ /\G$element{$what}{re}/g){
	112	+ if($in =~ /\G$g{re}/g){
214	113	(
215	114	pos($in),
216	115	$act->([@{^CAPTURE}])

		@@ -218,47 +117,266 @@ sub consume($$$$){
218	117	}else{
219	118	()
220	119	}
221		- }elsif($element{$what}{seq}){
	120	+ }elsif($g{seq}){
222	121	my @got;
223		- for($element{$what}{seq}->@*){
	122	+ for($g{seq}->@*){
224	123	($at, my $read) = $self->consume($_, $in, $at);
225	124	return () if(!defined $at);
226	125	push @got, $read;
227	126	}
228	127	($at, $act->(@got))
229		- }elsif($element{$what}{any}){
	128	+ }elsif($g{any}){
230	129	my ($i, $bt, $read);
231		- for $i(0 .. $element{$what}{any}->$#*){
232		- ($bt, $read) = $self->consume($element{$what}{any}[$i], $in, $at);
	130	+ for $i(0 .. $g{any}->$#*){
	131	+ ($bt, $read) = $self->consume($g{any}[$i], $in, $at);
233	132	if(defined $bt){
234	133	return ($bt, $act->($i, $read));
235	134	}
236	135	}
237	136	()
238		- }elsif($element{$what}{many}){
	137	+ }elsif($g{many}){
239	138	my $bt;
240	139	my @got;
241	140	while(1){
242		- ($bt, my $read) = $self->consume($element{$what}{many}, $in, $at);
	141	+ ($bt, my $read) = $self->consume($g{many}, $in, $at);
243	142	last if(!defined $bt);
244	143	die "infinite loop detected in $what" if($at == $bt);
245	144	$at = $bt;
246	145	push @got, $read;
247	146	}
248	147	($at, $act->(@got))
249		- }elsif($element{$what}{opt}){
250		- my ($bt, $read) = $self->consume($element{$what}{opt}, $in, $at);
	148	+ }elsif($g{opt}){
	149	+ my ($bt, $read) = $self->consume($g{opt}, $in, $at);
251	150	($bt//$at, $act->($read))
252	151	}else{
253		- die "what is $what";
	152	+ die "broken grammar";
	153	+ }
	154	+}
	155	+
	156	+sub consume_value($$$){
	157	+ my ($self, $in, $at) = @_;
	158	+ my $s = substr($in, $at, 1);
	159	+ my @try;
	160	+ if($s eq "{"){
	161	+ @try = qw(consume_object);
	162	+ }elsif($s eq "["){
	163	+ @try = qw(consume_array);
	164	+ }elsif(ord(0) <= ord($s) <= ord(9) \|\| $s eq "-"){
	165	+ @try = qw(consume_number consume_string);
	166	+ }elsif($s eq "t" \|\| $s eq "f" \|\| $s eq "n"){
	167	+ @try = qw(consume_literal consume_string);
	168	+ }else{
	169	+ @try = qw(consume_string);
	170	+ }
	171	+ for(@try){
	172	+ my ($bt, $value) = $self->$_($in, $at);
	173	+ return ($bt, $value) if(defined $bt);
	174	+ }
	175	+ ()
	176	+}
	177	+
	178	+sub consume_root($$$){
	179	+ my ($self, $in, $at) = @_;
	180	+ my $s = substr($in, $at, 1);
	181	+ my @try;
	182	+ if($s eq "{"){
	183	+ @try = qw(consume_object);
	184	+ }elsif($s eq "["){
	185	+ @try = qw(consume_array);
	186	+ }elsif(ord(0) <= ord($s) <= ord(1) \|\| $s eq "-"){
	187	+ @try = qw(consume_root_object consume_number consume_string);
	188	+ }elsif($s eq "t" \|\| $s eq "f" \|\| $s eq "n"){
	189	+ @try = qw(consume_root_object consume_literal consume_string);
	190	+ }else{
	191	+ @try = qw(consume_root_object consume_string);
	192	+ }
	193	+ for(@try){
	194	+ my ($bt, $value) = $self->$_($in, $at);
	195	+ return ($bt, $value) if(defined $bt);
	196	+ }
	197	+ ()
	198	+}
	199	+
	200	+sub consume_literal($$$){
	201	+ my ($self, $in, $at) = @_;
	202	+ $self->{cre_literal} //= qr/\G(true\|false\|null)$litend/;
	203	+ pos($in) = $at;
	204	+ if($in =~ /$self->{cre_literal}/g){
	205	+ (pos($in), {
	206	+ true => $self->true,
	207	+ false => $self->false,
	208	+ null => $self->null,
	209	+ }->{$1})
	210	+ }else{()}
	211	+}
	212	+
	213	+sub consume_number($$$){
	214	+ my ($self, $in, $at) = @_;
	215	+ $self->{cre_number} //= qr/\G(-)?+((?:0\|[1-9][0-9]*+)(?:\.[0-9]++)?+)(?:[eE]([-+]?+[0-9]++))?+$litend/;
	216	+
	217	+ pos($in) = $at;
	218	+ if($in =~ /$self->{cre_number}/g){
	219	+ my $v = $2 * 10**($3 // 0);
	220	+ (pos($in), $self->number($1 ? -$v : $v))
	221	+ }else{()}
	222	+}
	223	+
	224	+sub consume_string($$$){
	225	+ my ($self, $in, $at) = @_;
	226	+ $self->{cre_dquoted_string} //= qr/\G"((?:[^\0-\x1f\\"]\|\\.)*+)"/;
	227	+ $self->{cre_squoted_string} //= qr/\G'((?:[^\0-\x1f\\']\|\\.)*+)'/;
	228	+ $self->{cre_quoteless_string} //= qr/\G([^'"\0-\x20,:\[\]{}](?:\t\|[^\0-\x1f])*)/;
	229	+ my $start1 = substr($in, $at, 1);
	230	+ if($start1 eq "'"){
	231	+ my $start3 = substr($in, $at, 3);
	232	+ if($start3 eq "'''"){
	233	+ pos($in) = $at;
	234	+ die if($in !~ /($inline*)\G/);
	235	+ my $column = length($1);
	236	+ pos($in) = $at + 3;
	237	+ return if($in !~ /\G($any*?)'''/g);
	238	+ my $str = $1;
	239	+ $str =~ s/\r//g;
	240	+ # XXX: it may not compatible with js-Hjson.
	241	+ # but I don't understand 'column' well.
	242	+ $str =~ s/(?<=\n)${inline_ws}{0,$column}//g;
	243	+ # XXX: it is not compatible with js-Hjson. but I think its better.
	244	+ $str =~ s/^$inline_ws*\n//;
	245	+ $str =~ s/\n\z//;
	246	+ (pos($in), $str)
	247	+ }else{
	248	+ pos($in) = $at;
	249	+ return if($in !~ /$self->{cre_squoted_string}/g);
	250	+ (pos($in), $self->decode_quoted_string($1))
	251	+ }
	252	+ }elsif($start1 eq '"'){
	253	+ pos($in) = $at;
	254	+ return if($in !~ /$self->{cre_dquoted_string}/g);
	255	+ (pos($in), $self->decode_quoted_string($1))
	256	+ }else{
	257	+ pos($in) = $at;
	258	+ return if($in !~ /$self->{cre_quoteless_string}/g);
	259	+ (pos($in), $1 =~ s/$inline_ws*+\z//r)
	260	+ }
	261	+}
	262	+
	263	+sub consume_object($$$){
	264	+ my ($self, $in, $at) = @_;
	265	+ $self->{cre_wsc} //= qr/\G$wsc/;
	266	+
	267	+ return if(substr($in, $at++, 1) ne "{");
	268	+ pos($in) = $at;
	269	+ die if($in !~ /$self->{cre_wsc}/g);
	270	+ ($at, my @content) = $self->consume_object_inner($in, pos($in));
	271	+ return if(!defined $at);
	272	+ pos($in) = $at;
	273	+ die if($in !~ /$self->{cre_wsc}/g);
	274	+ return if(substr($in, pos($in), 1) ne "}");
	275	+ (pos($in)+1, $self->object(@content))
	276	+}
	277	+
	278	+sub consume_root_object($$$){
	279	+ my ($self, $in, $at) = @_;
	280	+ ($at, my @content) = $self->consume_object_inner($in, $at);
	281	+ return if(!defined $at \|\| !@content);
	282	+ ($at, $self->object(@content))
	283	+}
	284	+
	285	+sub consume_object_inner($$$){
	286	+ my ($self, $in, $at) = @_;
	287	+ $self->{cre_wsc} //= qr/\G$wsc/;
	288	+ $self->{cre_quoteless_key} //= qr/\G([^'"\0-\x20,:\[\]{}][^\0-\x20,:\[\]{}]*)/;
	289	+ $self->{cre_dquoted_string} //= qr/\G"((?:[^\0-\x1f\\"]\|\\.)*+)"/;
	290	+ $self->{cre_squoted_string} //= qr/\G'((?:[^\0-\x1f\\']\|\\.)*+)'/;
	291	+ $self->{cre_value_separator} //= qr/\G$separator/;
	292	+
	293	+ my @ret;
	294	+ pos($in) = $at;
	295	+ while(1){
	296	+ my $keystart = substr($in, pos($in), 1);
	297	+ return ($at, @ret) if($keystart eq "}" \|\| $keystart eq "");
	298	+ $at = pos($in);
	299	+ my $key;
	300	+ if($keystart eq '"'){
	301	+ return if($in !~ /$self->{cre_dquoted_string}/g); $at = pos($in);
	302	+ $key = $self->decode_quoted_string($1);
	303	+ }elsif($keystart eq "'"){
	304	+ return if($in !~ /$self->{cre_squoted_string}/g); $at = pos($in);
	305	+ $key = $self->decode_quoted_string($1);
	306	+ }else{
	307	+ return if($in !~ /$self->{cre_quoteless_key}/g); $at = pos($in);
	308	+ $key = $1;
	309	+ }
	310	+
	311	+ pos($in) = $at; die if($in !~ /$self->{cre_wsc}/g); $at = pos($in);
	312	+ return if(substr($in, $at++, 1) ne ":");
	313	+ pos($in) = $at; die if($in !~ /$self->{cre_wsc}/g); $at = pos($in);
	314	+
	315	+ ($at, my $value) = $self->consume_value($in, $at); pos($in) = $at;
	316	+ return if(!defined $at);
	317	+
	318	+ push(@ret, [$key, $value]);
	319	+ last if($in !~ /$self->{cre_value_separator}/g); $at = pos($in);
	320	+ die if($in !~ /$self->{cre_wsc}/g);
	321	+ }
	322	+ ($at, @ret)
	323	+}
	324	+
	325	+sub consume_array($$$){
	326	+ my ($self, $in, $at) = @_;
	327	+ $self->{cre_wsc} //= qr/\G$wsc/;
	328	+ $self->{cre_value_separator} //= qr/\G$separator/;
	329	+
	330	+ return if(substr($in, $at++, 1) ne "[");
	331	+ pos($in) = $at;
	332	+ die if($in !~ /$self->{cre_wsc}/g); $at = pos($in);
	333	+
	334	+ my @content;
	335	+ while(1){
	336	+ my $nextstart = substr($in, pos($in), 1);
	337	+ if($nextstart eq "]"){
	338	+ return (pos($in)+1, $self->array(@content));
	339	+ }
	340	+ return if($nextstart eq "");
	341	+ $at = pos($in);
	342	+
	343	+ ($at, my $value) = $self->consume_value($in, $at); pos($in) = $at;
	344	+ return if(!defined $at);
	345	+
	346	+ push(@content, $value);
	347	+ if($in !~ /$self->{cre_value_separator}/g){
	348	+ pos($in) = $at;
	349	+ die if($in !~ /$self->{cre_wsc}/g);
	350	+ return if(substr($in, pos($in), 1) ne "]");
	351	+ return (pos($in)+1, $self->array(@content));
	352	+ }
	353	+ $at = pos($in);
	354	+ die if($in !~ /$self->{cre_wsc}/g);
	355	+ }
	356	+}
	357	+
	358	+sub consume_wsc($$$){
	359	+ my ($self, $in, $at) = @_;
	360	+ $self->{cre_wsc} //= qr/\G$wsc/;
	361	+ pos($in) = $at;
	362	+ if($in =~ /$self->{cre_wsc}/g){
	363	+ (pos($in), undef)
	364	+ }else{
	365	+ die "bug";
254	366	}
255	367	}
256	368
257	369	sub decode($$){
258	370	my ($self, $in) = @_;
259		- my ($end, $value) = $self->consume("root", $in, 0);
260		- croak "Invalid Hjson string" if(!defined $end);
261		- pos($in) = $end;
262		- croak "Garbage after Hjson value" if($in !~ /\G$wsc\z/g);
	371	+ $self = bless {}, $self if(!length ref $self);
	372	+ my ($p,) = $self->consume_wsc($in, 0);
	373	+ die if(!defined $p);
	374	+ ($p, my $value) = $self->consume_root($in, $p);
	375	+ croak "Invalid Hjson string" if(!defined $p);
	376	+ ($p,) = $self->consume_wsc($in, $p);
	377	+ die if(!defined $p);
	378	+ croak "Garbage after Hjson value" if($p != length($in));
263	379	$value
264	380	}
	381	+
	382	+1;

--- /dev/null

+++ b/lib/Text/Hjson/Grammar.pm

		@@ -0,0 +1,181 @@
	1	+package Text::Hjson::Grammar;
	2	+
	3	+use warnings;
	4	+use strict;
	5	+use feature qw(postderef);
	6	+
	7	+our $ws = qr/[\t\r\n ]/;
	8	+our $inline_ws = qr/[\t\r ]/;
	9	+our $inline = qr/$inline_ws\|[^\0-\x20]/;
	10	+our $any = qr/$ws\|[^\0-\x20]/;
	11	+our $line_comment = qr/(?:#\|\/\/)$inline*+/;
	12	+our $block_comment = qr/\/\(?:[^]\|\(?!\/))+\*\//;
	13	+our $comment = qr/$line_comment\|$block_comment/;
	14	+our $wsc = qr/(?:$ws\|$comment)*+/;
	15	+our $litend = qr/(?![ \t]*+[^\0-\x20#,\/\[\]{}])/;
	16	+our $separator = qr/$wsc,\|(?:$inline_ws\|$comment)*+\n/;
	17	+our $nonpunct = qr/[^\0-\x20,:\[\]{}]/;
	18	+
	19	+sub literal{
	20	+ re => qr/$wsc(true\|false\|null)$litend/,
	21	+ act => sub{
	22	+ my ($self, $match) = @_;
	23	+ {
	24	+ true => $self->true,
	25	+ false => $self->false,
	26	+ null => $self->null,
	27	+ }->{$match->[0]}
	28	+ }
	29	+}
	30	+sub separator{re => qr/$separator/}
	31	+sub optional_sep{re => qr/$separator?/}
	32	+# XXX: not docuemnted, but tests say quoteless string
	33	+# starting with quote is not allowed.
	34	+sub quoteless_guard{re => qr/$wsc(?!["'])/}
	35	+sub value{
	36	+ any => [qw(literal object array number string)],
	37	+ act => sub{$_[2]}
	38	+}
	39	+sub root{
	40	+ any => [qw(object_member_arr value)],
	41	+ act => sub{$_[2]}
	42	+}
	43	+sub object{
	44	+ seq => [qw(begin_object object_inner end_object)],
	45	+ act => sub{$_[2]}
	46	+}
	47	+sub begin_object{re => qr/$wsc\{/}
	48	+sub end_object{re => qr/$wsc\}/}
	49	+sub object_inner{
	50	+ opt => "object_member_arr",
	51	+ act => sub{
	52	+ my ($self, $match) = @_;
	53	+ $match // $self->object
	54	+ }
	55	+}
	56	+sub object_member_arr{
	57	+ seq => [qw(object_member object_members optional_sep)],
	58	+ act => sub{
	59	+ my ($self, $first, $left) = @_;
	60	+ $self->object($first, @$left)
	61	+ }
	62	+}
	63	+sub object_member{
	64	+ seq => [qw(object_key object_key_sep value)],
	65	+ act => sub{
	66	+ my ($self, $key, $sep, $val) = @_;
	67	+ [$key, $val]
	68	+ }
	69	+}
	70	+sub object_members{
	71	+ many => "separated_object_member",
	72	+ act => sub{shift; [@_]}
	73	+}
	74	+sub separated_object_member{
	75	+ seq => [qw(separator object_member)],
	76	+ act => sub{$_[2]}
	77	+}
	78	+sub object_key{
	79	+ any => [qw(string_quoted object_key_noquote)],
	80	+ act => sub{$_[2]}
	81	+}
	82	+sub object_key_noquote{
	83	+ seq => [qw(quoteless_guard object_key_noquote_body)],
	84	+ act => sub{$_[2]}
	85	+}
	86	+sub object_key_noquote_body{
	87	+ re => qr/($nonpunct+)/,
	88	+ act => sub{$_[1][0]}
	89	+}
	90	+sub wsc{re => qr/$wsc/}
	91	+sub object_key_sep{re => qr/$wsc:/}
	92	+sub array{
	93	+ seq => [qw(begin_array array_inner end_array)],
	94	+ act => sub{$_[2]}
	95	+}
	96	+sub begin_array{re => qr/$wsc\[/}
	97	+sub end_array{re => qr/$wsc\]/}
	98	+sub array_inner{
	99	+ opt => "array_member_arr",
	100	+ act => sub{
	101	+ my ($self, $match) = @_;
	102	+ $match // $self->array;
	103	+ }
	104	+}
	105	+sub array_member_arr{
	106	+ seq => [qw(value array_members optional_sep)],
	107	+ act => sub{
	108	+ my ($self, $first, $left) = @_;
	109	+ $self->array($first, @$left)
	110	+ }
	111	+}
	112	+sub array_members{
	113	+ many => "separated_array_member",
	114	+ act => sub{shift; [@_]}
	115	+}
	116	+sub separated_array_member{
	117	+ seq => [qw(separator value)],
	118	+ act => sub{$_[2]}
	119	+}
	120	+sub number{
	121	+ re => qr/$wsc(-)?+((?:0\|[1-9][0-9]*+)(?:\.[0-9]++)?+)(?:[eE]([-+]?+[0-9]++))?+$litend/,
	122	+ act => sub{
	123	+ my ($self, $match) = @_;
	124	+ my $v = $match->[1] * 10**($match->[2] // 0);
	125	+ $match->[0] ? -$v : $v
	126	+ }
	127	+}
	128	+sub string{
	129	+ any => [qw(string_multiline string_quoted string_noquote)],
	130	+ act => sub{$_[2]}
	131	+}
	132	+# XXX: not in RFC but quote by ' is allowed.
	133	+sub string_quoted{
	134	+ re => qr/$wsc(["'])((?:(?!\g{1})[^\0-\x1f\\]\|\\.)*+)\g{1}/,
	135	+ act => sub{
	136	+ my ($self, $match) = @_;
	137	+ $self->string($self->decode_quoted_string($match->[1]))
	138	+ #$match->[1] =~ s{\\(u(.{4})\|.)}{
	139	+ # {
	140	+ # '"' => '"',
	141	+ # "'" => "'",
	142	+ # "\\" => "\\",
	143	+ # "/" => "/",
	144	+ # "b" => "\b",
	145	+ # "f" => "\f",
	146	+ # "n" => "\n",
	147	+ # "r" => "\r",
	148	+ # "t" => "\t",
	149	+ # }->{$1} // chr(hex($2))
	150	+ #}ger;
	151	+ }
	152	+}
	153	+sub string_noquote{
	154	+ seq => [qw(quoteless_guard string_noquote_body)],
	155	+ act => sub{$_[2]}
	156	+}
	157	+sub string_noquote_body{
	158	+ re => qr/(${nonpunct}(?:\t\|[^\0-\x1f])*)/,
	159	+ act => sub{
	160	+ my ($self, $match) = @_;
	161	+ $match->[0] =~ s/$inline_ws*+\z//r;
	162	+ }
	163	+}
	164	+sub string_multiline{
	165	+ re => qr/($wsc)'''((?:(?!''')$any)*+)'''/,
	166	+ act => sub{
	167	+ my ($self, $match) = @_;
	168	+ my $indent = length($match->[0] =~ s/.*\n//r);
	169	+ my $s = $match->[1] =~ s/\r//r;
	170	+
	171	+ # XXX: What is "column"
	172	+ $s =~ s/(?<=\n)${inline_ws}{0,$indent}//g;
	173	+ # XXX: RFC says: Whitespace on the first line is ignored.
	174	+ # my understanding: first line is ignored if it contains only ws.
	175	+ $s =~ s/^$inline_ws*\n//;
	176	+
	177	+ $s =~ s/\n\z//r
	178	+ }
	179	+}
	180	+
	181	+1;

--- /dev/null

+++ b/t/additional_string.t

		@@ -0,0 +1,61 @@
	1	+#!/usr/bin/perl
	2	+
	3	+use warnings;
	4	+use strict;
	5	+use utf8;
	6	+use Test2::V0;
	7	+use Text::Hjson;
	8	+use Data::Dumper;
	9	+
	10	+my $v;
	11	+
	12	+# quoteless strings with quotation marks
	13	+eval{Text::Hjson->decode("{v: 'invalid\n}")};
	14	+ok($@, "quoteless string starting with single quote");
	15	+eval{Text::Hjson->decode("{v: \"invalid\n}")};
	16	+ok($@, "quoteless string starting with double quote");
	17	+$v = eval{Text::Hjson->decode("{v: valid\"\n}")};
	18	+is($v, {v => 'valid"'}, "quoteless string with quote at the end");
	19	+
	20	+# quoteless object keys with quotation marks
	21	+eval{Text::Hjson->decode("{'invalid: v\n}")};
	22	+ok($@, "quoteless object key starting with single quote");
	23	+eval{Text::Hjson->decode("{\"invalid: v\n}")};
	24	+ok($@, "quoteless object key starting with double quote");
	25	+$v = eval{Text::Hjson->decode("{valid\": v\n}")};
	26	+is($v, {'valid"' => "v"}, "quoteless object key with quote at the end");
	27	+
	28	+# invalid escape sequence
	29	+eval{Text::Hjson->decode('"\z"')};
	30	+ok($@, "invalid sequence \\z");
	31	+eval{Text::Hjson->decode('"\!"')};
	32	+ok($@, "invalid sequence \\!");
	33	+
	34	+# surrogates
	35	+eval{Text::Hjson->decode('"\ud835"')};
	36	+ok($@, "only high surrogate");
	37	+eval{Text::Hjson->decode('"\ud835!"')};
	38	+ok($@, "high surrogate before non-surrogate");
	39	+eval{Text::Hjson->decode('"\ud835\u003f"')};
	40	+ok($@, "high surrogate before non-surrogate \\u sequence");
	41	+eval{Text::Hjson->decode('"HH:\ud835\ud835"')};
	42	+ok($@, "repeating high surrogate");
	43	+eval{Text::Hjson->decode('"L:\udc2c"')};
	44	+ok($@, "only low surrogate");
	45	+eval{Text::Hjson->decode('"L:\udc2c\udc2c"')};
	46	+ok($@, "repeating low surrogate");
	47	+eval{Text::Hjson->decode('"L:\udc2c\ud835\udc2c"')};
	48	+ok($@, "low surrogate before high surrogate");
	49	+eval{Text::Hjson->decode("'\\ud835\x{dc2c}'")};
	50	+ok($@, "\\u encode only a half of a surrogate pair");
	51	+$v = eval{Text::Hjson->decode('"pairs: \ud835\udc12\ud835\udc2e\ud835\udc2b\ud835\udc2b\ud835\udc28\ud835\udc20\ud835\udc1a\ud835\udc2d\ud835\udc1e\ud835\udc2c!!"')};
	52	+print($@);
	53	+is($v, "pairs: 𝐒𝐮𝐫𝐫𝐨𝐠𝐚𝐭𝐞𝐬!!", "Correct surrogate pairs");
	54	+
	55	+# CR
	56	+$v = eval{Text::Hjson->decode(" '''abc\r\nde\rf\n\rghi\rjkl\n\n'''")};
	57	+is($v, "abc\ndef\nghijkl\n", "\\r in multiline string");
	58	+$v = eval{Text::Hjson->decode("abc\r\n")};
	59	+is($v, "abc", "\\r at the end of a quoteless string");
	60	+
	61	+done_testing

--- /dev/null

+++ b/t/empty_root_object.t

		@@ -0,0 +1,15 @@
	1	+#!/usr/bin/perl
	2	+
	3	+use warnings;
	4	+use strict;
	5	+use utf8;
	6	+use Test2::V0;
	7	+use Text::Hjson;
	8	+
	9	+eval{Text::Hjson->decode("")};
	10	+ok($@, "empty toplevel object");
	11	+
	12	+eval{Text::Hjson->decode("#test\n")};
	13	+ok($@, "only a comment");
	14	+
	15	+done_testing;

Text-Hjson-perl Fork

Commit

Tags

Frequently used words (click to add to your profile)

Commit MetaInfo

Log Message

Change Summary

Incremental Difference

Text-Hjson-perl
Fork