Commit | Line | Data |
---|---|---|
d5424315 DG |
1 | package JSON::PP; |
2 | ||
3 | # JSON-2.0 | |
4 | ||
5 | use 5.005; | |
6 | use strict; | |
dec273dc CBW |
7 | |
8 | use Exporter (); | |
9 | BEGIN { @JSON::PP::ISA = ('Exporter') } | |
10 | ||
d5424315 | 11 | use overload (); |
dec273dc | 12 | use JSON::PP::Boolean; |
d5424315 DG |
13 | |
14 | use Carp (); | |
d5424315 DG |
15 | #use Devel::Peek; |
16 | ||
9c399ae2 | 17 | $JSON::PP::VERSION = '2.97000'; |
d5424315 DG |
18 | |
19 | @JSON::PP::EXPORT = qw(encode_json decode_json from_json to_json); | |
20 | ||
21 | # instead of hash-access, i tried index-access for speed. | |
22 | # but this method is not faster than what i expected. so it will be changed. | |
23 | ||
24 | use constant P_ASCII => 0; | |
25 | use constant P_LATIN1 => 1; | |
26 | use constant P_UTF8 => 2; | |
27 | use constant P_INDENT => 3; | |
28 | use constant P_CANONICAL => 4; | |
29 | use constant P_SPACE_BEFORE => 5; | |
30 | use constant P_SPACE_AFTER => 6; | |
31 | use constant P_ALLOW_NONREF => 7; | |
32 | use constant P_SHRINK => 8; | |
33 | use constant P_ALLOW_BLESSED => 9; | |
34 | use constant P_CONVERT_BLESSED => 10; | |
35 | use constant P_RELAXED => 11; | |
36 | ||
37 | use constant P_LOOSE => 12; | |
38 | use constant P_ALLOW_BIGNUM => 13; | |
39 | use constant P_ALLOW_BAREKEY => 14; | |
40 | use constant P_ALLOW_SINGLEQUOTE => 15; | |
41 | use constant P_ESCAPE_SLASH => 16; | |
42 | use constant P_AS_NONBLESSED => 17; | |
43 | ||
44 | use constant P_ALLOW_UNKNOWN => 18; | |
45 | ||
46 | use constant OLD_PERL => $] < 5.008 ? 1 : 0; | |
dec273dc CBW |
47 | use constant USE_B => 0; |
48 | ||
49 | BEGIN { | |
50 | if (USE_B) { | |
51 | require B; | |
52 | } | |
53 | } | |
d5424315 DG |
54 | |
55 | BEGIN { | |
56 | my @xs_compati_bit_properties = qw( | |
57 | latin1 ascii utf8 indent canonical space_before space_after allow_nonref shrink | |
58 | allow_blessed convert_blessed relaxed allow_unknown | |
59 | ); | |
60 | my @pp_bit_properties = qw( | |
61 | allow_singlequote allow_bignum loose | |
62 | allow_barekey escape_slash as_nonblessed | |
63 | ); | |
64 | ||
a1e5c561 | 65 | # Perl version check, Unicode handling is enabled? |
d5424315 | 66 | # Helper module sets @JSON::PP::_properties. |
dec273dc | 67 | if ( OLD_PERL ) { |
d5424315 DG |
68 | my $helper = $] >= 5.006 ? 'JSON::PP::Compat5006' : 'JSON::PP::Compat5005'; |
69 | eval qq| require $helper |; | |
70 | if ($@) { Carp::croak $@; } | |
71 | } | |
72 | ||
73 | for my $name (@xs_compati_bit_properties, @pp_bit_properties) { | |
dec273dc | 74 | my $property_id = 'P_' . uc($name); |
d5424315 DG |
75 | |
76 | eval qq/ | |
77 | sub $name { | |
78 | my \$enable = defined \$_[1] ? \$_[1] : 1; | |
79 | ||
80 | if (\$enable) { | |
dec273dc | 81 | \$_[0]->{PROPS}->[$property_id] = 1; |
d5424315 DG |
82 | } |
83 | else { | |
dec273dc | 84 | \$_[0]->{PROPS}->[$property_id] = 0; |
d5424315 DG |
85 | } |
86 | ||
87 | \$_[0]; | |
88 | } | |
89 | ||
90 | sub get_$name { | |
dec273dc | 91 | \$_[0]->{PROPS}->[$property_id] ? 1 : ''; |
d5424315 DG |
92 | } |
93 | /; | |
94 | } | |
95 | ||
96 | } | |
97 | ||
98 | ||
99 | ||
100 | # Functions | |
101 | ||
d5424315 DG |
102 | my $JSON; # cache |
103 | ||
104 | sub encode_json ($) { # encode | |
105 | ($JSON ||= __PACKAGE__->new->utf8)->encode(@_); | |
106 | } | |
107 | ||
108 | ||
109 | sub decode_json { # decode | |
110 | ($JSON ||= __PACKAGE__->new->utf8)->decode(@_); | |
111 | } | |
112 | ||
113 | # Obsoleted | |
114 | ||
115 | sub to_json($) { | |
116 | Carp::croak ("JSON::PP::to_json has been renamed to encode_json."); | |
117 | } | |
118 | ||
119 | ||
120 | sub from_json($) { | |
121 | Carp::croak ("JSON::PP::from_json has been renamed to decode_json."); | |
122 | } | |
123 | ||
124 | ||
125 | # Methods | |
126 | ||
127 | sub new { | |
128 | my $class = shift; | |
129 | my $self = { | |
130 | max_depth => 512, | |
131 | max_size => 0, | |
d5424315 DG |
132 | indent_length => 3, |
133 | }; | |
134 | ||
135 | bless $self, $class; | |
136 | } | |
137 | ||
138 | ||
139 | sub encode { | |
140 | return $_[0]->PP_encode_json($_[1]); | |
141 | } | |
142 | ||
143 | ||
144 | sub decode { | |
145 | return $_[0]->PP_decode_json($_[1], 0x00000000); | |
146 | } | |
147 | ||
148 | ||
149 | sub decode_prefix { | |
150 | return $_[0]->PP_decode_json($_[1], 0x00000001); | |
151 | } | |
152 | ||
153 | ||
154 | # accessor | |
155 | ||
156 | ||
157 | # pretty printing | |
158 | ||
159 | sub pretty { | |
160 | my ($self, $v) = @_; | |
161 | my $enable = defined $v ? $v : 1; | |
162 | ||
163 | if ($enable) { # indent_length(3) for JSON::XS compatibility | |
dec273dc | 164 | $self->indent(1)->space_before(1)->space_after(1); |
d5424315 DG |
165 | } |
166 | else { | |
167 | $self->indent(0)->space_before(0)->space_after(0); | |
168 | } | |
169 | ||
170 | $self; | |
171 | } | |
172 | ||
173 | # etc | |
174 | ||
175 | sub max_depth { | |
176 | my $max = defined $_[1] ? $_[1] : 0x80000000; | |
177 | $_[0]->{max_depth} = $max; | |
178 | $_[0]; | |
179 | } | |
180 | ||
181 | ||
182 | sub get_max_depth { $_[0]->{max_depth}; } | |
183 | ||
184 | ||
185 | sub max_size { | |
186 | my $max = defined $_[1] ? $_[1] : 0; | |
187 | $_[0]->{max_size} = $max; | |
188 | $_[0]; | |
189 | } | |
190 | ||
191 | ||
192 | sub get_max_size { $_[0]->{max_size}; } | |
193 | ||
194 | ||
195 | sub filter_json_object { | |
dec273dc CBW |
196 | if (defined $_[1] and ref $_[1] eq 'CODE') { |
197 | $_[0]->{cb_object} = $_[1]; | |
198 | } else { | |
199 | delete $_[0]->{cb_object}; | |
200 | } | |
d5424315 DG |
201 | $_[0]->{F_HOOK} = ($_[0]->{cb_object} or $_[0]->{cb_sk_object}) ? 1 : 0; |
202 | $_[0]; | |
203 | } | |
204 | ||
205 | sub filter_json_single_key_object { | |
dec273dc CBW |
206 | if (@_ == 1 or @_ > 3) { |
207 | Carp::croak("Usage: JSON::PP::filter_json_single_key_object(self, key, callback = undef)"); | |
208 | } | |
209 | if (defined $_[2] and ref $_[2] eq 'CODE') { | |
d5424315 | 210 | $_[0]->{cb_sk_object}->{$_[1]} = $_[2]; |
dec273dc CBW |
211 | } else { |
212 | delete $_[0]->{cb_sk_object}->{$_[1]}; | |
213 | delete $_[0]->{cb_sk_object} unless %{$_[0]->{cb_sk_object} || {}}; | |
d5424315 DG |
214 | } |
215 | $_[0]->{F_HOOK} = ($_[0]->{cb_object} or $_[0]->{cb_sk_object}) ? 1 : 0; | |
216 | $_[0]; | |
217 | } | |
218 | ||
219 | sub indent_length { | |
220 | if (!defined $_[1] or $_[1] > 15 or $_[1] < 0) { | |
221 | Carp::carp "The acceptable range of indent_length() is 0 to 15."; | |
222 | } | |
223 | else { | |
224 | $_[0]->{indent_length} = $_[1]; | |
225 | } | |
226 | $_[0]; | |
227 | } | |
228 | ||
229 | sub get_indent_length { | |
230 | $_[0]->{indent_length}; | |
231 | } | |
232 | ||
233 | sub sort_by { | |
234 | $_[0]->{sort_by} = defined $_[1] ? $_[1] : 1; | |
235 | $_[0]; | |
236 | } | |
237 | ||
238 | sub allow_bigint { | |
dec273dc CBW |
239 | Carp::carp("allow_bigint() is obsoleted. use allow_bignum() instead."); |
240 | $_[0]->allow_bignum; | |
d5424315 DG |
241 | } |
242 | ||
243 | ############################### | |
244 | ||
245 | ### | |
246 | ### Perl => JSON | |
247 | ### | |
248 | ||
249 | ||
250 | { # Convert | |
251 | ||
252 | my $max_depth; | |
253 | my $indent; | |
254 | my $ascii; | |
255 | my $latin1; | |
256 | my $utf8; | |
257 | my $space_before; | |
258 | my $space_after; | |
259 | my $canonical; | |
260 | my $allow_blessed; | |
261 | my $convert_blessed; | |
262 | ||
263 | my $indent_length; | |
264 | my $escape_slash; | |
265 | my $bignum; | |
266 | my $as_nonblessed; | |
267 | ||
268 | my $depth; | |
269 | my $indent_count; | |
270 | my $keysort; | |
271 | ||
272 | ||
273 | sub PP_encode_json { | |
274 | my $self = shift; | |
275 | my $obj = shift; | |
276 | ||
277 | $indent_count = 0; | |
278 | $depth = 0; | |
279 | ||
dec273dc | 280 | my $props = $self->{PROPS}; |
d5424315 DG |
281 | |
282 | ($ascii, $latin1, $utf8, $indent, $canonical, $space_before, $space_after, $allow_blessed, | |
283 | $convert_blessed, $escape_slash, $bignum, $as_nonblessed) | |
dec273dc | 284 | = @{$props}[P_ASCII .. P_SPACE_AFTER, P_ALLOW_BLESSED, P_CONVERT_BLESSED, |
d5424315 DG |
285 | P_ESCAPE_SLASH, P_ALLOW_BIGNUM, P_AS_NONBLESSED]; |
286 | ||
287 | ($max_depth, $indent_length) = @{$self}{qw/max_depth indent_length/}; | |
288 | ||
289 | $keysort = $canonical ? sub { $a cmp $b } : undef; | |
290 | ||
291 | if ($self->{sort_by}) { | |
292 | $keysort = ref($self->{sort_by}) eq 'CODE' ? $self->{sort_by} | |
293 | : $self->{sort_by} =~ /\D+/ ? $self->{sort_by} | |
294 | : sub { $a cmp $b }; | |
295 | } | |
296 | ||
297 | encode_error("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)") | |
dec273dc | 298 | if(!ref $obj and !$props->[ P_ALLOW_NONREF ]); |
d5424315 DG |
299 | |
300 | my $str = $self->object_to_json($obj); | |
301 | ||
302 | $str .= "\n" if ( $indent ); # JSON::XS 2.26 compatible | |
303 | ||
304 | unless ($ascii or $latin1 or $utf8) { | |
305 | utf8::upgrade($str); | |
306 | } | |
307 | ||
dec273dc | 308 | if ($props->[ P_SHRINK ]) { |
d5424315 DG |
309 | utf8::downgrade($str, 1); |
310 | } | |
311 | ||
312 | return $str; | |
313 | } | |
314 | ||
315 | ||
316 | sub object_to_json { | |
317 | my ($self, $obj) = @_; | |
318 | my $type = ref($obj); | |
319 | ||
320 | if($type eq 'HASH'){ | |
321 | return $self->hash_to_json($obj); | |
322 | } | |
323 | elsif($type eq 'ARRAY'){ | |
324 | return $self->array_to_json($obj); | |
325 | } | |
326 | elsif ($type) { # blessed object? | |
327 | if (blessed($obj)) { | |
328 | ||
329 | return $self->value_to_json($obj) if ( $obj->isa('JSON::PP::Boolean') ); | |
330 | ||
331 | if ( $convert_blessed and $obj->can('TO_JSON') ) { | |
332 | my $result = $obj->TO_JSON(); | |
05bd2440 CBW |
333 | if ( defined $result and ref( $result ) ) { |
334 | if ( refaddr( $obj ) eq refaddr( $result ) ) { | |
d5424315 DG |
335 | encode_error( sprintf( |
336 | "%s::TO_JSON method returned same object as was passed instead of a new one", | |
337 | ref $obj | |
338 | ) ); | |
339 | } | |
340 | } | |
341 | ||
342 | return $self->object_to_json( $result ); | |
343 | } | |
344 | ||
345 | return "$obj" if ( $bignum and _is_bignum($obj) ); | |
d5424315 | 346 | |
dec273dc CBW |
347 | if ($allow_blessed) { |
348 | return $self->blessed_to_json($obj) if ($as_nonblessed); # will be removed. | |
349 | return 'null'; | |
350 | } | |
d5424315 DG |
351 | encode_error( sprintf("encountered object '%s', but neither allow_blessed " |
352 | . "nor convert_blessed settings are enabled", $obj) | |
dec273dc | 353 | ); |
d5424315 DG |
354 | } |
355 | else { | |
356 | return $self->value_to_json($obj); | |
357 | } | |
358 | } | |
359 | else{ | |
360 | return $self->value_to_json($obj); | |
361 | } | |
362 | } | |
363 | ||
364 | ||
365 | sub hash_to_json { | |
366 | my ($self, $obj) = @_; | |
367 | my @res; | |
368 | ||
369 | encode_error("json text or perl structure exceeds maximum nesting level (max_depth set too low?)") | |
370 | if (++$depth > $max_depth); | |
371 | ||
372 | my ($pre, $post) = $indent ? $self->_up_indent() : ('', ''); | |
373 | my $del = ($space_before ? ' ' : '') . ':' . ($space_after ? ' ' : ''); | |
374 | ||
375 | for my $k ( _sort( $obj ) ) { | |
376 | if ( OLD_PERL ) { utf8::decode($k) } # key for Perl 5.6 / be optimized | |
dec273dc | 377 | push @res, $self->string_to_json( $k ) |
d5424315 | 378 | . $del |
dec273dc | 379 | . ( ref $obj->{$k} ? $self->object_to_json( $obj->{$k} ) : $self->value_to_json( $obj->{$k} ) ); |
d5424315 DG |
380 | } |
381 | ||
382 | --$depth; | |
383 | $self->_down_indent() if ($indent); | |
384 | ||
dec273dc CBW |
385 | return '{}' unless @res; |
386 | return '{' . $pre . join( ",$pre", @res ) . $post . '}'; | |
d5424315 DG |
387 | } |
388 | ||
389 | ||
390 | sub array_to_json { | |
391 | my ($self, $obj) = @_; | |
392 | my @res; | |
393 | ||
394 | encode_error("json text or perl structure exceeds maximum nesting level (max_depth set too low?)") | |
395 | if (++$depth > $max_depth); | |
396 | ||
397 | my ($pre, $post) = $indent ? $self->_up_indent() : ('', ''); | |
398 | ||
399 | for my $v (@$obj){ | |
dec273dc | 400 | push @res, ref($v) ? $self->object_to_json($v) : $self->value_to_json($v); |
d5424315 DG |
401 | } |
402 | ||
403 | --$depth; | |
404 | $self->_down_indent() if ($indent); | |
405 | ||
dec273dc CBW |
406 | return '[]' unless @res; |
407 | return '[' . $pre . join( ",$pre", @res ) . $post . ']'; | |
d5424315 DG |
408 | } |
409 | ||
dec273dc CBW |
410 | sub _looks_like_number { |
411 | my $value = shift; | |
412 | if (USE_B) { | |
413 | my $b_obj = B::svref_2object(\$value); | |
414 | my $flags = $b_obj->FLAGS; | |
415 | return 1 if $flags & ( B::SVp_IOK() | B::SVp_NOK() ) and !( $flags & B::SVp_POK() ); | |
416 | return; | |
417 | } else { | |
418 | no warnings 'numeric'; | |
419 | # detect numbers | |
420 | # string & "" -> "" | |
421 | # number & "" -> 0 (with warning) | |
422 | # nan and inf can detect as numbers, so check with * 0 | |
423 | return unless length((my $dummy = "") & $value); | |
424 | return unless 0 + $value eq $value; | |
425 | return 1 if $value * 0 == 0; | |
426 | return -1; # inf/nan | |
427 | } | |
428 | } | |
d5424315 DG |
429 | |
430 | sub value_to_json { | |
431 | my ($self, $value) = @_; | |
432 | ||
433 | return 'null' if(!defined $value); | |
434 | ||
d5424315 DG |
435 | my $type = ref($value); |
436 | ||
dec273dc CBW |
437 | if (!$type) { |
438 | if (_looks_like_number($value)) { | |
439 | return $value; | |
440 | } | |
441 | return $self->string_to_json($value); | |
d5424315 DG |
442 | } |
443 | elsif( blessed($value) and $value->isa('JSON::PP::Boolean') ){ | |
444 | return $$value == 1 ? 'true' : 'false'; | |
445 | } | |
dec273dc | 446 | else { |
d5424315 DG |
447 | if ((overload::StrVal($value) =~ /=(\w+)/)[0]) { |
448 | return $self->value_to_json("$value"); | |
449 | } | |
450 | ||
451 | if ($type eq 'SCALAR' and defined $$value) { | |
452 | return $$value eq '1' ? 'true' | |
453 | : $$value eq '0' ? 'false' | |
454 | : $self->{PROPS}->[ P_ALLOW_UNKNOWN ] ? 'null' | |
455 | : encode_error("cannot encode reference to scalar"); | |
456 | } | |
457 | ||
dec273dc CBW |
458 | if ( $self->{PROPS}->[ P_ALLOW_UNKNOWN ] ) { |
459 | return 'null'; | |
460 | } | |
461 | else { | |
462 | if ( $type eq 'SCALAR' or $type eq 'REF' ) { | |
d5424315 | 463 | encode_error("cannot encode reference to scalar"); |
dec273dc CBW |
464 | } |
465 | else { | |
d5424315 | 466 | encode_error("encountered $value, but JSON can only represent references to arrays or hashes"); |
dec273dc CBW |
467 | } |
468 | } | |
d5424315 DG |
469 | |
470 | } | |
d5424315 DG |
471 | } |
472 | ||
473 | ||
474 | my %esc = ( | |
475 | "\n" => '\n', | |
476 | "\r" => '\r', | |
477 | "\t" => '\t', | |
478 | "\f" => '\f', | |
479 | "\b" => '\b', | |
480 | "\"" => '\"', | |
481 | "\\" => '\\\\', | |
482 | "\'" => '\\\'', | |
483 | ); | |
484 | ||
485 | ||
486 | sub string_to_json { | |
487 | my ($self, $arg) = @_; | |
488 | ||
489 | $arg =~ s/([\x22\x5c\n\r\t\f\b])/$esc{$1}/g; | |
490 | $arg =~ s/\//\\\//g if ($escape_slash); | |
491 | $arg =~ s/([\x00-\x08\x0b\x0e-\x1f])/'\\u00' . unpack('H2', $1)/eg; | |
492 | ||
493 | if ($ascii) { | |
494 | $arg = JSON_PP_encode_ascii($arg); | |
495 | } | |
496 | ||
497 | if ($latin1) { | |
498 | $arg = JSON_PP_encode_latin1($arg); | |
499 | } | |
500 | ||
501 | if ($utf8) { | |
502 | utf8::encode($arg); | |
503 | } | |
504 | ||
505 | return '"' . $arg . '"'; | |
506 | } | |
507 | ||
508 | ||
509 | sub blessed_to_json { | |
510 | my $reftype = reftype($_[1]) || ''; | |
511 | if ($reftype eq 'HASH') { | |
512 | return $_[0]->hash_to_json($_[1]); | |
513 | } | |
514 | elsif ($reftype eq 'ARRAY') { | |
515 | return $_[0]->array_to_json($_[1]); | |
516 | } | |
517 | else { | |
518 | return 'null'; | |
519 | } | |
520 | } | |
521 | ||
522 | ||
523 | sub encode_error { | |
524 | my $error = shift; | |
525 | Carp::croak "$error"; | |
526 | } | |
527 | ||
528 | ||
529 | sub _sort { | |
530 | defined $keysort ? (sort $keysort (keys %{$_[0]})) : keys %{$_[0]}; | |
531 | } | |
532 | ||
533 | ||
534 | sub _up_indent { | |
535 | my $self = shift; | |
536 | my $space = ' ' x $indent_length; | |
537 | ||
538 | my ($pre,$post) = ('',''); | |
539 | ||
540 | $post = "\n" . $space x $indent_count; | |
541 | ||
542 | $indent_count++; | |
543 | ||
544 | $pre = "\n" . $space x $indent_count; | |
545 | ||
546 | return ($pre,$post); | |
547 | } | |
548 | ||
549 | ||
550 | sub _down_indent { $indent_count--; } | |
551 | ||
552 | ||
553 | sub PP_encode_box { | |
554 | { | |
555 | depth => $depth, | |
556 | indent_count => $indent_count, | |
557 | }; | |
558 | } | |
559 | ||
560 | } # Convert | |
561 | ||
562 | ||
563 | sub _encode_ascii { | |
564 | join('', | |
565 | map { | |
566 | $_ <= 127 ? | |
567 | chr($_) : | |
568 | $_ <= 65535 ? | |
569 | sprintf('\u%04x', $_) : sprintf('\u%x\u%x', _encode_surrogates($_)); | |
570 | } unpack('U*', $_[0]) | |
571 | ); | |
572 | } | |
573 | ||
574 | ||
575 | sub _encode_latin1 { | |
576 | join('', | |
577 | map { | |
578 | $_ <= 255 ? | |
579 | chr($_) : | |
580 | $_ <= 65535 ? | |
581 | sprintf('\u%04x', $_) : sprintf('\u%x\u%x', _encode_surrogates($_)); | |
582 | } unpack('U*', $_[0]) | |
583 | ); | |
584 | } | |
585 | ||
586 | ||
587 | sub _encode_surrogates { # from perlunicode | |
588 | my $uni = $_[0] - 0x10000; | |
589 | return ($uni / 0x400 + 0xD800, $uni % 0x400 + 0xDC00); | |
590 | } | |
591 | ||
592 | ||
593 | sub _is_bignum { | |
594 | $_[0]->isa('Math::BigInt') or $_[0]->isa('Math::BigFloat'); | |
595 | } | |
596 | ||
597 | ||
598 | ||
599 | # | |
600 | # JSON => Perl | |
601 | # | |
602 | ||
603 | my $max_intsize; | |
604 | ||
605 | BEGIN { | |
606 | my $checkint = 1111; | |
607 | for my $d (5..64) { | |
608 | $checkint .= 1; | |
609 | my $int = eval qq| $checkint |; | |
610 | if ($int =~ /[eE]/) { | |
611 | $max_intsize = $d - 1; | |
612 | last; | |
613 | } | |
614 | } | |
615 | } | |
616 | ||
617 | { # PARSE | |
618 | ||
619 | my %escapes = ( # by Jeremy Muhlich <jmuhlich [at] bitflood.org> | |
620 | b => "\x8", | |
621 | t => "\x9", | |
622 | n => "\xA", | |
623 | f => "\xC", | |
624 | r => "\xD", | |
625 | '\\' => '\\', | |
626 | '"' => '"', | |
627 | '/' => '/', | |
628 | ); | |
629 | ||
630 | my $text; # json data | |
631 | my $at; # offset | |
a1e5c561 | 632 | my $ch; # first character |
d5424315 DG |
633 | my $len; # text length (changed according to UTF8 or NON UTF8) |
634 | # INTERNAL | |
635 | my $depth; # nest counter | |
636 | my $encoding; # json text encoding | |
637 | my $is_valid_utf8; # temp variable | |
638 | my $utf8_len; # utf8 byte length | |
639 | # FLAGS | |
640 | my $utf8; # must be utf8 | |
a1e5c561 | 641 | my $max_depth; # max nest number of objects and arrays |
d5424315 DG |
642 | my $max_size; |
643 | my $relaxed; | |
644 | my $cb_object; | |
645 | my $cb_sk_object; | |
646 | ||
647 | my $F_HOOK; | |
648 | ||
dec273dc | 649 | my $allow_bignum; # using Math::BigInt/BigFloat |
d5424315 DG |
650 | my $singlequote; # loosely quoting |
651 | my $loose; # | |
652 | my $allow_barekey; # bareKey | |
653 | ||
dec273dc CBW |
654 | sub _detect_utf_encoding { |
655 | my $text = shift; | |
656 | my @octets = unpack('C4', $text); | |
657 | return 'unknown' unless defined $octets[3]; | |
658 | return ( $octets[0] and $octets[1]) ? 'UTF-8' | |
659 | : (!$octets[0] and $octets[1]) ? 'UTF-16BE' | |
660 | : (!$octets[0] and !$octets[1]) ? 'UTF-32BE' | |
661 | : ( $octets[2] ) ? 'UTF-16LE' | |
662 | : (!$octets[2] ) ? 'UTF-32LE' | |
663 | : 'unknown'; | |
664 | } | |
d5424315 DG |
665 | |
666 | sub PP_decode_json { | |
dec273dc | 667 | my ($self, $want_offset); |
d5424315 | 668 | |
dec273dc | 669 | ($self, $text, $want_offset) = @_; |
d5424315 DG |
670 | |
671 | ($at, $ch, $depth) = (0, '', 0); | |
672 | ||
673 | if ( !defined $text or ref $text ) { | |
674 | decode_error("malformed JSON string, neither array, object, number, string or atom"); | |
675 | } | |
676 | ||
dec273dc | 677 | my $props = $self->{PROPS}; |
d5424315 | 678 | |
dec273dc CBW |
679 | ($utf8, $relaxed, $loose, $allow_bignum, $allow_barekey, $singlequote) |
680 | = @{$props}[P_UTF8, P_RELAXED, P_LOOSE .. P_ALLOW_SINGLEQUOTE]; | |
d5424315 DG |
681 | |
682 | if ( $utf8 ) { | |
dec273dc CBW |
683 | $encoding = _detect_utf_encoding($text); |
684 | if ($encoding ne 'UTF-8' and $encoding ne 'unknown') { | |
685 | require Encode; | |
686 | Encode::from_to($text, $encoding, 'utf-8'); | |
687 | } else { | |
688 | utf8::downgrade( $text, 1 ) or Carp::croak("Wide character in subroutine entry"); | |
689 | } | |
d5424315 DG |
690 | } |
691 | else { | |
692 | utf8::upgrade( $text ); | |
87f3ebe4 | 693 | utf8::encode( $text ); |
d5424315 DG |
694 | } |
695 | ||
696 | $len = length $text; | |
697 | ||
698 | ($max_depth, $max_size, $cb_object, $cb_sk_object, $F_HOOK) | |
699 | = @{$self}{qw/max_depth max_size cb_object cb_sk_object F_HOOK/}; | |
700 | ||
701 | if ($max_size > 1) { | |
702 | use bytes; | |
703 | my $bytes = length $text; | |
704 | decode_error( | |
705 | sprintf("attempted decode of JSON text of %s bytes size, but max_size is set to %s" | |
706 | , $bytes, $max_size), 1 | |
707 | ) if ($bytes > $max_size); | |
708 | } | |
709 | ||
d5424315 DG |
710 | white(); # remove head white space |
711 | ||
dec273dc | 712 | decode_error("malformed JSON string, neither array, object, number, string or atom") unless defined $ch; # Is there a first character for JSON structure? |
d5424315 DG |
713 | |
714 | my $result = value(); | |
715 | ||
dec273dc | 716 | if ( !$props->[ P_ALLOW_NONREF ] and !ref $result ) { |
d5424315 DG |
717 | decode_error( |
718 | 'JSON text must be an object or array (but found number, string, true, false or null,' | |
719 | . ' use allow_nonref to allow this)', 1); | |
720 | } | |
721 | ||
722 | Carp::croak('something wrong.') if $len < $at; # we won't arrive here. | |
723 | ||
724 | my $consumed = defined $ch ? $at - 1 : $at; # consumed JSON text length | |
725 | ||
726 | white(); # remove tail white space | |
727 | ||
dec273dc CBW |
728 | return ( $result, $consumed ) if $want_offset; # all right if decode_prefix |
729 | ||
730 | decode_error("garbage after JSON object") if defined $ch; | |
d5424315 | 731 | |
dec273dc | 732 | $result; |
d5424315 DG |
733 | } |
734 | ||
735 | ||
736 | sub next_chr { | |
737 | return $ch = undef if($at >= $len); | |
738 | $ch = substr($text, $at++, 1); | |
739 | } | |
740 | ||
741 | ||
742 | sub value { | |
743 | white(); | |
744 | return if(!defined $ch); | |
745 | return object() if($ch eq '{'); | |
746 | return array() if($ch eq '['); | |
747 | return string() if($ch eq '"' or ($singlequote and $ch eq "'")); | |
748 | return number() if($ch =~ /[0-9]/ or $ch eq '-'); | |
749 | return word(); | |
750 | } | |
751 | ||
752 | sub string { | |
d5424315 DG |
753 | my $utf16; |
754 | my $is_utf8; | |
755 | ||
756 | ($is_valid_utf8, $utf8_len) = ('', 0); | |
757 | ||
dec273dc | 758 | my $s = ''; # basically UTF8 flag on |
d5424315 DG |
759 | |
760 | if($ch eq '"' or ($singlequote and $ch eq "'")){ | |
761 | my $boundChar = $ch; | |
762 | ||
763 | OUTER: while( defined(next_chr()) ){ | |
764 | ||
765 | if($ch eq $boundChar){ | |
766 | next_chr(); | |
767 | ||
768 | if ($utf16) { | |
769 | decode_error("missing low surrogate character in surrogate pair"); | |
770 | } | |
771 | ||
772 | utf8::decode($s) if($is_utf8); | |
773 | ||
774 | return $s; | |
775 | } | |
776 | elsif($ch eq '\\'){ | |
777 | next_chr(); | |
778 | if(exists $escapes{$ch}){ | |
779 | $s .= $escapes{$ch}; | |
780 | } | |
781 | elsif($ch eq 'u'){ # UNICODE handling | |
782 | my $u = ''; | |
783 | ||
784 | for(1..4){ | |
785 | $ch = next_chr(); | |
786 | last OUTER if($ch !~ /[0-9a-fA-F]/); | |
787 | $u .= $ch; | |
788 | } | |
789 | ||
790 | # U+D800 - U+DBFF | |
791 | if ($u =~ /^[dD][89abAB][0-9a-fA-F]{2}/) { # UTF-16 high surrogate? | |
792 | $utf16 = $u; | |
793 | } | |
794 | # U+DC00 - U+DFFF | |
795 | elsif ($u =~ /^[dD][c-fC-F][0-9a-fA-F]{2}/) { # UTF-16 low surrogate? | |
796 | unless (defined $utf16) { | |
797 | decode_error("missing high surrogate character in surrogate pair"); | |
798 | } | |
799 | $is_utf8 = 1; | |
800 | $s .= JSON_PP_decode_surrogates($utf16, $u) || next; | |
801 | $utf16 = undef; | |
802 | } | |
803 | else { | |
804 | if (defined $utf16) { | |
805 | decode_error("surrogate pair expected"); | |
806 | } | |
807 | ||
808 | if ( ( my $hex = hex( $u ) ) > 127 ) { | |
809 | $is_utf8 = 1; | |
810 | $s .= JSON_PP_decode_unicode($u) || next; | |
811 | } | |
812 | else { | |
813 | $s .= chr $hex; | |
814 | } | |
815 | } | |
816 | ||
817 | } | |
818 | else{ | |
819 | unless ($loose) { | |
820 | $at -= 2; | |
821 | decode_error('illegal backslash escape sequence in string'); | |
822 | } | |
823 | $s .= $ch; | |
824 | } | |
825 | } | |
826 | else{ | |
827 | ||
828 | if ( ord $ch > 127 ) { | |
87f3ebe4 CBW |
829 | unless( $ch = is_valid_utf8($ch) ) { |
830 | $at -= 1; | |
831 | decode_error("malformed UTF-8 character in JSON string"); | |
d5424315 DG |
832 | } |
833 | else { | |
87f3ebe4 | 834 | $at += $utf8_len - 1; |
d5424315 DG |
835 | } |
836 | ||
837 | $is_utf8 = 1; | |
838 | } | |
839 | ||
840 | if (!$loose) { | |
841 | if ($ch =~ /[\x00-\x1f\x22\x5c]/) { # '/' ok | |
842 | $at--; | |
843 | decode_error('invalid character encountered while parsing JSON string'); | |
844 | } | |
845 | } | |
846 | ||
847 | $s .= $ch; | |
848 | } | |
849 | } | |
850 | } | |
851 | ||
852 | decode_error("unexpected end of string while parsing JSON string"); | |
853 | } | |
854 | ||
855 | ||
856 | sub white { | |
857 | while( defined $ch ){ | |
dec273dc | 858 | if($ch eq '' or $ch =~ /\A[ \t\r\n]\z/){ |
d5424315 DG |
859 | next_chr(); |
860 | } | |
dec273dc | 861 | elsif($relaxed and $ch eq '/'){ |
d5424315 DG |
862 | next_chr(); |
863 | if(defined $ch and $ch eq '/'){ | |
864 | 1 while(defined(next_chr()) and $ch ne "\n" and $ch ne "\r"); | |
865 | } | |
866 | elsif(defined $ch and $ch eq '*'){ | |
867 | next_chr(); | |
868 | while(1){ | |
869 | if(defined $ch){ | |
870 | if($ch eq '*'){ | |
871 | if(defined(next_chr()) and $ch eq '/'){ | |
872 | next_chr(); | |
873 | last; | |
874 | } | |
875 | } | |
876 | else{ | |
877 | next_chr(); | |
878 | } | |
879 | } | |
880 | else{ | |
881 | decode_error("Unterminated comment"); | |
882 | } | |
883 | } | |
884 | next; | |
885 | } | |
886 | else{ | |
887 | $at--; | |
888 | decode_error("malformed JSON string, neither array, object, number, string or atom"); | |
889 | } | |
890 | } | |
891 | else{ | |
892 | if ($relaxed and $ch eq '#') { # correctly? | |
893 | pos($text) = $at; | |
894 | $text =~ /\G([^\n]*(?:\r\n|\r|\n|$))/g; | |
895 | $at = pos($text); | |
896 | next_chr; | |
897 | next; | |
898 | } | |
899 | ||
900 | last; | |
901 | } | |
902 | } | |
903 | } | |
904 | ||
905 | ||
906 | sub array { | |
05bd2440 | 907 | my $a = $_[0] || []; # you can use this code to use another array ref object. |
d5424315 DG |
908 | |
909 | decode_error('json text or perl structure exceeds maximum nesting level (max_depth set too low?)') | |
910 | if (++$depth > $max_depth); | |
911 | ||
912 | next_chr(); | |
913 | white(); | |
914 | ||
915 | if(defined $ch and $ch eq ']'){ | |
916 | --$depth; | |
917 | next_chr(); | |
918 | return $a; | |
919 | } | |
920 | else { | |
921 | while(defined($ch)){ | |
922 | push @$a, value(); | |
923 | ||
924 | white(); | |
925 | ||
926 | if (!defined $ch) { | |
927 | last; | |
928 | } | |
929 | ||
930 | if($ch eq ']'){ | |
931 | --$depth; | |
932 | next_chr(); | |
933 | return $a; | |
934 | } | |
935 | ||
936 | if($ch ne ','){ | |
937 | last; | |
938 | } | |
939 | ||
940 | next_chr(); | |
941 | white(); | |
942 | ||
943 | if ($relaxed and $ch eq ']') { | |
944 | --$depth; | |
945 | next_chr(); | |
946 | return $a; | |
947 | } | |
948 | ||
949 | } | |
950 | } | |
951 | ||
dec273dc | 952 | $at-- if defined $ch and $ch ne ''; |
d5424315 DG |
953 | decode_error(", or ] expected while parsing array"); |
954 | } | |
955 | ||
956 | ||
957 | sub object { | |
05bd2440 | 958 | my $o = $_[0] || {}; # you can use this code to use another hash ref object. |
d5424315 DG |
959 | my $k; |
960 | ||
961 | decode_error('json text or perl structure exceeds maximum nesting level (max_depth set too low?)') | |
962 | if (++$depth > $max_depth); | |
963 | next_chr(); | |
964 | white(); | |
965 | ||
966 | if(defined $ch and $ch eq '}'){ | |
967 | --$depth; | |
968 | next_chr(); | |
969 | if ($F_HOOK) { | |
970 | return _json_object_hook($o); | |
971 | } | |
972 | return $o; | |
973 | } | |
974 | else { | |
975 | while (defined $ch) { | |
976 | $k = ($allow_barekey and $ch ne '"' and $ch ne "'") ? bareKey() : string(); | |
977 | white(); | |
978 | ||
979 | if(!defined $ch or $ch ne ':'){ | |
980 | $at--; | |
981 | decode_error("':' expected"); | |
982 | } | |
983 | ||
984 | next_chr(); | |
985 | $o->{$k} = value(); | |
986 | white(); | |
987 | ||
988 | last if (!defined $ch); | |
989 | ||
990 | if($ch eq '}'){ | |
991 | --$depth; | |
992 | next_chr(); | |
993 | if ($F_HOOK) { | |
994 | return _json_object_hook($o); | |
995 | } | |
996 | return $o; | |
997 | } | |
998 | ||
999 | if($ch ne ','){ | |
1000 | last; | |
1001 | } | |
1002 | ||
1003 | next_chr(); | |
1004 | white(); | |
1005 | ||
1006 | if ($relaxed and $ch eq '}') { | |
1007 | --$depth; | |
1008 | next_chr(); | |
1009 | if ($F_HOOK) { | |
1010 | return _json_object_hook($o); | |
1011 | } | |
1012 | return $o; | |
1013 | } | |
1014 | ||
1015 | } | |
1016 | ||
1017 | } | |
1018 | ||
dec273dc | 1019 | $at-- if defined $ch and $ch ne ''; |
d5424315 DG |
1020 | decode_error(", or } expected while parsing object/hash"); |
1021 | } | |
1022 | ||
1023 | ||
1024 | sub bareKey { # doesn't strictly follow Standard ECMA-262 3rd Edition | |
1025 | my $key; | |
1026 | while($ch =~ /[^\x00-\x23\x25-\x2F\x3A-\x40\x5B-\x5E\x60\x7B-\x7F]/){ | |
1027 | $key .= $ch; | |
1028 | next_chr(); | |
1029 | } | |
1030 | return $key; | |
1031 | } | |
1032 | ||
1033 | ||
1034 | sub word { | |
1035 | my $word = substr($text,$at-1,4); | |
1036 | ||
1037 | if($word eq 'true'){ | |
1038 | $at += 3; | |
1039 | next_chr; | |
1040 | return $JSON::PP::true; | |
1041 | } | |
1042 | elsif($word eq 'null'){ | |
1043 | $at += 3; | |
1044 | next_chr; | |
1045 | return undef; | |
1046 | } | |
1047 | elsif($word eq 'fals'){ | |
1048 | $at += 3; | |
1049 | if(substr($text,$at,1) eq 'e'){ | |
1050 | $at++; | |
1051 | next_chr; | |
1052 | return $JSON::PP::false; | |
1053 | } | |
1054 | } | |
1055 | ||
1056 | $at--; # for decode_error report | |
1057 | ||
1058 | decode_error("'null' expected") if ($word =~ /^n/); | |
1059 | decode_error("'true' expected") if ($word =~ /^t/); | |
1060 | decode_error("'false' expected") if ($word =~ /^f/); | |
1061 | decode_error("malformed JSON string, neither array, object, number, string or atom"); | |
1062 | } | |
1063 | ||
1064 | ||
1065 | sub number { | |
1066 | my $n = ''; | |
1067 | my $v; | |
a1e5c561 | 1068 | my $is_dec; |
dec273dc | 1069 | my $is_exp; |
d5424315 DG |
1070 | |
1071 | if($ch eq '-'){ | |
1072 | $n = '-'; | |
1073 | next_chr; | |
1074 | if (!defined $ch or $ch !~ /\d/) { | |
1075 | decode_error("malformed number (no digits after initial minus)"); | |
1076 | } | |
1077 | } | |
1078 | ||
dec273dc CBW |
1079 | # According to RFC4627, hex or oct digits are invalid. |
1080 | if($ch eq '0'){ | |
1081 | my $peek = substr($text,$at,1); | |
1082 | if($peek =~ /^[0-9a-dfA-DF]/){ # e may be valid (exponential) | |
1083 | decode_error("malformed number (leading zero must not be followed by another digit)"); | |
1084 | } | |
1085 | $n .= $ch; | |
1086 | next_chr; | |
1087 | } | |
1088 | ||
d5424315 DG |
1089 | while(defined $ch and $ch =~ /\d/){ |
1090 | $n .= $ch; | |
1091 | next_chr; | |
1092 | } | |
1093 | ||
1094 | if(defined $ch and $ch eq '.'){ | |
1095 | $n .= '.'; | |
a1e5c561 | 1096 | $is_dec = 1; |
d5424315 DG |
1097 | |
1098 | next_chr; | |
1099 | if (!defined $ch or $ch !~ /\d/) { | |
1100 | decode_error("malformed number (no digits after decimal point)"); | |
1101 | } | |
1102 | else { | |
1103 | $n .= $ch; | |
1104 | } | |
1105 | ||
1106 | while(defined(next_chr) and $ch =~ /\d/){ | |
1107 | $n .= $ch; | |
1108 | } | |
1109 | } | |
1110 | ||
1111 | if(defined $ch and ($ch eq 'e' or $ch eq 'E')){ | |
1112 | $n .= $ch; | |
dec273dc | 1113 | $is_exp = 1; |
d5424315 DG |
1114 | next_chr; |
1115 | ||
1116 | if(defined($ch) and ($ch eq '+' or $ch eq '-')){ | |
1117 | $n .= $ch; | |
1118 | next_chr; | |
1119 | if (!defined $ch or $ch =~ /\D/) { | |
1120 | decode_error("malformed number (no digits after exp sign)"); | |
1121 | } | |
1122 | $n .= $ch; | |
1123 | } | |
1124 | elsif(defined($ch) and $ch =~ /\d/){ | |
1125 | $n .= $ch; | |
1126 | } | |
1127 | else { | |
1128 | decode_error("malformed number (no digits after exp sign)"); | |
1129 | } | |
1130 | ||
1131 | while(defined(next_chr) and $ch =~ /\d/){ | |
1132 | $n .= $ch; | |
1133 | } | |
1134 | ||
1135 | } | |
1136 | ||
1137 | $v .= $n; | |
1138 | ||
dec273dc CBW |
1139 | if ($is_dec or $is_exp) { |
1140 | if ($allow_bignum) { | |
1141 | require Math::BigFloat; | |
1142 | return Math::BigFloat->new($v); | |
d5424315 | 1143 | } |
dec273dc CBW |
1144 | } else { |
1145 | if (length $v > $max_intsize) { | |
1146 | if ($allow_bignum) { # from Adam Sussman | |
1147 | require Math::BigInt; | |
1148 | return Math::BigInt->new($v); | |
1149 | } | |
1150 | else { | |
1151 | return "$v"; | |
1152 | } | |
d5424315 DG |
1153 | } |
1154 | } | |
d5424315 | 1155 | |
a1e5c561 | 1156 | return $is_dec ? $v/1.0 : 0+$v; |
d5424315 DG |
1157 | } |
1158 | ||
1159 | ||
1160 | sub is_valid_utf8 { | |
1161 | ||
1162 | $utf8_len = $_[0] =~ /[\x00-\x7F]/ ? 1 | |
1163 | : $_[0] =~ /[\xC2-\xDF]/ ? 2 | |
1164 | : $_[0] =~ /[\xE0-\xEF]/ ? 3 | |
1165 | : $_[0] =~ /[\xF0-\xF4]/ ? 4 | |
1166 | : 0 | |
1167 | ; | |
1168 | ||
1169 | return unless $utf8_len; | |
1170 | ||
1171 | my $is_valid_utf8 = substr($text, $at - 1, $utf8_len); | |
1172 | ||
1173 | return ( $is_valid_utf8 =~ /^(?: | |
1174 | [\x00-\x7F] | |
1175 | |[\xC2-\xDF][\x80-\xBF] | |
1176 | |[\xE0][\xA0-\xBF][\x80-\xBF] | |
1177 | |[\xE1-\xEC][\x80-\xBF][\x80-\xBF] | |
1178 | |[\xED][\x80-\x9F][\x80-\xBF] | |
1179 | |[\xEE-\xEF][\x80-\xBF][\x80-\xBF] | |
1180 | |[\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] | |
1181 | |[\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] | |
1182 | |[\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] | |
1183 | )$/x ) ? $is_valid_utf8 : ''; | |
1184 | } | |
1185 | ||
1186 | ||
1187 | sub decode_error { | |
1188 | my $error = shift; | |
1189 | my $no_rep = shift; | |
1190 | my $str = defined $text ? substr($text, $at) : ''; | |
1191 | my $mess = ''; | |
dec273dc CBW |
1192 | my $type = 'U*'; |
1193 | ||
1194 | if ( OLD_PERL ) { | |
1195 | my $type = $] < 5.006 ? 'C*' | |
1196 | : utf8::is_utf8( $str ) ? 'U*' # 5.6 | |
1197 | : 'C*' | |
1198 | ; | |
1199 | } | |
d5424315 DG |
1200 | |
1201 | for my $c ( unpack( $type, $str ) ) { # emulate pv_uni_display() ? | |
1202 | $mess .= $c == 0x07 ? '\a' | |
1203 | : $c == 0x09 ? '\t' | |
1204 | : $c == 0x0a ? '\n' | |
1205 | : $c == 0x0d ? '\r' | |
1206 | : $c == 0x0c ? '\f' | |
1207 | : $c < 0x20 ? sprintf('\x{%x}', $c) | |
1208 | : $c == 0x5c ? '\\\\' | |
1209 | : $c < 0x80 ? chr($c) | |
1210 | : sprintf('\x{%x}', $c) | |
1211 | ; | |
1212 | if ( length $mess >= 20 ) { | |
1213 | $mess .= '...'; | |
1214 | last; | |
1215 | } | |
1216 | } | |
1217 | ||
1218 | unless ( length $mess ) { | |
1219 | $mess = '(end of string)'; | |
1220 | } | |
1221 | ||
1222 | Carp::croak ( | |
1223 | $no_rep ? "$error" : "$error, at character offset $at (before \"$mess\")" | |
1224 | ); | |
1225 | ||
1226 | } | |
1227 | ||
1228 | ||
1229 | sub _json_object_hook { | |
1230 | my $o = $_[0]; | |
1231 | my @ks = keys %{$o}; | |
1232 | ||
1233 | if ( $cb_sk_object and @ks == 1 and exists $cb_sk_object->{ $ks[0] } and ref $cb_sk_object->{ $ks[0] } ) { | |
1234 | my @val = $cb_sk_object->{ $ks[0] }->( $o->{$ks[0]} ); | |
1235 | if (@val == 1) { | |
1236 | return $val[0]; | |
1237 | } | |
1238 | } | |
1239 | ||
1240 | my @val = $cb_object->($o) if ($cb_object); | |
1241 | if (@val == 0 or @val > 1) { | |
1242 | return $o; | |
1243 | } | |
1244 | else { | |
1245 | return $val[0]; | |
1246 | } | |
1247 | } | |
1248 | ||
1249 | ||
1250 | sub PP_decode_box { | |
1251 | { | |
1252 | text => $text, | |
1253 | at => $at, | |
1254 | ch => $ch, | |
1255 | len => $len, | |
1256 | depth => $depth, | |
1257 | encoding => $encoding, | |
1258 | is_valid_utf8 => $is_valid_utf8, | |
1259 | }; | |
1260 | } | |
1261 | ||
1262 | } # PARSE | |
1263 | ||
1264 | ||
1265 | sub _decode_surrogates { # from perlunicode | |
1266 | my $uni = 0x10000 + (hex($_[0]) - 0xD800) * 0x400 + (hex($_[1]) - 0xDC00); | |
1267 | my $un = pack('U*', $uni); | |
1268 | utf8::encode( $un ); | |
1269 | return $un; | |
1270 | } | |
1271 | ||
1272 | ||
1273 | sub _decode_unicode { | |
1274 | my $un = pack('U', hex shift); | |
1275 | utf8::encode( $un ); | |
1276 | return $un; | |
1277 | } | |
1278 | ||
1279 | # | |
1280 | # Setup for various Perl versions (the code from JSON::PP58) | |
1281 | # | |
1282 | ||
1283 | BEGIN { | |
1284 | ||
1285 | unless ( defined &utf8::is_utf8 ) { | |
1286 | require Encode; | |
1287 | *utf8::is_utf8 = *Encode::is_utf8; | |
1288 | } | |
1289 | ||
dec273dc | 1290 | if ( !OLD_PERL ) { |
d5424315 DG |
1291 | *JSON::PP::JSON_PP_encode_ascii = \&_encode_ascii; |
1292 | *JSON::PP::JSON_PP_encode_latin1 = \&_encode_latin1; | |
1293 | *JSON::PP::JSON_PP_decode_surrogates = \&_decode_surrogates; | |
1294 | *JSON::PP::JSON_PP_decode_unicode = \&_decode_unicode; | |
d5424315 | 1295 | |
dec273dc CBW |
1296 | if ($] < 5.008003) { # join() in 5.8.0 - 5.8.2 is broken. |
1297 | package JSON::PP; | |
1298 | require subs; | |
1299 | subs->import('join'); | |
1300 | eval q| | |
1301 | sub join { | |
1302 | return '' if (@_ < 2); | |
1303 | my $j = shift; | |
1304 | my $str = shift; | |
1305 | for (@_) { $str .= $j . $_; } | |
1306 | return $str; | |
1307 | } | |
1308 | |; | |
1309 | } | |
d5424315 DG |
1310 | } |
1311 | ||
1312 | ||
1313 | sub JSON::PP::incr_parse { | |
1314 | local $Carp::CarpLevel = 1; | |
1315 | ( $_[0]->{_incr_parser} ||= JSON::PP::IncrParser->new )->incr_parse( @_ ); | |
1316 | } | |
1317 | ||
1318 | ||
1319 | sub JSON::PP::incr_skip { | |
1320 | ( $_[0]->{_incr_parser} ||= JSON::PP::IncrParser->new )->incr_skip; | |
1321 | } | |
1322 | ||
1323 | ||
1324 | sub JSON::PP::incr_reset { | |
1325 | ( $_[0]->{_incr_parser} ||= JSON::PP::IncrParser->new )->incr_reset; | |
1326 | } | |
1327 | ||
1328 | eval q{ | |
1329 | sub JSON::PP::incr_text : lvalue { | |
1330 | $_[0]->{_incr_parser} ||= JSON::PP::IncrParser->new; | |
1331 | ||
dec273dc | 1332 | if ( $_[0]->{_incr_parser}->{incr_pos} ) { |
a1e5c561 | 1333 | Carp::croak("incr_text cannot be called when the incremental parser already started parsing"); |
d5424315 DG |
1334 | } |
1335 | $_[0]->{_incr_parser}->{incr_text}; | |
1336 | } | |
1337 | } if ( $] >= 5.006 ); | |
1338 | ||
1339 | } # Setup for various Perl versions (the code from JSON::PP58) | |
1340 | ||
1341 | ||
1342 | ############################### | |
1343 | # Utilities | |
1344 | # | |
1345 | ||
1346 | BEGIN { | |
1347 | eval 'require Scalar::Util'; | |
1348 | unless($@){ | |
1349 | *JSON::PP::blessed = \&Scalar::Util::blessed; | |
1350 | *JSON::PP::reftype = \&Scalar::Util::reftype; | |
05bd2440 | 1351 | *JSON::PP::refaddr = \&Scalar::Util::refaddr; |
d5424315 | 1352 | } |
dec273dc | 1353 | else{ # This code is from Scalar::Util. |
d5424315 DG |
1354 | # warn $@; |
1355 | eval 'sub UNIVERSAL::a_sub_not_likely_to_be_here { ref($_[0]) }'; | |
1356 | *JSON::PP::blessed = sub { | |
1357 | local($@, $SIG{__DIE__}, $SIG{__WARN__}); | |
1358 | ref($_[0]) ? eval { $_[0]->a_sub_not_likely_to_be_here } : undef; | |
1359 | }; | |
dec273dc | 1360 | require B; |
d5424315 DG |
1361 | my %tmap = qw( |
1362 | B::NULL SCALAR | |
1363 | B::HV HASH | |
1364 | B::AV ARRAY | |
1365 | B::CV CODE | |
1366 | B::IO IO | |
1367 | B::GV GLOB | |
1368 | B::REGEXP REGEXP | |
1369 | ); | |
1370 | *JSON::PP::reftype = sub { | |
1371 | my $r = shift; | |
1372 | ||
1373 | return undef unless length(ref($r)); | |
1374 | ||
1375 | my $t = ref(B::svref_2object($r)); | |
1376 | ||
1377 | return | |
1378 | exists $tmap{$t} ? $tmap{$t} | |
1379 | : length(ref($$r)) ? 'REF' | |
1380 | : 'SCALAR'; | |
1381 | }; | |
05bd2440 CBW |
1382 | *JSON::PP::refaddr = sub { |
1383 | return undef unless length(ref($_[0])); | |
1384 | ||
1385 | my $addr; | |
1386 | if(defined(my $pkg = blessed($_[0]))) { | |
1387 | $addr .= bless $_[0], 'Scalar::Util::Fake'; | |
1388 | bless $_[0], $pkg; | |
1389 | } | |
1390 | else { | |
1391 | $addr .= $_[0] | |
1392 | } | |
1393 | ||
1394 | $addr =~ /0x(\w+)/; | |
1395 | local $^W; | |
1396 | #no warnings 'portable'; | |
1397 | hex($1); | |
1398 | } | |
d5424315 DG |
1399 | } |
1400 | } | |
1401 | ||
1402 | ||
a1e5c561 | 1403 | # shamelessly copied and modified from JSON::XS code. |
d5424315 DG |
1404 | |
1405 | $JSON::PP::true = do { bless \(my $dummy = 1), "JSON::PP::Boolean" }; | |
1406 | $JSON::PP::false = do { bless \(my $dummy = 0), "JSON::PP::Boolean" }; | |
1407 | ||
9c399ae2 | 1408 | sub is_bool { blessed $_[0] and $_[0]->isa("JSON::PP::Boolean"); } |
d5424315 DG |
1409 | |
1410 | sub true { $JSON::PP::true } | |
1411 | sub false { $JSON::PP::false } | |
1412 | sub null { undef; } | |
1413 | ||
1414 | ############################### | |
1415 | ||
d5424315 DG |
1416 | package JSON::PP::IncrParser; |
1417 | ||
1418 | use strict; | |
1419 | ||
1420 | use constant INCR_M_WS => 0; # initial whitespace skipping | |
1421 | use constant INCR_M_STR => 1; # inside string | |
1422 | use constant INCR_M_BS => 2; # inside backslash | |
1423 | use constant INCR_M_JSON => 3; # outside anything, count nesting | |
1424 | use constant INCR_M_C0 => 4; | |
1425 | use constant INCR_M_C1 => 5; | |
1426 | ||
1427 | $JSON::PP::IncrParser::VERSION = '1.01'; | |
1428 | ||
d5424315 DG |
1429 | sub new { |
1430 | my ( $class ) = @_; | |
1431 | ||
1432 | bless { | |
1433 | incr_nest => 0, | |
1434 | incr_text => undef, | |
dec273dc CBW |
1435 | incr_pos => 0, |
1436 | incr_mode => 0, | |
d5424315 DG |
1437 | }, $class; |
1438 | } | |
1439 | ||
1440 | ||
1441 | sub incr_parse { | |
1442 | my ( $self, $coder, $text ) = @_; | |
1443 | ||
1444 | $self->{incr_text} = '' unless ( defined $self->{incr_text} ); | |
1445 | ||
1446 | if ( defined $text ) { | |
1447 | if ( utf8::is_utf8( $text ) and !utf8::is_utf8( $self->{incr_text} ) ) { | |
1448 | utf8::upgrade( $self->{incr_text} ) ; | |
1449 | utf8::decode( $self->{incr_text} ) ; | |
1450 | } | |
1451 | $self->{incr_text} .= $text; | |
1452 | } | |
1453 | ||
d5424315 | 1454 | if ( defined wantarray ) { |
dec273dc CBW |
1455 | my $max_size = $coder->get_max_size; |
1456 | my $p = $self->{incr_pos}; | |
1457 | my @ret; | |
1458 | { | |
d5424315 | 1459 | do { |
dec273dc CBW |
1460 | unless ( $self->{incr_nest} <= 0 and $self->{incr_mode} == INCR_M_JSON ) { |
1461 | $self->_incr_parse( $coder ); | |
d5424315 | 1462 | |
dec273dc CBW |
1463 | if ( $max_size and $self->{incr_pos} > $max_size ) { |
1464 | Carp::croak("attempted decode of JSON text of $self->{incr_pos} bytes size, but max_size is set to $max_size"); | |
1465 | } | |
1466 | unless ( $self->{incr_nest} <= 0 and $self->{incr_mode} == INCR_M_JSON ) { | |
1467 | # as an optimisation, do not accumulate white space in the incr buffer | |
1468 | if ( $self->{incr_mode} == INCR_M_WS and $self->{incr_pos} ) { | |
1469 | $self->{incr_pos} = 0; | |
1470 | $self->{incr_text} = ''; | |
1471 | } | |
1472 | last; | |
1473 | } | |
d5424315 DG |
1474 | } |
1475 | ||
dec273dc CBW |
1476 | my ($obj, $offset) = $coder->PP_decode_json( $self->{incr_text}, 0x00000001 ); |
1477 | push @ret, $obj; | |
1478 | use bytes; | |
1479 | $self->{incr_text} = substr( $self->{incr_text}, $offset || 0 ); | |
1480 | $self->{incr_pos} = 0; | |
1481 | $self->{incr_nest} = 0; | |
1482 | $self->{incr_mode} = 0; | |
1483 | last unless wantarray; | |
1484 | } while ( wantarray ); | |
1485 | } | |
d5424315 | 1486 | |
dec273dc | 1487 | if ( wantarray ) { |
d5424315 DG |
1488 | return @ret; |
1489 | } | |
1490 | else { # in scalar context | |
dec273dc | 1491 | return $ret[0] ? $ret[0] : undef; |
d5424315 | 1492 | } |
d5424315 | 1493 | } |
d5424315 DG |
1494 | } |
1495 | ||
1496 | ||
1497 | sub _incr_parse { | |
dec273dc CBW |
1498 | my ($self, $coder) = @_; |
1499 | my $text = $self->{incr_text}; | |
d5424315 | 1500 | my $len = length $text; |
dec273dc | 1501 | my $p = $self->{incr_pos}; |
d5424315 | 1502 | |
dec273dc | 1503 | INCR_PARSE: |
d5424315 | 1504 | while ( $len > $p ) { |
dec273dc CBW |
1505 | my $s = substr( $text, $p, 1 ); |
1506 | last INCR_PARSE unless defined $s; | |
1507 | my $mode = $self->{incr_mode}; | |
1508 | ||
1509 | if ( $mode == INCR_M_WS ) { | |
1510 | while ( $len > $p ) { | |
1511 | $s = substr( $text, $p, 1 ); | |
1512 | last INCR_PARSE unless defined $s; | |
1513 | if ( ord($s) > 0x20 ) { | |
1514 | if ( $s eq '#' ) { | |
1515 | $self->{incr_mode} = INCR_M_C0; | |
1516 | redo INCR_PARSE; | |
1517 | } else { | |
1518 | $self->{incr_mode} = INCR_M_JSON; | |
1519 | redo INCR_PARSE; | |
1520 | } | |
1521 | } | |
1522 | $p++; | |
d5424315 | 1523 | } |
dec273dc CBW |
1524 | } elsif ( $mode == INCR_M_BS ) { |
1525 | $p++; | |
1526 | $self->{incr_mode} = INCR_M_STR; | |
1527 | redo INCR_PARSE; | |
1528 | } elsif ( $mode == INCR_M_C0 or $mode == INCR_M_C1 ) { | |
1529 | while ( $len > $p ) { | |
1530 | $s = substr( $text, $p, 1 ); | |
1531 | last INCR_PARSE unless defined $s; | |
1532 | if ( $s eq "\n" ) { | |
1533 | $self->{incr_mode} = $self->{incr_mode} == INCR_M_C0 ? INCR_M_WS : INCR_M_JSON; | |
d5424315 DG |
1534 | last; |
1535 | } | |
dec273dc | 1536 | $p++; |
d5424315 | 1537 | } |
dec273dc CBW |
1538 | next; |
1539 | } elsif ( $mode == INCR_M_STR ) { | |
1540 | while ( $len > $p ) { | |
1541 | $s = substr( $text, $p, 1 ); | |
1542 | last INCR_PARSE unless defined $s; | |
1543 | if ( $s eq '"' ) { | |
1544 | $p++; | |
1545 | $self->{incr_mode} = INCR_M_JSON; | |
1546 | ||
1547 | last INCR_PARSE unless $self->{incr_nest}; | |
1548 | redo INCR_PARSE; | |
d5424315 | 1549 | } |
dec273dc CBW |
1550 | elsif ( $s eq '\\' ) { |
1551 | $p++; | |
1552 | if ( !defined substr($text, $p, 1) ) { | |
1553 | $self->{incr_mode} = INCR_M_BS; | |
1554 | last INCR_PARSE; | |
1555 | } | |
1556 | } | |
1557 | $p++; | |
d5424315 | 1558 | } |
dec273dc CBW |
1559 | } elsif ( $mode == INCR_M_JSON ) { |
1560 | while ( $len > $p ) { | |
1561 | $s = substr( $text, $p++, 1 ); | |
1562 | if ( $s eq "\x00" ) { | |
1563 | $p--; | |
1564 | last INCR_PARSE; | |
1565 | } elsif ( $s eq "\x09" or $s eq "\x0a" or $s eq "\x0d" or $s eq "\x20" ) { | |
1566 | if ( !$self->{incr_nest} ) { | |
1567 | $p--; # do not eat the whitespace, let the next round do it | |
1568 | last INCR_PARSE; | |
1569 | } | |
1570 | next; | |
1571 | } elsif ( $s eq '"' ) { | |
1572 | $self->{incr_mode} = INCR_M_STR; | |
1573 | redo INCR_PARSE; | |
1574 | } elsif ( $s eq '[' or $s eq '{' ) { | |
1575 | if ( ++$self->{incr_nest} > $coder->get_max_depth ) { | |
1576 | Carp::croak('json text or perl structure exceeds maximum nesting level (max_depth set too low?)'); | |
1577 | } | |
1578 | next; | |
1579 | } elsif ( $s eq ']' or $s eq '}' ) { | |
1580 | if ( --$self->{incr_nest} <= 0 ) { | |
1581 | last INCR_PARSE; | |
1582 | } | |
1583 | } elsif ( $s eq '#' ) { | |
1584 | $self->{incr_mode} = INCR_M_C1; | |
1585 | redo INCR_PARSE; | |
d5424315 DG |
1586 | } |
1587 | } | |
d5424315 | 1588 | } |
d5424315 DG |
1589 | } |
1590 | ||
dec273dc CBW |
1591 | $self->{incr_pos} = $p; |
1592 | $self->{incr_parsing} = $p ? 1 : 0; # for backward compatibility | |
d5424315 DG |
1593 | } |
1594 | ||
1595 | ||
1596 | sub incr_text { | |
dec273dc | 1597 | if ( $_[0]->{incr_pos} ) { |
a1e5c561 | 1598 | Carp::croak("incr_text cannot be called when the incremental parser already started parsing"); |
d5424315 DG |
1599 | } |
1600 | $_[0]->{incr_text}; | |
1601 | } | |
1602 | ||
1603 | ||
1604 | sub incr_skip { | |
1605 | my $self = shift; | |
dec273dc CBW |
1606 | $self->{incr_text} = substr( $self->{incr_text}, $self->{incr_pos} ); |
1607 | $self->{incr_pos} = 0; | |
1608 | $self->{incr_mode} = 0; | |
1609 | $self->{incr_nest} = 0; | |
d5424315 DG |
1610 | } |
1611 | ||
1612 | ||
1613 | sub incr_reset { | |
1614 | my $self = shift; | |
1615 | $self->{incr_text} = undef; | |
dec273dc | 1616 | $self->{incr_pos} = 0; |
d5424315 DG |
1617 | $self->{incr_mode} = 0; |
1618 | $self->{incr_nest} = 0; | |
d5424315 DG |
1619 | } |
1620 | ||
1621 | ############################### | |
1622 | ||
1623 | ||
1624 | 1; | |
1625 | __END__ | |
1626 | =pod | |
1627 | ||
1628 | =head1 NAME | |
1629 | ||
1630 | JSON::PP - JSON::XS compatible pure-Perl module. | |
1631 | ||
1632 | =head1 SYNOPSIS | |
1633 | ||
1634 | use JSON::PP; | |
1635 | ||
1636 | # exported functions, they croak on error | |
1637 | # and expect/generate UTF-8 | |
1638 | ||
1639 | $utf8_encoded_json_text = encode_json $perl_hash_or_arrayref; | |
1640 | $perl_hash_or_arrayref = decode_json $utf8_encoded_json_text; | |
1641 | ||
1642 | # OO-interface | |
1643 | ||
dec273dc | 1644 | $json = JSON::PP->new->ascii->pretty->allow_nonref; |
d5424315 | 1645 | |
dec273dc | 1646 | $pretty_printed_json_text = $json->encode( $perl_scalar ); |
d5424315 DG |
1647 | $perl_scalar = $json->decode( $json_text ); |
1648 | ||
d5424315 DG |
1649 | # Note that JSON version 2.0 and above will automatically use |
1650 | # JSON::XS or JSON::PP, so you should be able to just: | |
1651 | ||
1652 | use JSON; | |
1653 | ||
1654 | ||
1655 | =head1 VERSION | |
1656 | ||
9c399ae2 | 1657 | 2.97000 |
d5424315 DG |
1658 | |
1659 | =head1 DESCRIPTION | |
1660 | ||
dec273dc CBW |
1661 | JSON::PP is a pure perl JSON decoder/encoder (as of RFC4627, which |
1662 | we know is obsolete but we still stick to; see below for an option | |
1663 | to support part of RFC7159), and (almost) compatible to much | |
1664 | faster L<JSON::XS> written by Marc Lehmann in C. JSON::PP works as | |
1665 | a fallback module when you use L<JSON> module without having | |
1666 | installed JSON::XS. | |
1667 | ||
1668 | Because of this fallback feature of JSON.pm, JSON::PP tries not to | |
1669 | be more JavaScript-friendly than JSON::XS (i.e. not to escape extra | |
1670 | characters such as U+2028 and U+2029 nor support RFC7159/ECMA-404), | |
1671 | in order for you not to lose such JavaScript-friendliness silently | |
1672 | when you use JSON.pm and install JSON::XS for speed or by accident. | |
1673 | If you need JavaScript-friendly RFC7159-compliant pure perl module, | |
1674 | try L<JSON::Tiny>, which is derived from L<Mojolicious> web | |
1675 | framework and is also smaller and faster than JSON::PP. | |
1676 | ||
1677 | JSON::PP has been in the Perl core since Perl 5.14, mainly for | |
1678 | CPAN toolchain modules to parse META.json. | |
d5424315 DG |
1679 | |
1680 | =head1 FUNCTIONAL INTERFACE | |
1681 | ||
dec273dc CBW |
1682 | This section is taken from JSON::XS almost verbatim. C<encode_json> |
1683 | and C<decode_json> are exported by default. | |
d5424315 DG |
1684 | |
1685 | =head2 encode_json | |
1686 | ||
1687 | $json_text = encode_json $perl_scalar | |
1688 | ||
dec273dc CBW |
1689 | Converts the given Perl data structure to a UTF-8 encoded, binary string |
1690 | (that is, the string contains octets only). Croaks on error. | |
d5424315 DG |
1691 | |
1692 | This function call is functionally identical to: | |
1693 | ||
1694 | $json_text = JSON::PP->new->utf8->encode($perl_scalar) | |
1695 | ||
dec273dc CBW |
1696 | Except being faster. |
1697 | ||
d5424315 DG |
1698 | =head2 decode_json |
1699 | ||
1700 | $perl_scalar = decode_json $json_text | |
1701 | ||
1702 | The opposite of C<encode_json>: expects an UTF-8 (binary) string and tries | |
1703 | to parse that as an UTF-8 encoded JSON text, returning the resulting | |
dec273dc | 1704 | reference. Croaks on error. |
d5424315 DG |
1705 | |
1706 | This function call is functionally identical to: | |
1707 | ||
1708 | $perl_scalar = JSON::PP->new->utf8->decode($json_text) | |
1709 | ||
dec273dc CBW |
1710 | Except being faster. |
1711 | ||
d5424315 DG |
1712 | =head2 JSON::PP::is_bool |
1713 | ||
1714 | $is_boolean = JSON::PP::is_bool($scalar) | |
1715 | ||
1716 | Returns true if the passed scalar represents either JSON::PP::true or | |
1717 | JSON::PP::false, two constants that act like C<1> and C<0> respectively | |
1718 | and are also used to represent JSON C<true> and C<false> in Perl strings. | |
1719 | ||
d5424315 DG |
1720 | See L<MAPPING>, below, for more information on how JSON values are mapped to |
1721 | Perl. | |
1722 | ||
dec273dc | 1723 | =head1 OBJECT-ORIENTED INTERFACE |
d5424315 | 1724 | |
dec273dc | 1725 | This section is also taken from JSON::XS. |
d5424315 | 1726 | |
dec273dc CBW |
1727 | The object oriented interface lets you configure your own encoding or |
1728 | decoding style, within the limits of supported formats. | |
d5424315 DG |
1729 | |
1730 | =head2 new | |
1731 | ||
7942a65c | 1732 | $json = JSON::PP->new |
d5424315 | 1733 | |
dec273dc CBW |
1734 | Creates a new JSON::PP object that can be used to de/encode JSON |
1735 | strings. All boolean flags described below are by default I<disabled>. | |
d5424315 | 1736 | |
dec273dc | 1737 | The mutators for flags all return the JSON::PP object again and thus calls can |
d5424315 DG |
1738 | be chained: |
1739 | ||
1740 | my $json = JSON::PP->new->utf8->space_after->encode({a => [1,2]}) | |
1741 | => {"a": [1, 2]} | |
1742 | ||
1743 | =head2 ascii | |
1744 | ||
1745 | $json = $json->ascii([$enable]) | |
1746 | ||
1747 | $enabled = $json->get_ascii | |
1748 | ||
dec273dc CBW |
1749 | If C<$enable> is true (or missing), then the C<encode> method will not |
1750 | generate characters outside the code range C<0..127> (which is ASCII). Any | |
1751 | Unicode characters outside that range will be escaped using either a | |
1752 | single \uXXXX (BMP characters) or a double \uHHHH\uLLLLL escape sequence, | |
1753 | as per RFC4627. The resulting encoded JSON text can be treated as a native | |
1754 | Unicode string, an ascii-encoded, latin1-encoded or UTF-8 encoded string, | |
1755 | or any other superset of ASCII. | |
1756 | ||
1757 | If C<$enable> is false, then the C<encode> method will not escape Unicode | |
1758 | characters unless required by the JSON syntax or other flags. This results | |
1759 | in a faster and more compact format. | |
d5424315 | 1760 | |
dec273dc | 1761 | See also the section I<ENCODING/CODESET FLAG NOTES> later in this document. |
d5424315 | 1762 | |
dec273dc CBW |
1763 | The main use for this flag is to produce JSON texts that can be |
1764 | transmitted over a 7-bit channel, as the encoded JSON texts will not | |
1765 | contain any 8 bit characters. | |
d5424315 DG |
1766 | |
1767 | JSON::PP->new->ascii(1)->encode([chr 0x10401]) | |
1768 | => ["\ud801\udc01"] | |
1769 | ||
1770 | =head2 latin1 | |
1771 | ||
1772 | $json = $json->latin1([$enable]) | |
1773 | ||
1774 | $enabled = $json->get_latin1 | |
1775 | ||
dec273dc CBW |
1776 | If C<$enable> is true (or missing), then the C<encode> method will encode |
1777 | the resulting JSON text as latin1 (or iso-8859-1), escaping any characters | |
1778 | outside the code range C<0..255>. The resulting string can be treated as a | |
1779 | latin1-encoded JSON text or a native Unicode string. The C<decode> method | |
1780 | will not be affected in any way by this flag, as C<decode> by default | |
1781 | expects Unicode, which is a strict superset of latin1. | |
d5424315 | 1782 | |
dec273dc CBW |
1783 | If C<$enable> is false, then the C<encode> method will not escape Unicode |
1784 | characters unless required by the JSON syntax or other flags. | |
d5424315 | 1785 | |
dec273dc CBW |
1786 | See also the section I<ENCODING/CODESET FLAG NOTES> later in this document. |
1787 | ||
1788 | The main use for this flag is efficiently encoding binary data as JSON | |
1789 | text, as most octets will not be escaped, resulting in a smaller encoded | |
1790 | size. The disadvantage is that the resulting JSON text is encoded | |
1791 | in latin1 (and must correctly be treated as such when storing and | |
1792 | transferring), a rare encoding for JSON. It is therefore most useful when | |
1793 | you want to store data structures known to contain binary data efficiently | |
1794 | in files or databases, not when talking to other JSON encoders/decoders. | |
d5424315 | 1795 | |
dec273dc CBW |
1796 | JSON::PP->new->latin1->encode (["\x{89}\x{abc}"] |
1797 | => ["\x{89}\\u0abc"] # (perl syntax, U+abc escaped, U+89 not) | |
d5424315 DG |
1798 | |
1799 | =head2 utf8 | |
1800 | ||
1801 | $json = $json->utf8([$enable]) | |
1802 | ||
1803 | $enabled = $json->get_utf8 | |
1804 | ||
dec273dc CBW |
1805 | If C<$enable> is true (or missing), then the C<encode> method will encode |
1806 | the JSON result into UTF-8, as required by many protocols, while the | |
1807 | C<decode> method expects to be handled an UTF-8-encoded string. Please | |
1808 | note that UTF-8-encoded strings do not contain any characters outside the | |
1809 | range C<0..255>, they are thus useful for bytewise/binary I/O. In future | |
1810 | versions, enabling this option might enable autodetection of the UTF-16 | |
1811 | and UTF-32 encoding families, as described in RFC4627. | |
d5424315 | 1812 | |
dec273dc CBW |
1813 | If C<$enable> is false, then the C<encode> method will return the JSON |
1814 | string as a (non-encoded) Unicode string, while C<decode> expects thus a | |
1815 | Unicode string. Any decoding or encoding (e.g. to UTF-8 or UTF-16) needs | |
1816 | to be done yourself, e.g. using the Encode module. | |
d5424315 | 1817 | |
dec273dc | 1818 | See also the section I<ENCODING/CODESET FLAG NOTES> later in this document. |
d5424315 DG |
1819 | |
1820 | Example, output UTF-16BE-encoded JSON: | |
1821 | ||
1822 | use Encode; | |
1823 | $jsontext = encode "UTF-16BE", JSON::PP->new->encode ($object); | |
1824 | ||
1825 | Example, decode UTF-32LE-encoded JSON: | |
1826 | ||
1827 | use Encode; | |
1828 | $object = JSON::PP->new->decode (decode "UTF-32LE", $jsontext); | |
1829 | ||
d5424315 DG |
1830 | =head2 pretty |
1831 | ||
1832 | $json = $json->pretty([$enable]) | |
1833 | ||
1834 | This enables (or disables) all of the C<indent>, C<space_before> and | |
dec273dc CBW |
1835 | C<space_after> (and in the future possibly more) flags in one call to |
1836 | generate the most readable (or most compact) form possible. | |
d5424315 DG |
1837 | |
1838 | =head2 indent | |
1839 | ||
1840 | $json = $json->indent([$enable]) | |
1841 | ||
1842 | $enabled = $json->get_indent | |
1843 | ||
dec273dc CBW |
1844 | If C<$enable> is true (or missing), then the C<encode> method will use a multiline |
1845 | format as output, putting every array member or object/hash key-value pair | |
1846 | into its own line, indenting them properly. | |
1847 | ||
1848 | If C<$enable> is false, no newlines or indenting will be produced, and the | |
1849 | resulting JSON text is guaranteed not to contain any C<newlines>. | |
1850 | ||
1851 | This setting has no effect when decoding JSON texts. | |
1852 | ||
d5424315 DG |
1853 | The default indent space length is three. |
1854 | You can use C<indent_length> to change the length. | |
1855 | ||
1856 | =head2 space_before | |
1857 | ||
1858 | $json = $json->space_before([$enable]) | |
1859 | ||
1860 | $enabled = $json->get_space_before | |
1861 | ||
1862 | If C<$enable> is true (or missing), then the C<encode> method will add an extra | |
1863 | optional space before the C<:> separating keys from values in JSON objects. | |
1864 | ||
1865 | If C<$enable> is false, then the C<encode> method will not add any extra | |
1866 | space at those places. | |
1867 | ||
dec273dc CBW |
1868 | This setting has no effect when decoding JSON texts. You will also |
1869 | most likely combine this setting with C<space_after>. | |
d5424315 DG |
1870 | |
1871 | Example, space_before enabled, space_after and indent disabled: | |
1872 | ||
1873 | {"key" :"value"} | |
1874 | ||
1875 | =head2 space_after | |
1876 | ||
1877 | $json = $json->space_after([$enable]) | |
1878 | ||
1879 | $enabled = $json->get_space_after | |
1880 | ||
1881 | If C<$enable> is true (or missing), then the C<encode> method will add an extra | |
1882 | optional space after the C<:> separating keys from values in JSON objects | |
1883 | and extra whitespace after the C<,> separating key-value pairs and array | |
1884 | members. | |
1885 | ||
1886 | If C<$enable> is false, then the C<encode> method will not add any extra | |
1887 | space at those places. | |
1888 | ||
1889 | This setting has no effect when decoding JSON texts. | |
1890 | ||
1891 | Example, space_before and indent disabled, space_after enabled: | |
1892 | ||
1893 | {"key": "value"} | |
1894 | ||
1895 | =head2 relaxed | |
1896 | ||
1897 | $json = $json->relaxed([$enable]) | |
1898 | ||
1899 | $enabled = $json->get_relaxed | |
1900 | ||
1901 | If C<$enable> is true (or missing), then C<decode> will accept some | |
1902 | extensions to normal JSON syntax (see below). C<encode> will not be | |
1903 | affected in anyway. I<Be aware that this option makes you accept invalid | |
1904 | JSON texts as if they were valid!>. I suggest only to use this option to | |
1905 | parse application-specific files written by humans (configuration files, | |
1906 | resource files etc.) | |
1907 | ||
1908 | If C<$enable> is false (the default), then C<decode> will only accept | |
1909 | valid JSON texts. | |
1910 | ||
1911 | Currently accepted extensions are: | |
1912 | ||
1913 | =over 4 | |
1914 | ||
1915 | =item * list items can have an end-comma | |
1916 | ||
1917 | JSON I<separates> array elements and key-value pairs with commas. This | |
1918 | can be annoying if you write JSON texts manually and want to be able to | |
1919 | quickly append elements, so this extension accepts comma at the end of | |
1920 | such items not just between them: | |
1921 | ||
1922 | [ | |
1923 | 1, | |
1924 | 2, <- this comma not normally allowed | |
1925 | ] | |
1926 | { | |
1927 | "k1": "v1", | |
1928 | "k2": "v2", <- this comma not normally allowed | |
1929 | } | |
1930 | ||
1931 | =item * shell-style '#'-comments | |
1932 | ||
1933 | Whenever JSON allows whitespace, shell-style comments are additionally | |
1934 | allowed. They are terminated by the first carriage-return or line-feed | |
1935 | character, after which more white-space and comments are allowed. | |
1936 | ||
1937 | [ | |
1938 | 1, # this comment not allowed in JSON | |
1939 | # neither this one... | |
1940 | ] | |
1941 | ||
dec273dc CBW |
1942 | =item * C-style multiple-line '/* */'-comments (JSON::PP only) |
1943 | ||
1944 | Whenever JSON allows whitespace, C-style multiple-line comments are additionally | |
1945 | allowed. Everything between C</*> and C<*/> is a comment, after which | |
1946 | more white-space and comments are allowed. | |
1947 | ||
1948 | [ | |
1949 | 1, /* this comment not allowed in JSON */ | |
1950 | /* neither this one... */ | |
1951 | ] | |
1952 | ||
1953 | =item * C++-style one-line '//'-comments (JSON::PP only) | |
1954 | ||
1955 | Whenever JSON allows whitespace, C++-style one-line comments are additionally | |
1956 | allowed. They are terminated by the first carriage-return or line-feed | |
1957 | character, after which more white-space and comments are allowed. | |
1958 | ||
1959 | [ | |
1960 | 1, // this comment not allowed in JSON | |
1961 | // neither this one... | |
1962 | ] | |
1963 | ||
d5424315 DG |
1964 | =back |
1965 | ||
1966 | =head2 canonical | |
1967 | ||
1968 | $json = $json->canonical([$enable]) | |
1969 | ||
1970 | $enabled = $json->get_canonical | |
1971 | ||
1972 | If C<$enable> is true (or missing), then the C<encode> method will output JSON objects | |
1973 | by sorting their keys. This is adding a comparatively high overhead. | |
1974 | ||
1975 | If C<$enable> is false, then the C<encode> method will output key-value | |
1976 | pairs in the order Perl stores them (which will likely change between runs | |
dec273dc CBW |
1977 | of the same script, and can change even within the same run from 5.18 |
1978 | onwards). | |
d5424315 DG |
1979 | |
1980 | This option is useful if you want the same data structure to be encoded as | |
1981 | the same JSON text (given the same overall settings). If it is disabled, | |
1982 | the same hash might be encoded differently even if contains the same data, | |
1983 | as key-value pairs have no inherent ordering in Perl. | |
1984 | ||
1985 | This setting has no effect when decoding JSON texts. | |
1986 | ||
dec273dc | 1987 | This setting has currently no effect on tied hashes. |
d5424315 DG |
1988 | |
1989 | =head2 allow_nonref | |
1990 | ||
1991 | $json = $json->allow_nonref([$enable]) | |
1992 | ||
1993 | $enabled = $json->get_allow_nonref | |
1994 | ||
1995 | If C<$enable> is true (or missing), then the C<encode> method can convert a | |
1996 | non-reference into its corresponding string, number or null JSON value, | |
1997 | which is an extension to RFC4627. Likewise, C<decode> will accept those JSON | |
1998 | values instead of croaking. | |
1999 | ||
2000 | If C<$enable> is false, then the C<encode> method will croak if it isn't | |
2001 | passed an arrayref or hashref, as JSON texts must either be an object | |
2002 | or array. Likewise, C<decode> will croak if given something that is not a | |
2003 | JSON object or array. | |
2004 | ||
dec273dc CBW |
2005 | Example, encode a Perl scalar as JSON value with enabled C<allow_nonref>, |
2006 | resulting in an invalid JSON text: | |
2007 | ||
d5424315 DG |
2008 | JSON::PP->new->allow_nonref->encode ("Hello, World!") |
2009 | => "Hello, World!" | |
2010 | ||
2011 | =head2 allow_unknown | |
2012 | ||
2013 | $json = $json->allow_unknown ([$enable]) | |
2014 | ||
2015 | $enabled = $json->get_allow_unknown | |
2016 | ||
dec273dc | 2017 | If C<$enable> is true (or missing), then C<encode> will I<not> throw an |
d5424315 | 2018 | exception when it encounters values it cannot represent in JSON (for |
dec273dc CBW |
2019 | example, filehandles) but instead will encode a JSON C<null> value. Note |
2020 | that blessed objects are not included here and are handled separately by | |
2021 | c<allow_blessed>. | |
d5424315 | 2022 | |
dec273dc | 2023 | If C<$enable> is false (the default), then C<encode> will throw an |
d5424315 DG |
2024 | exception when it encounters anything it cannot encode as JSON. |
2025 | ||
dec273dc CBW |
2026 | This option does not affect C<decode> in any way, and it is recommended to |
2027 | leave it off unless you know your communications partner. | |
d5424315 DG |
2028 | |
2029 | =head2 allow_blessed | |
2030 | ||
2031 | $json = $json->allow_blessed([$enable]) | |
2032 | ||
2033 | $enabled = $json->get_allow_blessed | |
2034 | ||
dec273dc CBW |
2035 | See L<OBJECT SERIALISATION> for details. |
2036 | ||
d5424315 | 2037 | If C<$enable> is true (or missing), then the C<encode> method will not |
dec273dc CBW |
2038 | barf when it encounters a blessed reference that it cannot convert |
2039 | otherwise. Instead, a JSON C<null> value is encoded instead of the object. | |
d5424315 DG |
2040 | |
2041 | If C<$enable> is false (the default), then C<encode> will throw an | |
dec273dc CBW |
2042 | exception when it encounters a blessed object that it cannot convert |
2043 | otherwise. | |
2044 | ||
2045 | This setting has no effect on C<decode>. | |
d5424315 DG |
2046 | |
2047 | =head2 convert_blessed | |
2048 | ||
2049 | $json = $json->convert_blessed([$enable]) | |
2050 | ||
2051 | $enabled = $json->get_convert_blessed | |
2052 | ||
dec273dc CBW |
2053 | See L<OBJECT SERIALISATION> for details. |
2054 | ||
d5424315 DG |
2055 | If C<$enable> is true (or missing), then C<encode>, upon encountering a |
2056 | blessed object, will check for the availability of the C<TO_JSON> method | |
dec273dc CBW |
2057 | on the object's class. If found, it will be called in scalar context and |
2058 | the resulting scalar will be encoded instead of the object. | |
d5424315 DG |
2059 | |
2060 | The C<TO_JSON> method may safely call die if it wants. If C<TO_JSON> | |
2061 | returns other blessed objects, those will be handled in the same | |
2062 | way. C<TO_JSON> must take care of not causing an endless recursion cycle | |
2063 | (== crash) in this case. The name of C<TO_JSON> was chosen because other | |
2064 | methods called by the Perl core (== not by the user of the object) are | |
dec273dc | 2065 | usually in upper case letters and to avoid collisions with any C<to_json> |
d5424315 DG |
2066 | function or method. |
2067 | ||
dec273dc CBW |
2068 | If C<$enable> is false (the default), then C<encode> will not consider |
2069 | this type of conversion. | |
d5424315 | 2070 | |
dec273dc | 2071 | This setting has no effect on C<decode>. |
d5424315 DG |
2072 | |
2073 | =head2 filter_json_object | |
2074 | ||
2075 | $json = $json->filter_json_object([$coderef]) | |
2076 | ||
2077 | When C<$coderef> is specified, it will be called from C<decode> each | |
dec273dc CBW |
2078 | time it decodes a JSON object. The only argument is a reference to the |
2079 | newly-created hash. If the code references returns a single scalar (which | |
2080 | need not be a reference), this value (i.e. a copy of that scalar to avoid | |
2081 | aliasing) is inserted into the deserialised data structure. If it returns | |
2082 | an empty list (NOTE: I<not> C<undef>, which is a valid scalar), the | |
2083 | original deserialised hash will be inserted. This setting can slow down | |
2084 | decoding considerably. | |
d5424315 DG |
2085 | |
2086 | When C<$coderef> is omitted or undefined, any existing callback will | |
2087 | be removed and C<decode> will not change the deserialised hash in any | |
2088 | way. | |
2089 | ||
2090 | Example, convert all JSON objects into the integer 5: | |
2091 | ||
2092 | my $js = JSON::PP->new->filter_json_object (sub { 5 }); | |
2093 | # returns [5] | |
2094 | $js->decode ('[{}]'); # the given subroutine takes a hash reference. | |
2095 | # throw an exception because allow_nonref is not enabled | |
2096 | # so a lone 5 is not allowed. | |
2097 | $js->decode ('{"a":1, "b":2}'); | |
2098 | ||
2099 | =head2 filter_json_single_key_object | |
2100 | ||
2101 | $json = $json->filter_json_single_key_object($key [=> $coderef]) | |
2102 | ||
2103 | Works remotely similar to C<filter_json_object>, but is only called for | |
2104 | JSON objects having a single key named C<$key>. | |
2105 | ||
2106 | This C<$coderef> is called before the one specified via | |
2107 | C<filter_json_object>, if any. It gets passed the single value in the JSON | |
2108 | object. If it returns a single value, it will be inserted into the data | |
2109 | structure. If it returns nothing (not even C<undef> but the empty list), | |
2110 | the callback from C<filter_json_object> will be called next, as if no | |
2111 | single-key callback were specified. | |
2112 | ||
2113 | If C<$coderef> is omitted or undefined, the corresponding callback will be | |
2114 | disabled. There can only ever be one callback for a given key. | |
2115 | ||
2116 | As this callback gets called less often then the C<filter_json_object> | |
2117 | one, decoding speed will not usually suffer as much. Therefore, single-key | |
2118 | objects make excellent targets to serialise Perl objects into, especially | |
2119 | as single-key JSON objects are as close to the type-tagged value concept | |
2120 | as JSON gets (it's basically an ID/VALUE tuple). Of course, JSON does not | |
2121 | support this in any way, so you need to make sure your data never looks | |
2122 | like a serialised Perl hash. | |
2123 | ||
2124 | Typical names for the single object key are C<__class_whatever__>, or | |
2125 | C<$__dollars_are_rarely_used__$> or C<}ugly_brace_placement>, or even | |
2126 | things like C<__class_md5sum(classname)__>, to reduce the risk of clashing | |
2127 | with real hashes. | |
2128 | ||
2129 | Example, decode JSON objects of the form C<< { "__widget__" => <id> } >> | |
2130 | into the corresponding C<< $WIDGET{<id>} >> object: | |
2131 | ||
2132 | # return whatever is in $WIDGET{5}: | |
2133 | JSON::PP | |
2134 | ->new | |
2135 | ->filter_json_single_key_object (__widget__ => sub { | |
2136 | $WIDGET{ $_[0] } | |
2137 | }) | |
2138 | ->decode ('{"__widget__": 5') | |
2139 | ||
2140 | # this can be used with a TO_JSON method in some "widget" class | |
2141 | # for serialisation to json: | |
2142 | sub WidgetBase::TO_JSON { | |
2143 | my ($self) = @_; | |
2144 | ||
2145 | unless ($self->{id}) { | |
2146 | $self->{id} = ..get..some..id..; | |
2147 | $WIDGET{$self->{id}} = $self; | |
2148 | } | |
2149 | ||
2150 | { __widget__ => $self->{id} } | |
2151 | } | |
2152 | ||
2153 | =head2 shrink | |
2154 | ||
2155 | $json = $json->shrink([$enable]) | |
2156 | ||
2157 | $enabled = $json->get_shrink | |
2158 | ||
dec273dc CBW |
2159 | If C<$enable> is true (or missing), the string returned by C<encode> will |
2160 | be shrunk (i.e. downgraded if possible). | |
d5424315 | 2161 | |
dec273dc CBW |
2162 | The actual definition of what shrink does might change in future versions, |
2163 | but it will always try to save space at the expense of time. | |
d5424315 | 2164 | |
dec273dc | 2165 | If C<$enable> is false, then JSON::PP does nothing. |
d5424315 DG |
2166 | |
2167 | =head2 max_depth | |
2168 | ||
2169 | $json = $json->max_depth([$maximum_nesting_depth]) | |
2170 | ||
2171 | $max_depth = $json->get_max_depth | |
2172 | ||
2173 | Sets the maximum nesting level (default C<512>) accepted while encoding | |
2174 | or decoding. If a higher nesting level is detected in JSON text or a Perl | |
2175 | data structure, then the encoder and decoder will stop and croak at that | |
2176 | point. | |
2177 | ||
2178 | Nesting level is defined by number of hash- or arrayrefs that the encoder | |
2179 | needs to traverse to reach a given point or the number of C<{> or C<[> | |
2180 | characters without their matching closing parenthesis crossed to reach a | |
2181 | given character in a string. | |
2182 | ||
dec273dc CBW |
2183 | Setting the maximum depth to one disallows any nesting, so that ensures |
2184 | that the object is only a single hash/object or array. | |
2185 | ||
d5424315 DG |
2186 | If no argument is given, the highest possible setting will be used, which |
2187 | is rarely useful. | |
2188 | ||
a1e5c561 | 2189 | See L<JSON::XS/SECURITY CONSIDERATIONS> for more info on why this is useful. |
d5424315 | 2190 | |
d5424315 DG |
2191 | =head2 max_size |
2192 | ||
2193 | $json = $json->max_size([$maximum_string_size]) | |
2194 | ||
2195 | $max_size = $json->get_max_size | |
2196 | ||
2197 | Set the maximum length a JSON text may have (in bytes) where decoding is | |
2198 | being attempted. The default is C<0>, meaning no limit. When C<decode> | |
2199 | is called on a string that is longer then this many bytes, it will not | |
2200 | attempt to decode the string but throw an exception. This setting has no | |
2201 | effect on C<encode> (yet). | |
2202 | ||
2203 | If no argument is given, the limit check will be deactivated (same as when | |
2204 | C<0> is specified). | |
2205 | ||
a1e5c561 | 2206 | See L<JSON::XS/SECURITY CONSIDERATIONS> for more info on why this is useful. |
d5424315 DG |
2207 | |
2208 | =head2 encode | |
2209 | ||
2210 | $json_text = $json->encode($perl_scalar) | |
2211 | ||
dec273dc CBW |
2212 | Converts the given Perl value or data structure to its JSON |
2213 | representation. Croaks on error. | |
d5424315 DG |
2214 | |
2215 | =head2 decode | |
2216 | ||
2217 | $perl_scalar = $json->decode($json_text) | |
2218 | ||
2219 | The opposite of C<encode>: expects a JSON text and tries to parse it, | |
2220 | returning the resulting simple scalar or reference. Croaks on error. | |
2221 | ||
d5424315 DG |
2222 | =head2 decode_prefix |
2223 | ||
2224 | ($perl_scalar, $characters) = $json->decode_prefix($json_text) | |
2225 | ||
2226 | This works like the C<decode> method, but instead of raising an exception | |
2227 | when there is trailing garbage after the first JSON object, it will | |
2228 | silently stop parsing there and return the number of characters consumed | |
2229 | so far. | |
2230 | ||
dec273dc CBW |
2231 | This is useful if your JSON texts are not delimited by an outer protocol |
2232 | and you need to know where the JSON text ends. | |
2233 | ||
2234 | JSON::PP->new->decode_prefix ("[1] the tail") | |
2235 | => ([1], 3) | |
2236 | ||
2237 | =head1 FLAGS FOR JSON::PP ONLY | |
2238 | ||
2239 | The following flags and properties are for JSON::PP only. If you use | |
2240 | any of these, you can't make your application run faster by replacing | |
2241 | JSON::PP with JSON::XS. If you need these and also speed boost, | |
2242 | try L<Cpanel::JSON::XS>, a fork of JSON::XS by Reini Urban, which | |
2243 | supports some of these. | |
2244 | ||
2245 | =head2 allow_singlequote | |
2246 | ||
2247 | $json = $json->allow_singlequote([$enable]) | |
2248 | $enabled = $json->get_allow_singlequote | |
2249 | ||
2250 | If C<$enable> is true (or missing), then C<decode> will accept | |
2251 | invalid JSON texts that contain strings that begin and end with | |
2252 | single quotation marks. C<encode> will not be affected in anyway. | |
2253 | I<Be aware that this option makes you accept invalid JSON texts | |
2254 | as if they were valid!>. I suggest only to use this option to | |
2255 | parse application-specific files written by humans (configuration | |
2256 | files, resource files etc.) | |
2257 | ||
2258 | If C<$enable> is false (the default), then C<decode> will only accept | |
2259 | valid JSON texts. | |
2260 | ||
2261 | $json->allow_singlequote->decode(qq|{"foo":'bar'}|); | |
2262 | $json->allow_singlequote->decode(qq|{'foo':"bar"}|); | |
2263 | $json->allow_singlequote->decode(qq|{'foo':'bar'}|); | |
2264 | ||
2265 | =head2 allow_barekey | |
2266 | ||
2267 | $json = $json->allow_barekey([$enable]) | |
2268 | $enabled = $json->get_allow_barekey | |
2269 | ||
2270 | If C<$enable> is true (or missing), then C<decode> will accept | |
2271 | invalid JSON texts that contain JSON objects whose names don't | |
2272 | begin and end with quotation marks. C<encode> will not be affected | |
2273 | in anyway. I<Be aware that this option makes you accept invalid JSON | |
2274 | texts as if they were valid!>. I suggest only to use this option to | |
2275 | parse application-specific files written by humans (configuration | |
2276 | files, resource files etc.) | |
2277 | ||
2278 | If C<$enable> is false (the default), then C<decode> will only accept | |
2279 | valid JSON texts. | |
2280 | ||
2281 | $json->allow_barekey->decode(qq|{foo:"bar"}|); | |
2282 | ||
2283 | =head2 allow_bignum | |
2284 | ||
2285 | $json = $json->allow_bignum([$enable]) | |
2286 | $enabled = $json->get_allow_bignum | |
2287 | ||
2288 | If C<$enable> is true (or missing), then C<decode> will convert | |
2289 | big integers Perl cannot handle as integer into L<Math::BigInt> | |
2290 | objects and convert floating numbers into L<Math::BigFloat> | |
2291 | objects. C<encode> will convert C<Math::BigInt> and C<Math::BigFloat> | |
2292 | objects into JSON numbers. | |
2293 | ||
2294 | $json->allow_nonref->allow_bignum; | |
2295 | $bigfloat = $json->decode('2.000000000000000000000000001'); | |
2296 | print $json->encode($bigfloat); | |
2297 | # => 2.000000000000000000000000001 | |
2298 | ||
2299 | See also L<MAPPING>. | |
2300 | ||
2301 | =head2 loose | |
2302 | ||
2303 | $json = $json->loose([$enable]) | |
2304 | $enabled = $json->get_loose | |
2305 | ||
2306 | If C<$enable> is true (or missing), then C<decode> will accept | |
2307 | invalid JSON texts that contain unescaped [\x00-\x1f\x22\x5c] | |
2308 | characters. C<encode> will not be affected in anyway. | |
2309 | I<Be aware that this option makes you accept invalid JSON texts | |
2310 | as if they were valid!>. I suggest only to use this option to | |
2311 | parse application-specific files written by humans (configuration | |
2312 | files, resource files etc.) | |
2313 | ||
2314 | If C<$enable> is false (the default), then C<decode> will only accept | |
2315 | valid JSON texts. | |
2316 | ||
2317 | $json->loose->decode(qq|["abc | |
2318 | def"]|); | |
2319 | ||
2320 | =head2 escape_slash | |
2321 | ||
2322 | $json = $json->escape_slash([$enable]) | |
2323 | $enabled = $json->get_escape_slash | |
2324 | ||
2325 | If C<$enable> is true (or missing), then C<encode> will explicitly | |
2326 | escape I<slash> (solidus; C<U+002F>) characters to reduce the risk of | |
2327 | XSS (cross site scripting) that may be caused by C<< </script> >> | |
2328 | in a JSON text, with the cost of bloating the size of JSON texts. | |
2329 | ||
2330 | This option may be useful when you embed JSON in HTML, but embedding | |
2331 | arbitrary JSON in HTML (by some HTML template toolkit or by string | |
2332 | interpolation) is risky in general. You must escape necessary | |
2333 | characters in correct order, depending on the context. | |
2334 | ||
2335 | C<decode> will not be affected in anyway. | |
2336 | ||
2337 | =head2 indent_length | |
2338 | ||
2339 | $json = $json->indent_length($number_of_spaces) | |
2340 | $length = $json->get_indent_length | |
2341 | ||
2342 | This option is only useful when you also enable C<indent> or C<pretty>. | |
2343 | ||
2344 | JSON::XS indents with three spaces when you C<encode> (if requested | |
2345 | by C<indent> or C<pretty>), and the number cannot be changed. | |
2346 | JSON::PP allows you to change/get the number of indent spaces with these | |
2347 | mutator/accessor. The default number of spaces is three (the same as | |
2348 | JSON::XS), and the acceptable range is from C<0> (no indentation; | |
2349 | it'd be better to disable indentation by C<indent(0)>) to C<15>. | |
2350 | ||
2351 | =head2 sort_by | |
2352 | ||
2353 | $json = $json->sort_by($code_ref) | |
2354 | $json = $json->sort_by($subroutine_name) | |
2355 | ||
2356 | If you just want to sort keys (names) in JSON objects when you | |
2357 | C<encode>, enable C<canonical> option (see above) that allows you to | |
2358 | sort object keys alphabetically. | |
2359 | ||
2360 | If you do need to sort non-alphabetically for whatever reasons, | |
2361 | you can give a code reference (or a subroutine name) to C<sort_by>, | |
2362 | then the argument will be passed to Perl's C<sort> built-in function. | |
2363 | ||
2364 | As the sorting is done in the JSON::PP scope, you usually need to | |
2365 | prepend C<JSON::PP::> to the subroutine name, and the special variables | |
2366 | C<$a> and C<$b> used in the subrontine used by C<sort> function. | |
2367 | ||
2368 | Example: | |
2369 | ||
2370 | my %ORDER = (id => 1, class => 2, name => 3); | |
2371 | $json->sort_by(sub { | |
2372 | ($ORDER{$JSON::PP::a} // 999) <=> ($ORDER{$JSON::PP::b} // 999) | |
2373 | or $JSON::PP::a cmp $JSON::PP::b | |
2374 | }); | |
2375 | print $json->encode([ | |
2376 | {name => 'CPAN', id => 1, href => 'http://cpan.org'} | |
2377 | ]); | |
2378 | # [{"id":1,"name":"CPAN","href":"http://cpan.org"}] | |
2379 | ||
2380 | Note that C<sort_by> affects all the plain hashes in the data structure. | |
2381 | If you need finer control, C<tie> necessary hashes with a module that | |
2382 | implements ordered hash (such as L<Hash::Ordered> and L<Tie::IxHash>). | |
2383 | C<canonical> and C<sort_by> don't affect the key order in C<tie>d | |
2384 | hashes. | |
2385 | ||
2386 | use Hash::Ordered; | |
2387 | tie my %hash, 'Hash::Ordered', | |
2388 | (name => 'CPAN', id => 1, href => 'http://cpan.org'); | |
2389 | print $json->encode([\%hash]); | |
2390 | # [{"name":"CPAN","id":1,"href":"http://cpan.org"}] # order is kept | |
d5424315 DG |
2391 | |
2392 | =head1 INCREMENTAL PARSING | |
2393 | ||
dec273dc | 2394 | This section is also taken from JSON::XS. |
d5424315 | 2395 | |
dec273dc CBW |
2396 | In some cases, there is the need for incremental parsing of JSON |
2397 | texts. While this module always has to keep both JSON text and resulting | |
2398 | Perl data structure in memory at one time, it does allow you to parse a | |
2399 | JSON stream incrementally. It does so by accumulating text until it has | |
2400 | a full JSON object, which it then can decode. This process is similar to | |
2401 | using C<decode_prefix> to see if a full JSON object is available, but | |
2402 | is much more efficient (and can be implemented with a minimum of method | |
2403 | calls). | |
d5424315 | 2404 | |
dec273dc | 2405 | JSON::PP will only attempt to parse the JSON text once it is sure it |
d5424315 DG |
2406 | has enough text to get a decisive result, using a very simple but |
2407 | truly incremental parser. This means that it sometimes won't stop as | |
dec273dc CBW |
2408 | early as the full parser, for example, it doesn't detect mismatched |
2409 | parentheses. The only thing it guarantees is that it starts decoding as | |
d5424315 DG |
2410 | soon as a syntactically valid JSON text has been seen. This means you need |
2411 | to set resource limits (e.g. C<max_size>) to ensure the parser will stop | |
2412 | parsing in the presence if syntax errors. | |
2413 | ||
2414 | The following methods implement this incremental parser. | |
2415 | ||
2416 | =head2 incr_parse | |
2417 | ||
2418 | $json->incr_parse( [$string] ) # void context | |
2419 | ||
2420 | $obj_or_undef = $json->incr_parse( [$string] ) # scalar context | |
2421 | ||
2422 | @obj_or_empty = $json->incr_parse( [$string] ) # list context | |
2423 | ||
2424 | This is the central parsing function. It can both append new text and | |
2425 | extract objects from the stream accumulated so far (both of these | |
2426 | functions are optional). | |
2427 | ||
2428 | If C<$string> is given, then this string is appended to the already | |
2429 | existing JSON fragment stored in the C<$json> object. | |
2430 | ||
2431 | After that, if the function is called in void context, it will simply | |
2432 | return without doing anything further. This can be used to add more text | |
2433 | in as many chunks as you want. | |
2434 | ||
2435 | If the method is called in scalar context, then it will try to extract | |
2436 | exactly I<one> JSON object. If that is successful, it will return this | |
2437 | object, otherwise it will return C<undef>. If there is a parse error, | |
2438 | this method will croak just as C<decode> would do (one can then use | |
a1e5c561 | 2439 | C<incr_skip> to skip the erroneous part). This is the most common way of |
d5424315 DG |
2440 | using the method. |
2441 | ||
2442 | And finally, in list context, it will try to extract as many objects | |
2443 | from the stream as it can find and return them, or the empty list | |
dec273dc CBW |
2444 | otherwise. For this to work, there must be no separators (other than |
2445 | whitespace) between the JSON objects or arrays, instead they must be | |
2446 | concatenated back-to-back. If an error occurs, an exception will be | |
2447 | raised as in the scalar context case. Note that in this case, any | |
2448 | previously-parsed JSON texts will be lost. | |
d5424315 | 2449 | |
dec273dc CBW |
2450 | Example: Parse some JSON arrays/objects in a given string and return |
2451 | them. | |
d5424315 | 2452 | |
dec273dc | 2453 | my @objs = JSON::PP->new->incr_parse ("[5][7][1,2]"); |
d5424315 DG |
2454 | |
2455 | =head2 incr_text | |
2456 | ||
2457 | $lvalue_string = $json->incr_text | |
2458 | ||
2459 | This method returns the currently stored JSON fragment as an lvalue, that | |
2460 | is, you can manipulate it. This I<only> works when a preceding call to | |
2461 | C<incr_parse> in I<scalar context> successfully returned an object. Under | |
2462 | all other circumstances you must not call this function (I mean it. | |
2463 | although in simple tests it might actually work, it I<will> fail under | |
2464 | real world conditions). As a special exception, you can also call this | |
2465 | method before having parsed anything. | |
2466 | ||
dec273dc CBW |
2467 | That means you can only use this function to look at or manipulate text |
2468 | before or after complete JSON objects, not while the parser is in the | |
2469 | middle of parsing a JSON object. | |
2470 | ||
d5424315 DG |
2471 | This function is useful in two cases: a) finding the trailing text after a |
2472 | JSON object or b) parsing multiple JSON objects separated by non-JSON text | |
2473 | (such as commas). | |
2474 | ||
d5424315 DG |
2475 | =head2 incr_skip |
2476 | ||
2477 | $json->incr_skip | |
2478 | ||
dec273dc CBW |
2479 | This will reset the state of the incremental parser and will remove |
2480 | the parsed text from the input buffer so far. This is useful after | |
2481 | C<incr_parse> died, in which case the input buffer and incremental parser | |
2482 | state is left unchanged, to skip the text parsed so far and to reset the | |
2483 | parse state. | |
2484 | ||
2485 | The difference to C<incr_reset> is that only text until the parse error | |
2486 | occurred is removed. | |
d5424315 DG |
2487 | |
2488 | =head2 incr_reset | |
2489 | ||
2490 | $json->incr_reset | |
2491 | ||
2492 | This completely resets the incremental parser, that is, after this call, | |
2493 | it will be as if the parser had never parsed anything. | |
2494 | ||
a1e5c561 | 2495 | This is useful if you want to repeatedly parse JSON objects and want to |
d5424315 DG |
2496 | ignore any trailing data, which means you have to reset the parser after |
2497 | each successful decode. | |
2498 | ||
d5424315 DG |
2499 | =head1 MAPPING |
2500 | ||
dec273dc CBW |
2501 | Most of this section is also taken from JSON::XS. |
2502 | ||
2503 | This section describes how JSON::PP maps Perl values to JSON values and | |
2504 | vice versa. These mappings are designed to "do the right thing" in most | |
2505 | circumstances automatically, preserving round-tripping characteristics | |
2506 | (what you put in comes out as something equivalent). | |
d5424315 | 2507 | |
dec273dc CBW |
2508 | For the more enlightened: note that in the following descriptions, |
2509 | lowercase I<perl> refers to the Perl interpreter, while uppercase I<Perl> | |
2510 | refers to the abstract Perl language itself. | |
d5424315 DG |
2511 | |
2512 | =head2 JSON -> PERL | |
2513 | ||
2514 | =over 4 | |
2515 | ||
2516 | =item object | |
2517 | ||
2518 | A JSON object becomes a reference to a hash in Perl. No ordering of object | |
dec273dc | 2519 | keys is preserved (JSON does not preserve object key ordering itself). |
d5424315 DG |
2520 | |
2521 | =item array | |
2522 | ||
2523 | A JSON array becomes a reference to an array in Perl. | |
2524 | ||
2525 | =item string | |
2526 | ||
2527 | A JSON string becomes a string scalar in Perl - Unicode codepoints in JSON | |
2528 | are represented by the same codepoints in the Perl string, so no manual | |
2529 | decoding is necessary. | |
2530 | ||
2531 | =item number | |
2532 | ||
2533 | A JSON number becomes either an integer, numeric (floating point) or | |
2534 | string scalar in perl, depending on its range and any fractional parts. On | |
2535 | the Perl level, there is no difference between those as Perl handles all | |
2536 | the conversion details, but an integer may take slightly less memory and | |
2537 | might represent more values exactly than floating point numbers. | |
2538 | ||
dec273dc | 2539 | If the number consists of digits only, JSON::PP will try to represent |
d5424315 DG |
2540 | it as an integer value. If that fails, it will try to represent it as |
2541 | a numeric (floating point) value if that is possible without loss of | |
2542 | precision. Otherwise it will preserve the number as a string value (in | |
2543 | which case you lose roundtripping ability, as the JSON number will be | |
a1e5c561 | 2544 | re-encoded to a JSON string). |
d5424315 DG |
2545 | |
2546 | Numbers containing a fractional or exponential part will always be | |
2547 | represented as numeric (floating point) values, possibly at a loss of | |
2548 | precision (in which case you might lose perfect roundtripping ability, but | |
2549 | the JSON number will still be re-encoded as a JSON number). | |
2550 | ||
2551 | Note that precision is not accuracy - binary floating point values cannot | |
2552 | represent most decimal fractions exactly, and when converting from and to | |
dec273dc | 2553 | floating point, JSON::PP only guarantees precision up to but not including |
a1e5c561 | 2554 | the least significant bit. |
d5424315 | 2555 | |
dec273dc CBW |
2556 | When C<allow_bignum> is enabled, big integer values and any numeric |
2557 | values will be converted into L<Math::BigInt> and L<Math::BigFloat> | |
2558 | objects respectively, without becoming string scalars or losing | |
2559 | precision. | |
d5424315 DG |
2560 | |
2561 | =item true, false | |
2562 | ||
2563 | These JSON atoms become C<JSON::PP::true> and C<JSON::PP::false>, | |
2564 | respectively. They are overloaded to act almost exactly like the numbers | |
a1e5c561 | 2565 | C<1> and C<0>. You can check whether a scalar is a JSON boolean by using |
dec273dc | 2566 | the C<JSON::PP::is_bool> function. |
d5424315 DG |
2567 | |
2568 | =item null | |
2569 | ||
2570 | A JSON null atom becomes C<undef> in Perl. | |
2571 | ||
dec273dc CBW |
2572 | =item shell-style comments (C<< # I<text> >>) |
2573 | ||
2574 | As a nonstandard extension to the JSON syntax that is enabled by the | |
2575 | C<relaxed> setting, shell-style comments are allowed. They can start | |
2576 | anywhere outside strings and go till the end of the line. | |
d5424315 DG |
2577 | |
2578 | =back | |
2579 | ||
2580 | ||
2581 | =head2 PERL -> JSON | |
2582 | ||
2583 | The mapping from Perl to JSON is slightly more difficult, as Perl is a | |
2584 | truly typeless language, so we can only guess which JSON type is meant by | |
2585 | a Perl value. | |
2586 | ||
2587 | =over 4 | |
2588 | ||
2589 | =item hash references | |
2590 | ||
dec273dc CBW |
2591 | Perl hash references become JSON objects. As there is no inherent |
2592 | ordering in hash keys (or JSON objects), they will usually be encoded | |
2593 | in a pseudo-random order. JSON::PP can optionally sort the hash keys | |
2594 | (determined by the I<canonical> flag and/or I<sort_by> property), so | |
2595 | the same data structure will serialise to the same JSON text (given | |
2596 | same settings and version of JSON::PP), but this incurs a runtime | |
2597 | overhead and is only rarely useful, e.g. when you want to compare some | |
2598 | JSON text against another for equality. | |
d5424315 DG |
2599 | |
2600 | =item array references | |
2601 | ||
2602 | Perl array references become JSON arrays. | |
2603 | ||
2604 | =item other references | |
2605 | ||
2606 | Other unblessed references are generally not allowed and will cause an | |
2607 | exception to be thrown, except for references to the integers C<0> and | |
2608 | C<1>, which get turned into C<false> and C<true> atoms in JSON. You can | |
dec273dc CBW |
2609 | also use C<JSON::PP::false> and C<JSON::PP::true> to improve |
2610 | readability. | |
d5424315 | 2611 | |
dec273dc | 2612 | to_json [\0, JSON::PP::true] # yields [false,true] |
d5424315 | 2613 | |
dec273dc | 2614 | =item JSON::PP::true, JSON::PP::false |
d5424315 DG |
2615 | |
2616 | These special values become JSON true and JSON false values, | |
2617 | respectively. You can also use C<\1> and C<\0> directly if you want. | |
2618 | ||
dec273dc | 2619 | =item JSON::PP::null |
d5424315 | 2620 | |
dec273dc | 2621 | This special value becomes JSON null. |
d5424315 | 2622 | |
dec273dc | 2623 | =item blessed objects |
d5424315 | 2624 | |
dec273dc CBW |
2625 | Blessed objects are not directly representable in JSON, but C<JSON::PP> |
2626 | allows various ways of handling objects. See L<OBJECT SERIALISATION>, | |
2627 | below, for details. | |
d5424315 DG |
2628 | |
2629 | =item simple scalars | |
2630 | ||
2631 | Simple Perl scalars (any scalar that is not a reference) are the most | |
dec273dc | 2632 | difficult objects to encode: JSON::PP will encode undefined scalars as |
d5424315 DG |
2633 | JSON C<null> values, scalars that have last been used in a string context |
2634 | before encoding as JSON strings, and anything else as number value: | |
2635 | ||
2636 | # dump as number | |
2637 | encode_json [2] # yields [2] | |
2638 | encode_json [-3.0e17] # yields [-3e+17] | |
2639 | my $value = 5; encode_json [$value] # yields [5] | |
2640 | ||
2641 | # used as string, so dump as string | |
2642 | print $value; | |
2643 | encode_json [$value] # yields ["5"] | |
2644 | ||
2645 | # undef becomes null | |
2646 | encode_json [undef] # yields [null] | |
2647 | ||
2648 | You can force the type to be a string by stringifying it: | |
2649 | ||
2650 | my $x = 3.1; # some variable containing a number | |
2651 | "$x"; # stringified | |
2652 | $x .= ""; # another, more awkward way to stringify | |
2653 | print $x; # perl does it for you, too, quite often | |
dec273dc | 2654 | # (but for older perls) |
d5424315 DG |
2655 | |
2656 | You can force the type to be a number by numifying it: | |
2657 | ||
2658 | my $x = "3"; # some variable containing a string | |
2659 | $x += 0; # numify it, ensuring it will be dumped as a number | |
a1e5c561 | 2660 | $x *= 1; # same thing, the choice is yours. |
d5424315 | 2661 | |
a1e5c561 | 2662 | You cannot currently force the type in other, less obscure, ways. |
d5424315 DG |
2663 | |
2664 | Note that numerical precision has the same meaning as under Perl (so | |
2665 | binary to decimal conversion follows the same rules as in Perl, which | |
2666 | can differ to other languages). Also, your perl interpreter might expose | |
2667 | extensions to the floating point numbers of your platform, such as | |
2668 | infinities or NaN's - these cannot be represented in JSON, and it is an | |
2669 | error to pass those in. | |
2670 | ||
dec273dc CBW |
2671 | JSON::PP (and JSON::XS) trusts what you pass to C<encode> method |
2672 | (or C<encode_json> function) is a clean, validated data structure with | |
2673 | values that can be represented as valid JSON values only, because it's | |
2674 | not from an external data source (as opposed to JSON texts you pass to | |
2675 | C<decode> or C<decode_json>, which JSON::PP considers tainted and | |
2676 | doesn't trust). As JSON::PP doesn't know exactly what you and consumers | |
2677 | of your JSON texts want the unexpected values to be (you may want to | |
2678 | convert them into null, or to stringify them with or without | |
2679 | normalisation (string representation of infinities/NaN may vary | |
2680 | depending on platforms), or to croak without conversion), you're advised | |
2681 | to do what you and your consumers need before you encode, and also not | |
2682 | to numify values that may start with values that look like a number | |
2683 | (including infinities/NaN), without validating. | |
d5424315 DG |
2684 | |
2685 | =back | |
2686 | ||
dec273dc | 2687 | =head2 OBJECT SERIALISATION |
d5424315 | 2688 | |
dec273dc | 2689 | As for Perl objects, JSON::PP only supports a pure JSON representation (without the ability to deserialise the object automatically again). |
d5424315 | 2690 | |
dec273dc | 2691 | =head3 SERIALISATION |
d5424315 | 2692 | |
dec273dc CBW |
2693 | What happens when C<JSON::PP> encounters a Perl object depends on the |
2694 | C<allow_blessed>, C<convert_blessed> and C<allow_bignum> settings, which are | |
2695 | used in this order: | |
d5424315 | 2696 | |
dec273dc | 2697 | =over 4 |
d5424315 | 2698 | |
dec273dc | 2699 | =item 1. C<convert_blessed> is enabled and the object has a C<TO_JSON> method. |
d5424315 | 2700 | |
dec273dc CBW |
2701 | In this case, the C<TO_JSON> method of the object is invoked in scalar |
2702 | context. It must return a single scalar that can be directly encoded into | |
2703 | JSON. This scalar replaces the object in the JSON text. | |
d5424315 | 2704 | |
dec273dc CBW |
2705 | For example, the following C<TO_JSON> method will convert all L<URI> |
2706 | objects to JSON strings when serialised. The fact that these values | |
2707 | originally were L<URI> objects is lost. | |
d5424315 | 2708 | |
dec273dc CBW |
2709 | sub URI::TO_JSON { |
2710 | my ($uri) = @_; | |
2711 | $uri->as_string | |
2712 | } | |
d5424315 | 2713 | |
dec273dc | 2714 | =item 2. C<allow_bignum> is enabled and the object is a C<Math::BigInt> or C<Math::BigFloat>. |
d5424315 | 2715 | |
dec273dc | 2716 | The object will be serialised as a JSON number value. |
d5424315 | 2717 | |
dec273dc | 2718 | =item 3. C<allow_blessed> is enabled. |
d5424315 | 2719 | |
dec273dc | 2720 | The object will be serialised as a JSON null value. |
d5424315 | 2721 | |
dec273dc | 2722 | =item 4. none of the above |
d5424315 | 2723 | |
dec273dc CBW |
2724 | If none of the settings are enabled or the respective methods are missing, |
2725 | C<JSON::PP> throws an exception. | |
d5424315 | 2726 | |
dec273dc | 2727 | =back |
d5424315 | 2728 | |
dec273dc | 2729 | =head1 ENCODING/CODESET FLAG NOTES |
d5424315 | 2730 | |
dec273dc | 2731 | This section is taken from JSON::XS. |
d5424315 | 2732 | |
dec273dc CBW |
2733 | The interested reader might have seen a number of flags that signify |
2734 | encodings or codesets - C<utf8>, C<latin1> and C<ascii>. There seems to be | |
2735 | some confusion on what these do, so here is a short comparison: | |
d5424315 | 2736 | |
dec273dc CBW |
2737 | C<utf8> controls whether the JSON text created by C<encode> (and expected |
2738 | by C<decode>) is UTF-8 encoded or not, while C<latin1> and C<ascii> only | |
2739 | control whether C<encode> escapes character values outside their respective | |
2740 | codeset range. Neither of these flags conflict with each other, although | |
2741 | some combinations make less sense than others. | |
d5424315 | 2742 | |
dec273dc CBW |
2743 | Care has been taken to make all flags symmetrical with respect to |
2744 | C<encode> and C<decode>, that is, texts encoded with any combination of | |
2745 | these flag values will be correctly decoded when the same flags are used | |
2746 | - in general, if you use different flag settings while encoding vs. when | |
2747 | decoding you likely have a bug somewhere. | |
d5424315 | 2748 | |
dec273dc CBW |
2749 | Below comes a verbose discussion of these flags. Note that a "codeset" is |
2750 | simply an abstract set of character-codepoint pairs, while an encoding | |
2751 | takes those codepoint numbers and I<encodes> them, in our case into | |
2752 | octets. Unicode is (among other things) a codeset, UTF-8 is an encoding, | |
2753 | and ISO-8859-1 (= latin 1) and ASCII are both codesets I<and> encodings at | |
2754 | the same time, which can be confusing. | |
d5424315 | 2755 | |
dec273dc | 2756 | =over 4 |
d5424315 | 2757 | |
dec273dc CBW |
2758 | =item C<utf8> flag disabled |
2759 | ||
2760 | When C<utf8> is disabled (the default), then C<encode>/C<decode> generate | |
2761 | and expect Unicode strings, that is, characters with high ordinal Unicode | |
2762 | values (> 255) will be encoded as such characters, and likewise such | |
2763 | characters are decoded as-is, no changes to them will be done, except | |
2764 | "(re-)interpreting" them as Unicode codepoints or Unicode characters, | |
2765 | respectively (to Perl, these are the same thing in strings unless you do | |
2766 | funny/weird/dumb stuff). | |
2767 | ||
2768 | This is useful when you want to do the encoding yourself (e.g. when you | |
2769 | want to have UTF-16 encoded JSON texts) or when some other layer does | |
2770 | the encoding for you (for example, when printing to a terminal using a | |
2771 | filehandle that transparently encodes to UTF-8 you certainly do NOT want | |
2772 | to UTF-8 encode your data first and have Perl encode it another time). | |
2773 | ||
2774 | =item C<utf8> flag enabled | |
2775 | ||
2776 | If the C<utf8>-flag is enabled, C<encode>/C<decode> will encode all | |
2777 | characters using the corresponding UTF-8 multi-byte sequence, and will | |
2778 | expect your input strings to be encoded as UTF-8, that is, no "character" | |
2779 | of the input string must have any value > 255, as UTF-8 does not allow | |
2780 | that. | |
2781 | ||
2782 | The C<utf8> flag therefore switches between two modes: disabled means you | |
2783 | will get a Unicode string in Perl, enabled means you get an UTF-8 encoded | |
2784 | octet/binary string in Perl. | |
2785 | ||
2786 | =item C<latin1> or C<ascii> flags enabled | |
2787 | ||
2788 | With C<latin1> (or C<ascii>) enabled, C<encode> will escape characters | |
2789 | with ordinal values > 255 (> 127 with C<ascii>) and encode the remaining | |
2790 | characters as specified by the C<utf8> flag. | |
2791 | ||
2792 | If C<utf8> is disabled, then the result is also correctly encoded in those | |
2793 | character sets (as both are proper subsets of Unicode, meaning that a | |
2794 | Unicode string with all character values < 256 is the same thing as a | |
2795 | ISO-8859-1 string, and a Unicode string with all character values < 128 is | |
2796 | the same thing as an ASCII string in Perl). | |
2797 | ||
2798 | If C<utf8> is enabled, you still get a correct UTF-8-encoded string, | |
2799 | regardless of these flags, just some more characters will be escaped using | |
2800 | C<\uXXXX> then before. | |
2801 | ||
2802 | Note that ISO-8859-1-I<encoded> strings are not compatible with UTF-8 | |
2803 | encoding, while ASCII-encoded strings are. That is because the ISO-8859-1 | |
2804 | encoding is NOT a subset of UTF-8 (despite the ISO-8859-1 I<codeset> being | |
2805 | a subset of Unicode), while ASCII is. | |
2806 | ||
2807 | Surprisingly, C<decode> will ignore these flags and so treat all input | |
2808 | values as governed by the C<utf8> flag. If it is disabled, this allows you | |
2809 | to decode ISO-8859-1- and ASCII-encoded strings, as both strict subsets of | |
2810 | Unicode. If it is enabled, you can correctly decode UTF-8 encoded strings. | |
2811 | ||
2812 | So neither C<latin1> nor C<ascii> are incompatible with the C<utf8> flag - | |
2813 | they only govern when the JSON output engine escapes a character or not. | |
2814 | ||
2815 | The main use for C<latin1> is to relatively efficiently store binary data | |
2816 | as JSON, at the expense of breaking compatibility with most JSON decoders. | |
2817 | ||
2818 | The main use for C<ascii> is to force the output to not contain characters | |
2819 | with values > 127, which means you can interpret the resulting string | |
2820 | as UTF-8, ISO-8859-1, ASCII, KOI8-R or most about any character set and | |
2821 | 8-bit-encoding, and still get the same data structure back. This is useful | |
2822 | when your channel for JSON transfer is not 8-bit clean or the encoding | |
2823 | might be mangled in between (e.g. in mail), and works because ASCII is a | |
2824 | proper subset of most 8-bit and multibyte encodings in use in the world. | |
d5424315 DG |
2825 | |
2826 | =back | |
2827 | ||
d5424315 DG |
2828 | =head1 SEE ALSO |
2829 | ||
dec273dc CBW |
2830 | The F<json_pp> command line utility for quick experiments. |
2831 | ||
2832 | L<JSON::XS>, L<Cpanel::JSON::XS>, and L<JSON::Tiny> for faster alternatives. | |
2833 | L<JSON> and L<JSON::MaybeXS> for easy migration. | |
d5424315 | 2834 | |
dec273dc | 2835 | L<JSON::PP::Compat5005> and L<JSON::PP::Compat5006> for older perl users. |
d5424315 DG |
2836 | |
2837 | RFC4627 (L<http://www.ietf.org/rfc/rfc4627.txt>) | |
2838 | ||
2839 | =head1 AUTHOR | |
2840 | ||
2841 | Makamaka Hannyaharamitu, E<lt>makamaka[at]cpan.orgE<gt> | |
2842 | ||
2843 | ||
2844 | =head1 COPYRIGHT AND LICENSE | |
2845 | ||
a1e5c561 | 2846 | Copyright 2007-2016 by Makamaka Hannyaharamitu |
d5424315 DG |
2847 | |
2848 | This library is free software; you can redistribute it and/or modify | |
2849 | it under the same terms as Perl itself. | |
2850 | ||
2851 | =cut |