This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
upgrade cpan/JSON-PP from 4.00 to 4.02
[perl5.git] / cpan / JSON-PP / lib / JSON / PP.pm
CommitLineData
d5424315
DG
1package JSON::PP;
2
3# JSON-2.0
4
5use 5.005;
6use strict;
dec273dc
CBW
7
8use Exporter ();
9BEGIN { @JSON::PP::ISA = ('Exporter') }
10
d5424315 11use overload ();
dec273dc 12use JSON::PP::Boolean;
d5424315
DG
13
14use Carp ();
d5424315
DG
15#use Devel::Peek;
16
929e53be 17$JSON::PP::VERSION = '4.02';
d5424315
DG
18
19@JSON::PP::EXPORT = qw(encode_json decode_json from_json to_json);
20
21# instead of hash-access, i tried index-access for speed.
22# but this method is not faster than what i expected. so it will be changed.
23
24use constant P_ASCII => 0;
25use constant P_LATIN1 => 1;
26use constant P_UTF8 => 2;
27use constant P_INDENT => 3;
28use constant P_CANONICAL => 4;
29use constant P_SPACE_BEFORE => 5;
30use constant P_SPACE_AFTER => 6;
31use constant P_ALLOW_NONREF => 7;
32use constant P_SHRINK => 8;
33use constant P_ALLOW_BLESSED => 9;
34use constant P_CONVERT_BLESSED => 10;
35use constant P_RELAXED => 11;
36
37use constant P_LOOSE => 12;
38use constant P_ALLOW_BIGNUM => 13;
39use constant P_ALLOW_BAREKEY => 14;
40use constant P_ALLOW_SINGLEQUOTE => 15;
41use constant P_ESCAPE_SLASH => 16;
42use constant P_AS_NONBLESSED => 17;
43
44use constant P_ALLOW_UNKNOWN => 18;
5eabe055 45use constant P_ALLOW_TAGS => 19;
d5424315
DG
46
47use constant OLD_PERL => $] < 5.008 ? 1 : 0;
5eabe055 48use constant USE_B => $ENV{PERL_JSON_PP_USE_B} || 0;
dec273dc
CBW
49
50BEGIN {
5eabe055
N
51 if (USE_B) {
52 require B;
53 }
dec273dc 54}
d5424315
DG
55
56BEGIN {
57 my @xs_compati_bit_properties = qw(
58 latin1 ascii utf8 indent canonical space_before space_after allow_nonref shrink
59 allow_blessed convert_blessed relaxed allow_unknown
5eabe055 60 allow_tags
d5424315
DG
61 );
62 my @pp_bit_properties = qw(
63 allow_singlequote allow_bignum loose
64 allow_barekey escape_slash as_nonblessed
65 );
66
a1e5c561 67 # Perl version check, Unicode handling is enabled?
d5424315 68 # Helper module sets @JSON::PP::_properties.
dec273dc 69 if ( OLD_PERL ) {
d5424315
DG
70 my $helper = $] >= 5.006 ? 'JSON::PP::Compat5006' : 'JSON::PP::Compat5005';
71 eval qq| require $helper |;
72 if ($@) { Carp::croak $@; }
73 }
74
75 for my $name (@xs_compati_bit_properties, @pp_bit_properties) {
dec273dc 76 my $property_id = 'P_' . uc($name);
d5424315
DG
77
78 eval qq/
79 sub $name {
80 my \$enable = defined \$_[1] ? \$_[1] : 1;
81
82 if (\$enable) {
dec273dc 83 \$_[0]->{PROPS}->[$property_id] = 1;
d5424315
DG
84 }
85 else {
dec273dc 86 \$_[0]->{PROPS}->[$property_id] = 0;
d5424315
DG
87 }
88
89 \$_[0];
90 }
91
92 sub get_$name {
dec273dc 93 \$_[0]->{PROPS}->[$property_id] ? 1 : '';
d5424315
DG
94 }
95 /;
96 }
97
98}
99
100
101
102# Functions
103
d5424315
DG
104my $JSON; # cache
105
106sub encode_json ($) { # encode
107 ($JSON ||= __PACKAGE__->new->utf8)->encode(@_);
108}
109
110
111sub decode_json { # decode
112 ($JSON ||= __PACKAGE__->new->utf8)->decode(@_);
113}
114
115# Obsoleted
116
117sub to_json($) {
118 Carp::croak ("JSON::PP::to_json has been renamed to encode_json.");
119}
120
121
122sub from_json($) {
123 Carp::croak ("JSON::PP::from_json has been renamed to decode_json.");
124}
125
126
127# Methods
128
129sub new {
130 my $class = shift;
131 my $self = {
132 max_depth => 512,
133 max_size => 0,
d5424315
DG
134 indent_length => 3,
135 };
136
5eabe055
N
137 $self->{PROPS}[P_ALLOW_NONREF] = 1;
138
d5424315
DG
139 bless $self, $class;
140}
141
142
143sub encode {
144 return $_[0]->PP_encode_json($_[1]);
145}
146
147
148sub decode {
149 return $_[0]->PP_decode_json($_[1], 0x00000000);
150}
151
152
153sub decode_prefix {
154 return $_[0]->PP_decode_json($_[1], 0x00000001);
155}
156
157
158# accessor
159
160
161# pretty printing
162
163sub pretty {
164 my ($self, $v) = @_;
165 my $enable = defined $v ? $v : 1;
166
167 if ($enable) { # indent_length(3) for JSON::XS compatibility
dec273dc 168 $self->indent(1)->space_before(1)->space_after(1);
d5424315
DG
169 }
170 else {
171 $self->indent(0)->space_before(0)->space_after(0);
172 }
173
174 $self;
175}
176
177# etc
178
179sub max_depth {
180 my $max = defined $_[1] ? $_[1] : 0x80000000;
181 $_[0]->{max_depth} = $max;
182 $_[0];
183}
184
185
186sub get_max_depth { $_[0]->{max_depth}; }
187
188
189sub max_size {
190 my $max = defined $_[1] ? $_[1] : 0;
191 $_[0]->{max_size} = $max;
192 $_[0];
193}
194
195
196sub get_max_size { $_[0]->{max_size}; }
197
5eabe055
N
198sub boolean_values {
199 my $self = shift;
200 if (@_) {
201 my ($false, $true) = @_;
202 $self->{false} = $false;
203 $self->{true} = $true;
204 return ($false, $true);
205 } else {
206 delete $self->{false};
207 delete $self->{true};
208 return;
209 }
210}
211
212sub get_boolean_values {
213 my $self = shift;
214 if (exists $self->{true} and exists $self->{false}) {
215 return @$self{qw/false true/};
216 }
217 return;
218}
d5424315
DG
219
220sub filter_json_object {
dec273dc
CBW
221 if (defined $_[1] and ref $_[1] eq 'CODE') {
222 $_[0]->{cb_object} = $_[1];
223 } else {
224 delete $_[0]->{cb_object};
225 }
d5424315
DG
226 $_[0]->{F_HOOK} = ($_[0]->{cb_object} or $_[0]->{cb_sk_object}) ? 1 : 0;
227 $_[0];
228}
229
230sub filter_json_single_key_object {
dec273dc
CBW
231 if (@_ == 1 or @_ > 3) {
232 Carp::croak("Usage: JSON::PP::filter_json_single_key_object(self, key, callback = undef)");
233 }
234 if (defined $_[2] and ref $_[2] eq 'CODE') {
d5424315 235 $_[0]->{cb_sk_object}->{$_[1]} = $_[2];
dec273dc
CBW
236 } else {
237 delete $_[0]->{cb_sk_object}->{$_[1]};
238 delete $_[0]->{cb_sk_object} unless %{$_[0]->{cb_sk_object} || {}};
d5424315
DG
239 }
240 $_[0]->{F_HOOK} = ($_[0]->{cb_object} or $_[0]->{cb_sk_object}) ? 1 : 0;
241 $_[0];
242}
243
244sub indent_length {
245 if (!defined $_[1] or $_[1] > 15 or $_[1] < 0) {
246 Carp::carp "The acceptable range of indent_length() is 0 to 15.";
247 }
248 else {
249 $_[0]->{indent_length} = $_[1];
250 }
251 $_[0];
252}
253
254sub get_indent_length {
255 $_[0]->{indent_length};
256}
257
258sub sort_by {
259 $_[0]->{sort_by} = defined $_[1] ? $_[1] : 1;
260 $_[0];
261}
262
263sub allow_bigint {
dec273dc
CBW
264 Carp::carp("allow_bigint() is obsoleted. use allow_bignum() instead.");
265 $_[0]->allow_bignum;
d5424315
DG
266}
267
268###############################
269
270###
271### Perl => JSON
272###
273
274
275{ # Convert
276
277 my $max_depth;
278 my $indent;
279 my $ascii;
280 my $latin1;
281 my $utf8;
282 my $space_before;
283 my $space_after;
284 my $canonical;
285 my $allow_blessed;
286 my $convert_blessed;
287
288 my $indent_length;
289 my $escape_slash;
290 my $bignum;
291 my $as_nonblessed;
5eabe055 292 my $allow_tags;
d5424315
DG
293
294 my $depth;
295 my $indent_count;
296 my $keysort;
297
298
299 sub PP_encode_json {
300 my $self = shift;
301 my $obj = shift;
302
303 $indent_count = 0;
304 $depth = 0;
305
dec273dc 306 my $props = $self->{PROPS};
d5424315
DG
307
308 ($ascii, $latin1, $utf8, $indent, $canonical, $space_before, $space_after, $allow_blessed,
5eabe055 309 $convert_blessed, $escape_slash, $bignum, $as_nonblessed, $allow_tags)
dec273dc 310 = @{$props}[P_ASCII .. P_SPACE_AFTER, P_ALLOW_BLESSED, P_CONVERT_BLESSED,
5eabe055 311 P_ESCAPE_SLASH, P_ALLOW_BIGNUM, P_AS_NONBLESSED, P_ALLOW_TAGS];
d5424315
DG
312
313 ($max_depth, $indent_length) = @{$self}{qw/max_depth indent_length/};
314
315 $keysort = $canonical ? sub { $a cmp $b } : undef;
316
317 if ($self->{sort_by}) {
318 $keysort = ref($self->{sort_by}) eq 'CODE' ? $self->{sort_by}
319 : $self->{sort_by} =~ /\D+/ ? $self->{sort_by}
320 : sub { $a cmp $b };
321 }
322
323 encode_error("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)")
dec273dc 324 if(!ref $obj and !$props->[ P_ALLOW_NONREF ]);
d5424315
DG
325
326 my $str = $self->object_to_json($obj);
327
328 $str .= "\n" if ( $indent ); # JSON::XS 2.26 compatible
329
330 unless ($ascii or $latin1 or $utf8) {
331 utf8::upgrade($str);
332 }
333
dec273dc 334 if ($props->[ P_SHRINK ]) {
d5424315
DG
335 utf8::downgrade($str, 1);
336 }
337
338 return $str;
339 }
340
341
342 sub object_to_json {
343 my ($self, $obj) = @_;
344 my $type = ref($obj);
345
346 if($type eq 'HASH'){
347 return $self->hash_to_json($obj);
348 }
349 elsif($type eq 'ARRAY'){
350 return $self->array_to_json($obj);
351 }
352 elsif ($type) { # blessed object?
353 if (blessed($obj)) {
354
355 return $self->value_to_json($obj) if ( $obj->isa('JSON::PP::Boolean') );
356
5eabe055
N
357 if ( $allow_tags and $obj->can('FREEZE') ) {
358 my $obj_class = ref $obj || $obj;
359 $obj = bless $obj, $obj_class;
360 my @results = $obj->FREEZE('JSON');
361 if ( @results and ref $results[0] ) {
362 if ( refaddr( $obj ) eq refaddr( $results[0] ) ) {
363 encode_error( sprintf(
364 "%s::FREEZE method returned same object as was passed instead of a new one",
365 ref $obj
366 ) );
367 }
368 }
369 return '("'.$obj_class.'")['.join(',', @results).']';
370 }
371
d5424315
DG
372 if ( $convert_blessed and $obj->can('TO_JSON') ) {
373 my $result = $obj->TO_JSON();
05bd2440
CBW
374 if ( defined $result and ref( $result ) ) {
375 if ( refaddr( $obj ) eq refaddr( $result ) ) {
d5424315
DG
376 encode_error( sprintf(
377 "%s::TO_JSON method returned same object as was passed instead of a new one",
378 ref $obj
379 ) );
380 }
381 }
382
383 return $self->object_to_json( $result );
384 }
385
386 return "$obj" if ( $bignum and _is_bignum($obj) );
d5424315 387
dec273dc
CBW
388 if ($allow_blessed) {
389 return $self->blessed_to_json($obj) if ($as_nonblessed); # will be removed.
390 return 'null';
391 }
5eabe055 392 encode_error( sprintf("encountered object '%s', but neither allow_blessed, convert_blessed nor allow_tags settings are enabled (or TO_JSON/FREEZE method missing)", $obj)
dec273dc 393 );
d5424315
DG
394 }
395 else {
396 return $self->value_to_json($obj);
397 }
398 }
399 else{
400 return $self->value_to_json($obj);
401 }
402 }
403
404
405 sub hash_to_json {
406 my ($self, $obj) = @_;
407 my @res;
408
409 encode_error("json text or perl structure exceeds maximum nesting level (max_depth set too low?)")
410 if (++$depth > $max_depth);
411
412 my ($pre, $post) = $indent ? $self->_up_indent() : ('', '');
413 my $del = ($space_before ? ' ' : '') . ':' . ($space_after ? ' ' : '');
414
415 for my $k ( _sort( $obj ) ) {
416 if ( OLD_PERL ) { utf8::decode($k) } # key for Perl 5.6 / be optimized
dec273dc 417 push @res, $self->string_to_json( $k )
d5424315 418 . $del
dec273dc 419 . ( ref $obj->{$k} ? $self->object_to_json( $obj->{$k} ) : $self->value_to_json( $obj->{$k} ) );
d5424315
DG
420 }
421
422 --$depth;
423 $self->_down_indent() if ($indent);
424
dec273dc
CBW
425 return '{}' unless @res;
426 return '{' . $pre . join( ",$pre", @res ) . $post . '}';
d5424315
DG
427 }
428
429
430 sub array_to_json {
431 my ($self, $obj) = @_;
432 my @res;
433
434 encode_error("json text or perl structure exceeds maximum nesting level (max_depth set too low?)")
435 if (++$depth > $max_depth);
436
437 my ($pre, $post) = $indent ? $self->_up_indent() : ('', '');
438
439 for my $v (@$obj){
dec273dc 440 push @res, ref($v) ? $self->object_to_json($v) : $self->value_to_json($v);
d5424315
DG
441 }
442
443 --$depth;
444 $self->_down_indent() if ($indent);
445
dec273dc
CBW
446 return '[]' unless @res;
447 return '[' . $pre . join( ",$pre", @res ) . $post . ']';
d5424315
DG
448 }
449
dec273dc
CBW
450 sub _looks_like_number {
451 my $value = shift;
452 if (USE_B) {
453 my $b_obj = B::svref_2object(\$value);
454 my $flags = $b_obj->FLAGS;
455 return 1 if $flags & ( B::SVp_IOK() | B::SVp_NOK() ) and !( $flags & B::SVp_POK() );
456 return;
457 } else {
458 no warnings 'numeric';
9bccc4c2
CBW
459 # if the utf8 flag is on, it almost certainly started as a string
460 return if utf8::is_utf8($value);
dec273dc
CBW
461 # detect numbers
462 # string & "" -> ""
463 # number & "" -> 0 (with warning)
464 # nan and inf can detect as numbers, so check with * 0
465 return unless length((my $dummy = "") & $value);
466 return unless 0 + $value eq $value;
467 return 1 if $value * 0 == 0;
468 return -1; # inf/nan
469 }
470 }
d5424315
DG
471
472 sub value_to_json {
473 my ($self, $value) = @_;
474
475 return 'null' if(!defined $value);
476
d5424315
DG
477 my $type = ref($value);
478
dec273dc
CBW
479 if (!$type) {
480 if (_looks_like_number($value)) {
481 return $value;
482 }
483 return $self->string_to_json($value);
d5424315
DG
484 }
485 elsif( blessed($value) and $value->isa('JSON::PP::Boolean') ){
486 return $$value == 1 ? 'true' : 'false';
487 }
dec273dc 488 else {
d5424315
DG
489 if ((overload::StrVal($value) =~ /=(\w+)/)[0]) {
490 return $self->value_to_json("$value");
491 }
492
493 if ($type eq 'SCALAR' and defined $$value) {
494 return $$value eq '1' ? 'true'
495 : $$value eq '0' ? 'false'
496 : $self->{PROPS}->[ P_ALLOW_UNKNOWN ] ? 'null'
497 : encode_error("cannot encode reference to scalar");
498 }
499
dec273dc
CBW
500 if ( $self->{PROPS}->[ P_ALLOW_UNKNOWN ] ) {
501 return 'null';
502 }
503 else {
504 if ( $type eq 'SCALAR' or $type eq 'REF' ) {
d5424315 505 encode_error("cannot encode reference to scalar");
dec273dc
CBW
506 }
507 else {
d5424315 508 encode_error("encountered $value, but JSON can only represent references to arrays or hashes");
dec273dc
CBW
509 }
510 }
d5424315
DG
511
512 }
d5424315
DG
513 }
514
515
516 my %esc = (
517 "\n" => '\n',
518 "\r" => '\r',
519 "\t" => '\t',
520 "\f" => '\f',
521 "\b" => '\b',
522 "\"" => '\"',
523 "\\" => '\\\\',
524 "\'" => '\\\'',
525 );
526
527
528 sub string_to_json {
529 my ($self, $arg) = @_;
530
531 $arg =~ s/([\x22\x5c\n\r\t\f\b])/$esc{$1}/g;
532 $arg =~ s/\//\\\//g if ($escape_slash);
533 $arg =~ s/([\x00-\x08\x0b\x0e-\x1f])/'\\u00' . unpack('H2', $1)/eg;
534
535 if ($ascii) {
536 $arg = JSON_PP_encode_ascii($arg);
537 }
538
539 if ($latin1) {
540 $arg = JSON_PP_encode_latin1($arg);
541 }
542
543 if ($utf8) {
544 utf8::encode($arg);
545 }
546
547 return '"' . $arg . '"';
548 }
549
550
551 sub blessed_to_json {
552 my $reftype = reftype($_[1]) || '';
553 if ($reftype eq 'HASH') {
554 return $_[0]->hash_to_json($_[1]);
555 }
556 elsif ($reftype eq 'ARRAY') {
557 return $_[0]->array_to_json($_[1]);
558 }
559 else {
560 return 'null';
561 }
562 }
563
564
565 sub encode_error {
566 my $error = shift;
567 Carp::croak "$error";
568 }
569
570
571 sub _sort {
572 defined $keysort ? (sort $keysort (keys %{$_[0]})) : keys %{$_[0]};
573 }
574
575
576 sub _up_indent {
577 my $self = shift;
578 my $space = ' ' x $indent_length;
579
580 my ($pre,$post) = ('','');
581
582 $post = "\n" . $space x $indent_count;
583
584 $indent_count++;
585
586 $pre = "\n" . $space x $indent_count;
587
588 return ($pre,$post);
589 }
590
591
592 sub _down_indent { $indent_count--; }
593
594
595 sub PP_encode_box {
596 {
597 depth => $depth,
598 indent_count => $indent_count,
599 };
600 }
601
602} # Convert
603
604
605sub _encode_ascii {
606 join('',
607 map {
608 $_ <= 127 ?
609 chr($_) :
610 $_ <= 65535 ?
611 sprintf('\u%04x', $_) : sprintf('\u%x\u%x', _encode_surrogates($_));
612 } unpack('U*', $_[0])
613 );
614}
615
616
617sub _encode_latin1 {
618 join('',
619 map {
620 $_ <= 255 ?
621 chr($_) :
622 $_ <= 65535 ?
623 sprintf('\u%04x', $_) : sprintf('\u%x\u%x', _encode_surrogates($_));
624 } unpack('U*', $_[0])
625 );
626}
627
628
629sub _encode_surrogates { # from perlunicode
630 my $uni = $_[0] - 0x10000;
631 return ($uni / 0x400 + 0xD800, $uni % 0x400 + 0xDC00);
632}
633
634
635sub _is_bignum {
636 $_[0]->isa('Math::BigInt') or $_[0]->isa('Math::BigFloat');
637}
638
639
640
641#
642# JSON => Perl
643#
644
645my $max_intsize;
646
647BEGIN {
648 my $checkint = 1111;
649 for my $d (5..64) {
650 $checkint .= 1;
651 my $int = eval qq| $checkint |;
652 if ($int =~ /[eE]/) {
653 $max_intsize = $d - 1;
654 last;
655 }
656 }
657}
658
659{ # PARSE
660
661 my %escapes = ( # by Jeremy Muhlich <jmuhlich [at] bitflood.org>
662 b => "\x8",
663 t => "\x9",
664 n => "\xA",
665 f => "\xC",
666 r => "\xD",
667 '\\' => '\\',
668 '"' => '"',
669 '/' => '/',
670 );
671
672 my $text; # json data
673 my $at; # offset
a1e5c561 674 my $ch; # first character
d5424315
DG
675 my $len; # text length (changed according to UTF8 or NON UTF8)
676 # INTERNAL
677 my $depth; # nest counter
678 my $encoding; # json text encoding
679 my $is_valid_utf8; # temp variable
680 my $utf8_len; # utf8 byte length
681 # FLAGS
682 my $utf8; # must be utf8
a1e5c561 683 my $max_depth; # max nest number of objects and arrays
d5424315
DG
684 my $max_size;
685 my $relaxed;
686 my $cb_object;
687 my $cb_sk_object;
688
689 my $F_HOOK;
690
dec273dc 691 my $allow_bignum; # using Math::BigInt/BigFloat
d5424315
DG
692 my $singlequote; # loosely quoting
693 my $loose; #
694 my $allow_barekey; # bareKey
5eabe055
N
695 my $allow_tags;
696
697 my $alt_true;
698 my $alt_false;
d5424315 699
dec273dc
CBW
700 sub _detect_utf_encoding {
701 my $text = shift;
702 my @octets = unpack('C4', $text);
703 return 'unknown' unless defined $octets[3];
704 return ( $octets[0] and $octets[1]) ? 'UTF-8'
705 : (!$octets[0] and $octets[1]) ? 'UTF-16BE'
706 : (!$octets[0] and !$octets[1]) ? 'UTF-32BE'
707 : ( $octets[2] ) ? 'UTF-16LE'
708 : (!$octets[2] ) ? 'UTF-32LE'
709 : 'unknown';
710 }
d5424315
DG
711
712 sub PP_decode_json {
dec273dc 713 my ($self, $want_offset);
d5424315 714
dec273dc 715 ($self, $text, $want_offset) = @_;
d5424315
DG
716
717 ($at, $ch, $depth) = (0, '', 0);
718
719 if ( !defined $text or ref $text ) {
720 decode_error("malformed JSON string, neither array, object, number, string or atom");
721 }
722
dec273dc 723 my $props = $self->{PROPS};
d5424315 724
5eabe055
N
725 ($utf8, $relaxed, $loose, $allow_bignum, $allow_barekey, $singlequote, $allow_tags)
726 = @{$props}[P_UTF8, P_RELAXED, P_LOOSE .. P_ALLOW_SINGLEQUOTE, P_ALLOW_TAGS];
727
728 ($alt_true, $alt_false) = @$self{qw/true false/};
d5424315
DG
729
730 if ( $utf8 ) {
dec273dc
CBW
731 $encoding = _detect_utf_encoding($text);
732 if ($encoding ne 'UTF-8' and $encoding ne 'unknown') {
733 require Encode;
734 Encode::from_to($text, $encoding, 'utf-8');
735 } else {
736 utf8::downgrade( $text, 1 ) or Carp::croak("Wide character in subroutine entry");
737 }
d5424315
DG
738 }
739 else {
740 utf8::upgrade( $text );
87f3ebe4 741 utf8::encode( $text );
d5424315
DG
742 }
743
744 $len = length $text;
745
746 ($max_depth, $max_size, $cb_object, $cb_sk_object, $F_HOOK)
747 = @{$self}{qw/max_depth max_size cb_object cb_sk_object F_HOOK/};
748
749 if ($max_size > 1) {
750 use bytes;
751 my $bytes = length $text;
752 decode_error(
753 sprintf("attempted decode of JSON text of %s bytes size, but max_size is set to %s"
754 , $bytes, $max_size), 1
755 ) if ($bytes > $max_size);
756 }
757
d5424315
DG
758 white(); # remove head white space
759
dec273dc 760 decode_error("malformed JSON string, neither array, object, number, string or atom") unless defined $ch; # Is there a first character for JSON structure?
d5424315
DG
761
762 my $result = value();
763
dec273dc 764 if ( !$props->[ P_ALLOW_NONREF ] and !ref $result ) {
d5424315
DG
765 decode_error(
766 'JSON text must be an object or array (but found number, string, true, false or null,'
767 . ' use allow_nonref to allow this)', 1);
768 }
769
770 Carp::croak('something wrong.') if $len < $at; # we won't arrive here.
771
772 my $consumed = defined $ch ? $at - 1 : $at; # consumed JSON text length
773
774 white(); # remove tail white space
775
dec273dc
CBW
776 return ( $result, $consumed ) if $want_offset; # all right if decode_prefix
777
778 decode_error("garbage after JSON object") if defined $ch;
d5424315 779
dec273dc 780 $result;
d5424315
DG
781 }
782
783
784 sub next_chr {
785 return $ch = undef if($at >= $len);
786 $ch = substr($text, $at++, 1);
787 }
788
789
790 sub value {
791 white();
792 return if(!defined $ch);
793 return object() if($ch eq '{');
794 return array() if($ch eq '[');
5eabe055 795 return tag() if($ch eq '(');
d5424315
DG
796 return string() if($ch eq '"' or ($singlequote and $ch eq "'"));
797 return number() if($ch =~ /[0-9]/ or $ch eq '-');
798 return word();
799 }
800
801 sub string {
d5424315
DG
802 my $utf16;
803 my $is_utf8;
804
805 ($is_valid_utf8, $utf8_len) = ('', 0);
806
dec273dc 807 my $s = ''; # basically UTF8 flag on
d5424315
DG
808
809 if($ch eq '"' or ($singlequote and $ch eq "'")){
810 my $boundChar = $ch;
811
812 OUTER: while( defined(next_chr()) ){
813
814 if($ch eq $boundChar){
815 next_chr();
816
817 if ($utf16) {
818 decode_error("missing low surrogate character in surrogate pair");
819 }
820
821 utf8::decode($s) if($is_utf8);
822
823 return $s;
824 }
825 elsif($ch eq '\\'){
826 next_chr();
827 if(exists $escapes{$ch}){
828 $s .= $escapes{$ch};
829 }
830 elsif($ch eq 'u'){ # UNICODE handling
831 my $u = '';
832
833 for(1..4){
834 $ch = next_chr();
835 last OUTER if($ch !~ /[0-9a-fA-F]/);
836 $u .= $ch;
837 }
838
839 # U+D800 - U+DBFF
840 if ($u =~ /^[dD][89abAB][0-9a-fA-F]{2}/) { # UTF-16 high surrogate?
841 $utf16 = $u;
842 }
843 # U+DC00 - U+DFFF
844 elsif ($u =~ /^[dD][c-fC-F][0-9a-fA-F]{2}/) { # UTF-16 low surrogate?
845 unless (defined $utf16) {
846 decode_error("missing high surrogate character in surrogate pair");
847 }
848 $is_utf8 = 1;
849 $s .= JSON_PP_decode_surrogates($utf16, $u) || next;
850 $utf16 = undef;
851 }
852 else {
853 if (defined $utf16) {
854 decode_error("surrogate pair expected");
855 }
856
857 if ( ( my $hex = hex( $u ) ) > 127 ) {
858 $is_utf8 = 1;
859 $s .= JSON_PP_decode_unicode($u) || next;
860 }
861 else {
862 $s .= chr $hex;
863 }
864 }
865
866 }
867 else{
868 unless ($loose) {
869 $at -= 2;
870 decode_error('illegal backslash escape sequence in string');
871 }
872 $s .= $ch;
873 }
874 }
875 else{
876
877 if ( ord $ch > 127 ) {
87f3ebe4
CBW
878 unless( $ch = is_valid_utf8($ch) ) {
879 $at -= 1;
880 decode_error("malformed UTF-8 character in JSON string");
d5424315
DG
881 }
882 else {
87f3ebe4 883 $at += $utf8_len - 1;
d5424315
DG
884 }
885
886 $is_utf8 = 1;
887 }
888
889 if (!$loose) {
890 if ($ch =~ /[\x00-\x1f\x22\x5c]/) { # '/' ok
5eabe055
N
891 if (!$relaxed or $ch ne "\t") {
892 $at--;
893 decode_error('invalid character encountered while parsing JSON string');
894 }
d5424315
DG
895 }
896 }
897
898 $s .= $ch;
899 }
900 }
901 }
902
903 decode_error("unexpected end of string while parsing JSON string");
904 }
905
906
907 sub white {
908 while( defined $ch ){
dec273dc 909 if($ch eq '' or $ch =~ /\A[ \t\r\n]\z/){
d5424315
DG
910 next_chr();
911 }
dec273dc 912 elsif($relaxed and $ch eq '/'){
d5424315
DG
913 next_chr();
914 if(defined $ch and $ch eq '/'){
915 1 while(defined(next_chr()) and $ch ne "\n" and $ch ne "\r");
916 }
917 elsif(defined $ch and $ch eq '*'){
918 next_chr();
919 while(1){
920 if(defined $ch){
921 if($ch eq '*'){
922 if(defined(next_chr()) and $ch eq '/'){
923 next_chr();
924 last;
925 }
926 }
927 else{
928 next_chr();
929 }
930 }
931 else{
932 decode_error("Unterminated comment");
933 }
934 }
935 next;
936 }
937 else{
938 $at--;
939 decode_error("malformed JSON string, neither array, object, number, string or atom");
940 }
941 }
942 else{
943 if ($relaxed and $ch eq '#') { # correctly?
944 pos($text) = $at;
945 $text =~ /\G([^\n]*(?:\r\n|\r|\n|$))/g;
946 $at = pos($text);
947 next_chr;
948 next;
949 }
950
951 last;
952 }
953 }
954 }
955
956
957 sub array {
05bd2440 958 my $a = $_[0] || []; # you can use this code to use another array ref object.
d5424315
DG
959
960 decode_error('json text or perl structure exceeds maximum nesting level (max_depth set too low?)')
961 if (++$depth > $max_depth);
962
963 next_chr();
964 white();
965
966 if(defined $ch and $ch eq ']'){
967 --$depth;
968 next_chr();
969 return $a;
970 }
971 else {
972 while(defined($ch)){
973 push @$a, value();
974
975 white();
976
977 if (!defined $ch) {
978 last;
979 }
980
981 if($ch eq ']'){
982 --$depth;
983 next_chr();
984 return $a;
985 }
986
987 if($ch ne ','){
988 last;
989 }
990
991 next_chr();
992 white();
993
994 if ($relaxed and $ch eq ']') {
995 --$depth;
996 next_chr();
997 return $a;
998 }
999
1000 }
1001 }
1002
dec273dc 1003 $at-- if defined $ch and $ch ne '';
d5424315
DG
1004 decode_error(", or ] expected while parsing array");
1005 }
1006
5eabe055
N
1007 sub tag {
1008 decode_error('malformed JSON string, neither array, object, number, string or atom') unless $allow_tags;
1009
1010 next_chr();
1011 white();
1012
1013 my $tag = value();
1014 return unless defined $tag;
1015 decode_error('malformed JSON string, (tag) must be a string') if ref $tag;
1016
1017 white();
1018
1019 if (!defined $ch or $ch ne ')') {
1020 decode_error(') expected after tag');
1021 }
1022
1023 next_chr();
1024 white();
1025
1026 my $val = value();
1027 return unless defined $val;
1028 decode_error('malformed JSON string, tag value must be an array') unless ref $val eq 'ARRAY';
1029
1030 if (!eval { $tag->can('THAW') }) {
1031 decode_error('cannot decode perl-object (package does not exist)') if $@;
1032 decode_error('cannot decode perl-object (package does not have a THAW method)');
1033 }
1034 $tag->THAW('JSON', @$val);
1035 }
d5424315
DG
1036
1037 sub object {
05bd2440 1038 my $o = $_[0] || {}; # you can use this code to use another hash ref object.
d5424315
DG
1039 my $k;
1040
1041 decode_error('json text or perl structure exceeds maximum nesting level (max_depth set too low?)')
1042 if (++$depth > $max_depth);
1043 next_chr();
1044 white();
1045
1046 if(defined $ch and $ch eq '}'){
1047 --$depth;
1048 next_chr();
1049 if ($F_HOOK) {
1050 return _json_object_hook($o);
1051 }
1052 return $o;
1053 }
1054 else {
1055 while (defined $ch) {
1056 $k = ($allow_barekey and $ch ne '"' and $ch ne "'") ? bareKey() : string();
1057 white();
1058
1059 if(!defined $ch or $ch ne ':'){
1060 $at--;
1061 decode_error("':' expected");
1062 }
1063
1064 next_chr();
1065 $o->{$k} = value();
1066 white();
1067
1068 last if (!defined $ch);
1069
1070 if($ch eq '}'){
1071 --$depth;
1072 next_chr();
1073 if ($F_HOOK) {
1074 return _json_object_hook($o);
1075 }
1076 return $o;
1077 }
1078
1079 if($ch ne ','){
1080 last;
1081 }
1082
1083 next_chr();
1084 white();
1085
1086 if ($relaxed and $ch eq '}') {
1087 --$depth;
1088 next_chr();
1089 if ($F_HOOK) {
1090 return _json_object_hook($o);
1091 }
1092 return $o;
1093 }
1094
1095 }
1096
1097 }
1098
dec273dc 1099 $at-- if defined $ch and $ch ne '';
d5424315
DG
1100 decode_error(", or } expected while parsing object/hash");
1101 }
1102
1103
1104 sub bareKey { # doesn't strictly follow Standard ECMA-262 3rd Edition
1105 my $key;
1106 while($ch =~ /[^\x00-\x23\x25-\x2F\x3A-\x40\x5B-\x5E\x60\x7B-\x7F]/){
1107 $key .= $ch;
1108 next_chr();
1109 }
1110 return $key;
1111 }
1112
1113
1114 sub word {
1115 my $word = substr($text,$at-1,4);
1116
1117 if($word eq 'true'){
1118 $at += 3;
1119 next_chr;
5eabe055 1120 return defined $alt_true ? $alt_true : $JSON::PP::true;
d5424315
DG
1121 }
1122 elsif($word eq 'null'){
1123 $at += 3;
1124 next_chr;
1125 return undef;
1126 }
1127 elsif($word eq 'fals'){
1128 $at += 3;
1129 if(substr($text,$at,1) eq 'e'){
1130 $at++;
1131 next_chr;
5eabe055 1132 return defined $alt_false ? $alt_false : $JSON::PP::false;
d5424315
DG
1133 }
1134 }
1135
1136 $at--; # for decode_error report
1137
1138 decode_error("'null' expected") if ($word =~ /^n/);
1139 decode_error("'true' expected") if ($word =~ /^t/);
1140 decode_error("'false' expected") if ($word =~ /^f/);
1141 decode_error("malformed JSON string, neither array, object, number, string or atom");
1142 }
1143
1144
1145 sub number {
1146 my $n = '';
1147 my $v;
a1e5c561 1148 my $is_dec;
dec273dc 1149 my $is_exp;
d5424315
DG
1150
1151 if($ch eq '-'){
1152 $n = '-';
1153 next_chr;
1154 if (!defined $ch or $ch !~ /\d/) {
1155 decode_error("malformed number (no digits after initial minus)");
1156 }
1157 }
1158
dec273dc
CBW
1159 # According to RFC4627, hex or oct digits are invalid.
1160 if($ch eq '0'){
1161 my $peek = substr($text,$at,1);
1162 if($peek =~ /^[0-9a-dfA-DF]/){ # e may be valid (exponential)
1163 decode_error("malformed number (leading zero must not be followed by another digit)");
1164 }
1165 $n .= $ch;
1166 next_chr;
1167 }
1168
d5424315
DG
1169 while(defined $ch and $ch =~ /\d/){
1170 $n .= $ch;
1171 next_chr;
1172 }
1173
1174 if(defined $ch and $ch eq '.'){
1175 $n .= '.';
a1e5c561 1176 $is_dec = 1;
d5424315
DG
1177
1178 next_chr;
1179 if (!defined $ch or $ch !~ /\d/) {
1180 decode_error("malformed number (no digits after decimal point)");
1181 }
1182 else {
1183 $n .= $ch;
1184 }
1185
1186 while(defined(next_chr) and $ch =~ /\d/){
1187 $n .= $ch;
1188 }
1189 }
1190
1191 if(defined $ch and ($ch eq 'e' or $ch eq 'E')){
1192 $n .= $ch;
dec273dc 1193 $is_exp = 1;
d5424315
DG
1194 next_chr;
1195
1196 if(defined($ch) and ($ch eq '+' or $ch eq '-')){
1197 $n .= $ch;
1198 next_chr;
1199 if (!defined $ch or $ch =~ /\D/) {
1200 decode_error("malformed number (no digits after exp sign)");
1201 }
1202 $n .= $ch;
1203 }
1204 elsif(defined($ch) and $ch =~ /\d/){
1205 $n .= $ch;
1206 }
1207 else {
1208 decode_error("malformed number (no digits after exp sign)");
1209 }
1210
1211 while(defined(next_chr) and $ch =~ /\d/){
1212 $n .= $ch;
1213 }
1214
1215 }
1216
1217 $v .= $n;
1218
dec273dc
CBW
1219 if ($is_dec or $is_exp) {
1220 if ($allow_bignum) {
1221 require Math::BigFloat;
1222 return Math::BigFloat->new($v);
d5424315 1223 }
dec273dc
CBW
1224 } else {
1225 if (length $v > $max_intsize) {
1226 if ($allow_bignum) { # from Adam Sussman
1227 require Math::BigInt;
1228 return Math::BigInt->new($v);
1229 }
1230 else {
1231 return "$v";
1232 }
d5424315
DG
1233 }
1234 }
d5424315 1235
a1e5c561 1236 return $is_dec ? $v/1.0 : 0+$v;
d5424315
DG
1237 }
1238
1239
1240 sub is_valid_utf8 {
1241
1242 $utf8_len = $_[0] =~ /[\x00-\x7F]/ ? 1
1243 : $_[0] =~ /[\xC2-\xDF]/ ? 2
1244 : $_[0] =~ /[\xE0-\xEF]/ ? 3
1245 : $_[0] =~ /[\xF0-\xF4]/ ? 4
1246 : 0
1247 ;
1248
1249 return unless $utf8_len;
1250
1251 my $is_valid_utf8 = substr($text, $at - 1, $utf8_len);
1252
1253 return ( $is_valid_utf8 =~ /^(?:
1254 [\x00-\x7F]
1255 |[\xC2-\xDF][\x80-\xBF]
1256 |[\xE0][\xA0-\xBF][\x80-\xBF]
1257 |[\xE1-\xEC][\x80-\xBF][\x80-\xBF]
1258 |[\xED][\x80-\x9F][\x80-\xBF]
1259 |[\xEE-\xEF][\x80-\xBF][\x80-\xBF]
1260 |[\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF]
1261 |[\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]
1262 |[\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF]
1263 )$/x ) ? $is_valid_utf8 : '';
1264 }
1265
1266
1267 sub decode_error {
1268 my $error = shift;
1269 my $no_rep = shift;
1270 my $str = defined $text ? substr($text, $at) : '';
1271 my $mess = '';
dec273dc
CBW
1272 my $type = 'U*';
1273
1274 if ( OLD_PERL ) {
1275 my $type = $] < 5.006 ? 'C*'
1276 : utf8::is_utf8( $str ) ? 'U*' # 5.6
1277 : 'C*'
1278 ;
1279 }
d5424315
DG
1280
1281 for my $c ( unpack( $type, $str ) ) { # emulate pv_uni_display() ?
1282 $mess .= $c == 0x07 ? '\a'
1283 : $c == 0x09 ? '\t'
1284 : $c == 0x0a ? '\n'
1285 : $c == 0x0d ? '\r'
1286 : $c == 0x0c ? '\f'
1287 : $c < 0x20 ? sprintf('\x{%x}', $c)
1288 : $c == 0x5c ? '\\\\'
1289 : $c < 0x80 ? chr($c)
1290 : sprintf('\x{%x}', $c)
1291 ;
1292 if ( length $mess >= 20 ) {
1293 $mess .= '...';
1294 last;
1295 }
1296 }
1297
1298 unless ( length $mess ) {
1299 $mess = '(end of string)';
1300 }
1301
1302 Carp::croak (
1303 $no_rep ? "$error" : "$error, at character offset $at (before \"$mess\")"
1304 );
1305
1306 }
1307
1308
1309 sub _json_object_hook {
1310 my $o = $_[0];
1311 my @ks = keys %{$o};
1312
1313 if ( $cb_sk_object and @ks == 1 and exists $cb_sk_object->{ $ks[0] } and ref $cb_sk_object->{ $ks[0] } ) {
1314 my @val = $cb_sk_object->{ $ks[0] }->( $o->{$ks[0]} );
5eabe055
N
1315 if (@val == 0) {
1316 return $o;
1317 }
1318 elsif (@val == 1) {
d5424315
DG
1319 return $val[0];
1320 }
5eabe055
N
1321 else {
1322 Carp::croak("filter_json_single_key_object callbacks must not return more than one scalar");
1323 }
d5424315
DG
1324 }
1325
1326 my @val = $cb_object->($o) if ($cb_object);
5eabe055 1327 if (@val == 0) {
d5424315
DG
1328 return $o;
1329 }
5eabe055 1330 elsif (@val == 1) {
d5424315
DG
1331 return $val[0];
1332 }
5eabe055
N
1333 else {
1334 Carp::croak("filter_json_object callbacks must not return more than one scalar");
1335 }
d5424315
DG
1336 }
1337
1338
1339 sub PP_decode_box {
1340 {
1341 text => $text,
1342 at => $at,
1343 ch => $ch,
1344 len => $len,
1345 depth => $depth,
1346 encoding => $encoding,
1347 is_valid_utf8 => $is_valid_utf8,
1348 };
1349 }
1350
1351} # PARSE
1352
1353
1354sub _decode_surrogates { # from perlunicode
1355 my $uni = 0x10000 + (hex($_[0]) - 0xD800) * 0x400 + (hex($_[1]) - 0xDC00);
1356 my $un = pack('U*', $uni);
1357 utf8::encode( $un );
1358 return $un;
1359}
1360
1361
1362sub _decode_unicode {
1363 my $un = pack('U', hex shift);
1364 utf8::encode( $un );
1365 return $un;
1366}
1367
1368#
1369# Setup for various Perl versions (the code from JSON::PP58)
1370#
1371
1372BEGIN {
1373
1374 unless ( defined &utf8::is_utf8 ) {
1375 require Encode;
1376 *utf8::is_utf8 = *Encode::is_utf8;
1377 }
1378
dec273dc 1379 if ( !OLD_PERL ) {
d5424315
DG
1380 *JSON::PP::JSON_PP_encode_ascii = \&_encode_ascii;
1381 *JSON::PP::JSON_PP_encode_latin1 = \&_encode_latin1;
1382 *JSON::PP::JSON_PP_decode_surrogates = \&_decode_surrogates;
1383 *JSON::PP::JSON_PP_decode_unicode = \&_decode_unicode;
d5424315 1384
dec273dc
CBW
1385 if ($] < 5.008003) { # join() in 5.8.0 - 5.8.2 is broken.
1386 package JSON::PP;
1387 require subs;
1388 subs->import('join');
1389 eval q|
1390 sub join {
1391 return '' if (@_ < 2);
1392 my $j = shift;
1393 my $str = shift;
1394 for (@_) { $str .= $j . $_; }
1395 return $str;
1396 }
1397 |;
1398 }
d5424315
DG
1399 }
1400
1401
1402 sub JSON::PP::incr_parse {
1403 local $Carp::CarpLevel = 1;
1404 ( $_[0]->{_incr_parser} ||= JSON::PP::IncrParser->new )->incr_parse( @_ );
1405 }
1406
1407
1408 sub JSON::PP::incr_skip {
1409 ( $_[0]->{_incr_parser} ||= JSON::PP::IncrParser->new )->incr_skip;
1410 }
1411
1412
1413 sub JSON::PP::incr_reset {
1414 ( $_[0]->{_incr_parser} ||= JSON::PP::IncrParser->new )->incr_reset;
1415 }
1416
1417 eval q{
1418 sub JSON::PP::incr_text : lvalue {
1419 $_[0]->{_incr_parser} ||= JSON::PP::IncrParser->new;
1420
dec273dc 1421 if ( $_[0]->{_incr_parser}->{incr_pos} ) {
a1e5c561 1422 Carp::croak("incr_text cannot be called when the incremental parser already started parsing");
d5424315
DG
1423 }
1424 $_[0]->{_incr_parser}->{incr_text};
1425 }
1426 } if ( $] >= 5.006 );
1427
1428} # Setup for various Perl versions (the code from JSON::PP58)
1429
1430
1431###############################
1432# Utilities
1433#
1434
1435BEGIN {
1436 eval 'require Scalar::Util';
1437 unless($@){
1438 *JSON::PP::blessed = \&Scalar::Util::blessed;
1439 *JSON::PP::reftype = \&Scalar::Util::reftype;
05bd2440 1440 *JSON::PP::refaddr = \&Scalar::Util::refaddr;
d5424315 1441 }
dec273dc 1442 else{ # This code is from Scalar::Util.
d5424315
DG
1443 # warn $@;
1444 eval 'sub UNIVERSAL::a_sub_not_likely_to_be_here { ref($_[0]) }';
1445 *JSON::PP::blessed = sub {
1446 local($@, $SIG{__DIE__}, $SIG{__WARN__});
1447 ref($_[0]) ? eval { $_[0]->a_sub_not_likely_to_be_here } : undef;
1448 };
dec273dc 1449 require B;
d5424315
DG
1450 my %tmap = qw(
1451 B::NULL SCALAR
1452 B::HV HASH
1453 B::AV ARRAY
1454 B::CV CODE
1455 B::IO IO
1456 B::GV GLOB
1457 B::REGEXP REGEXP
1458 );
1459 *JSON::PP::reftype = sub {
1460 my $r = shift;
1461
1462 return undef unless length(ref($r));
1463
1464 my $t = ref(B::svref_2object($r));
1465
1466 return
1467 exists $tmap{$t} ? $tmap{$t}
1468 : length(ref($$r)) ? 'REF'
1469 : 'SCALAR';
1470 };
05bd2440
CBW
1471 *JSON::PP::refaddr = sub {
1472 return undef unless length(ref($_[0]));
1473
1474 my $addr;
1475 if(defined(my $pkg = blessed($_[0]))) {
1476 $addr .= bless $_[0], 'Scalar::Util::Fake';
1477 bless $_[0], $pkg;
1478 }
1479 else {
1480 $addr .= $_[0]
1481 }
1482
1483 $addr =~ /0x(\w+)/;
1484 local $^W;
1485 #no warnings 'portable';
1486 hex($1);
1487 }
d5424315
DG
1488 }
1489}
1490
1491
a1e5c561 1492# shamelessly copied and modified from JSON::XS code.
d5424315
DG
1493
1494$JSON::PP::true = do { bless \(my $dummy = 1), "JSON::PP::Boolean" };
1495$JSON::PP::false = do { bless \(my $dummy = 0), "JSON::PP::Boolean" };
1496
5eabe055 1497sub is_bool { blessed $_[0] and ( $_[0]->isa("JSON::PP::Boolean") or $_[0]->isa("Types::Serialiser::BooleanBase") or $_[0]->isa("JSON::XS::Boolean") ); }
d5424315
DG
1498
1499sub true { $JSON::PP::true }
1500sub false { $JSON::PP::false }
1501sub null { undef; }
1502
1503###############################
1504
d5424315
DG
1505package JSON::PP::IncrParser;
1506
1507use strict;
1508
1509use constant INCR_M_WS => 0; # initial whitespace skipping
1510use constant INCR_M_STR => 1; # inside string
1511use constant INCR_M_BS => 2; # inside backslash
1512use constant INCR_M_JSON => 3; # outside anything, count nesting
1513use constant INCR_M_C0 => 4;
1514use constant INCR_M_C1 => 5;
5eabe055
N
1515use constant INCR_M_TFN => 6;
1516use constant INCR_M_NUM => 7;
d5424315
DG
1517
1518$JSON::PP::IncrParser::VERSION = '1.01';
1519
d5424315
DG
1520sub new {
1521 my ( $class ) = @_;
1522
1523 bless {
1524 incr_nest => 0,
1525 incr_text => undef,
dec273dc
CBW
1526 incr_pos => 0,
1527 incr_mode => 0,
d5424315
DG
1528 }, $class;
1529}
1530
1531
1532sub incr_parse {
1533 my ( $self, $coder, $text ) = @_;
1534
1535 $self->{incr_text} = '' unless ( defined $self->{incr_text} );
1536
1537 if ( defined $text ) {
1538 if ( utf8::is_utf8( $text ) and !utf8::is_utf8( $self->{incr_text} ) ) {
1539 utf8::upgrade( $self->{incr_text} ) ;
1540 utf8::decode( $self->{incr_text} ) ;
1541 }
1542 $self->{incr_text} .= $text;
1543 }
1544
d5424315 1545 if ( defined wantarray ) {
dec273dc
CBW
1546 my $max_size = $coder->get_max_size;
1547 my $p = $self->{incr_pos};
1548 my @ret;
1549 {
d5424315 1550 do {
dec273dc
CBW
1551 unless ( $self->{incr_nest} <= 0 and $self->{incr_mode} == INCR_M_JSON ) {
1552 $self->_incr_parse( $coder );
d5424315 1553
dec273dc
CBW
1554 if ( $max_size and $self->{incr_pos} > $max_size ) {
1555 Carp::croak("attempted decode of JSON text of $self->{incr_pos} bytes size, but max_size is set to $max_size");
1556 }
1557 unless ( $self->{incr_nest} <= 0 and $self->{incr_mode} == INCR_M_JSON ) {
1558 # as an optimisation, do not accumulate white space in the incr buffer
1559 if ( $self->{incr_mode} == INCR_M_WS and $self->{incr_pos} ) {
1560 $self->{incr_pos} = 0;
1561 $self->{incr_text} = '';
1562 }
1563 last;
1564 }
d5424315
DG
1565 }
1566
dec273dc
CBW
1567 my ($obj, $offset) = $coder->PP_decode_json( $self->{incr_text}, 0x00000001 );
1568 push @ret, $obj;
1569 use bytes;
1570 $self->{incr_text} = substr( $self->{incr_text}, $offset || 0 );
1571 $self->{incr_pos} = 0;
1572 $self->{incr_nest} = 0;
1573 $self->{incr_mode} = 0;
1574 last unless wantarray;
1575 } while ( wantarray );
1576 }
d5424315 1577
dec273dc 1578 if ( wantarray ) {
d5424315
DG
1579 return @ret;
1580 }
1581 else { # in scalar context
5eabe055 1582 return defined $ret[0] ? $ret[0] : undef;
d5424315 1583 }
d5424315 1584 }
d5424315
DG
1585}
1586
1587
1588sub _incr_parse {
dec273dc
CBW
1589 my ($self, $coder) = @_;
1590 my $text = $self->{incr_text};
d5424315 1591 my $len = length $text;
dec273dc 1592 my $p = $self->{incr_pos};
d5424315 1593
dec273dc 1594INCR_PARSE:
d5424315 1595 while ( $len > $p ) {
dec273dc
CBW
1596 my $s = substr( $text, $p, 1 );
1597 last INCR_PARSE unless defined $s;
1598 my $mode = $self->{incr_mode};
1599
1600 if ( $mode == INCR_M_WS ) {
1601 while ( $len > $p ) {
1602 $s = substr( $text, $p, 1 );
1603 last INCR_PARSE unless defined $s;
1604 if ( ord($s) > 0x20 ) {
1605 if ( $s eq '#' ) {
1606 $self->{incr_mode} = INCR_M_C0;
1607 redo INCR_PARSE;
1608 } else {
1609 $self->{incr_mode} = INCR_M_JSON;
1610 redo INCR_PARSE;
1611 }
1612 }
1613 $p++;
d5424315 1614 }
dec273dc
CBW
1615 } elsif ( $mode == INCR_M_BS ) {
1616 $p++;
1617 $self->{incr_mode} = INCR_M_STR;
1618 redo INCR_PARSE;
1619 } elsif ( $mode == INCR_M_C0 or $mode == INCR_M_C1 ) {
1620 while ( $len > $p ) {
1621 $s = substr( $text, $p, 1 );
1622 last INCR_PARSE unless defined $s;
1623 if ( $s eq "\n" ) {
1624 $self->{incr_mode} = $self->{incr_mode} == INCR_M_C0 ? INCR_M_WS : INCR_M_JSON;
d5424315
DG
1625 last;
1626 }
dec273dc 1627 $p++;
d5424315 1628 }
dec273dc 1629 next;
5eabe055
N
1630 } elsif ( $mode == INCR_M_TFN ) {
1631 while ( $len > $p ) {
1632 $s = substr( $text, $p++, 1 );
1633 next if defined $s and $s =~ /[rueals]/;
1634 last;
1635 }
1636 $p--;
1637 $self->{incr_mode} = INCR_M_JSON;
1638
1639 last INCR_PARSE unless $self->{incr_nest};
1640 redo INCR_PARSE;
1641 } elsif ( $mode == INCR_M_NUM ) {
1642 while ( $len > $p ) {
1643 $s = substr( $text, $p++, 1 );
1644 next if defined $s and $s =~ /[0-9eE.+\-]/;
1645 last;
1646 }
1647 $p--;
1648 $self->{incr_mode} = INCR_M_JSON;
1649
1650 last INCR_PARSE unless $self->{incr_nest};
1651 redo INCR_PARSE;
dec273dc
CBW
1652 } elsif ( $mode == INCR_M_STR ) {
1653 while ( $len > $p ) {
1654 $s = substr( $text, $p, 1 );
1655 last INCR_PARSE unless defined $s;
1656 if ( $s eq '"' ) {
1657 $p++;
1658 $self->{incr_mode} = INCR_M_JSON;
1659
1660 last INCR_PARSE unless $self->{incr_nest};
1661 redo INCR_PARSE;
d5424315 1662 }
dec273dc
CBW
1663 elsif ( $s eq '\\' ) {
1664 $p++;
1665 if ( !defined substr($text, $p, 1) ) {
1666 $self->{incr_mode} = INCR_M_BS;
1667 last INCR_PARSE;
1668 }
1669 }
1670 $p++;
d5424315 1671 }
dec273dc
CBW
1672 } elsif ( $mode == INCR_M_JSON ) {
1673 while ( $len > $p ) {
1674 $s = substr( $text, $p++, 1 );
1675 if ( $s eq "\x00" ) {
1676 $p--;
1677 last INCR_PARSE;
1678 } elsif ( $s eq "\x09" or $s eq "\x0a" or $s eq "\x0d" or $s eq "\x20" ) {
1679 if ( !$self->{incr_nest} ) {
1680 $p--; # do not eat the whitespace, let the next round do it
1681 last INCR_PARSE;
1682 }
1683 next;
5eabe055
N
1684 } elsif ( $s eq 't' or $s eq 'f' or $s eq 'n' ) {
1685 $self->{incr_mode} = INCR_M_TFN;
1686 redo INCR_PARSE;
1687 } elsif ( $s =~ /^[0-9\-]$/ ) {
1688 $self->{incr_mode} = INCR_M_NUM;
1689 redo INCR_PARSE;
dec273dc
CBW
1690 } elsif ( $s eq '"' ) {
1691 $self->{incr_mode} = INCR_M_STR;
1692 redo INCR_PARSE;
1693 } elsif ( $s eq '[' or $s eq '{' ) {
1694 if ( ++$self->{incr_nest} > $coder->get_max_depth ) {
1695 Carp::croak('json text or perl structure exceeds maximum nesting level (max_depth set too low?)');
1696 }
1697 next;
1698 } elsif ( $s eq ']' or $s eq '}' ) {
1699 if ( --$self->{incr_nest} <= 0 ) {
1700 last INCR_PARSE;
1701 }
1702 } elsif ( $s eq '#' ) {
1703 $self->{incr_mode} = INCR_M_C1;
1704 redo INCR_PARSE;
d5424315
DG
1705 }
1706 }
d5424315 1707 }
d5424315
DG
1708 }
1709
dec273dc
CBW
1710 $self->{incr_pos} = $p;
1711 $self->{incr_parsing} = $p ? 1 : 0; # for backward compatibility
d5424315
DG
1712}
1713
1714
1715sub incr_text {
dec273dc 1716 if ( $_[0]->{incr_pos} ) {
a1e5c561 1717 Carp::croak("incr_text cannot be called when the incremental parser already started parsing");
d5424315
DG
1718 }
1719 $_[0]->{incr_text};
1720}
1721
1722
1723sub incr_skip {
1724 my $self = shift;
dec273dc
CBW
1725 $self->{incr_text} = substr( $self->{incr_text}, $self->{incr_pos} );
1726 $self->{incr_pos} = 0;
1727 $self->{incr_mode} = 0;
1728 $self->{incr_nest} = 0;
d5424315
DG
1729}
1730
1731
1732sub incr_reset {
1733 my $self = shift;
1734 $self->{incr_text} = undef;
dec273dc 1735 $self->{incr_pos} = 0;
d5424315
DG
1736 $self->{incr_mode} = 0;
1737 $self->{incr_nest} = 0;
d5424315
DG
1738}
1739
1740###############################
1741
1742
17431;
1744__END__
1745=pod
1746
1747=head1 NAME
1748
1749JSON::PP - JSON::XS compatible pure-Perl module.
1750
1751=head1 SYNOPSIS
1752
1753 use JSON::PP;
1754
1755 # exported functions, they croak on error
1756 # and expect/generate UTF-8
1757
1758 $utf8_encoded_json_text = encode_json $perl_hash_or_arrayref;
1759 $perl_hash_or_arrayref = decode_json $utf8_encoded_json_text;
1760
1761 # OO-interface
1762
dec273dc 1763 $json = JSON::PP->new->ascii->pretty->allow_nonref;
d5424315 1764
dec273dc 1765 $pretty_printed_json_text = $json->encode( $perl_scalar );
d5424315
DG
1766 $perl_scalar = $json->decode( $json_text );
1767
d5424315
DG
1768 # Note that JSON version 2.0 and above will automatically use
1769 # JSON::XS or JSON::PP, so you should be able to just:
1770
1771 use JSON;
1772
1773
1774=head1 VERSION
1775
929e53be 1776 4.02
d5424315
DG
1777
1778=head1 DESCRIPTION
1779
5eabe055 1780JSON::PP is a pure perl JSON decoder/encoder, and (almost) compatible to much
dec273dc
CBW
1781faster L<JSON::XS> written by Marc Lehmann in C. JSON::PP works as
1782a fallback module when you use L<JSON> module without having
1783installed JSON::XS.
1784
1785Because of this fallback feature of JSON.pm, JSON::PP tries not to
1786be more JavaScript-friendly than JSON::XS (i.e. not to escape extra
5eabe055 1787characters such as U+2028 and U+2029, etc),
dec273dc
CBW
1788in order for you not to lose such JavaScript-friendliness silently
1789when you use JSON.pm and install JSON::XS for speed or by accident.
1790If you need JavaScript-friendly RFC7159-compliant pure perl module,
1791try L<JSON::Tiny>, which is derived from L<Mojolicious> web
1792framework and is also smaller and faster than JSON::PP.
1793
1794JSON::PP has been in the Perl core since Perl 5.14, mainly for
1795CPAN toolchain modules to parse META.json.
d5424315
DG
1796
1797=head1 FUNCTIONAL INTERFACE
1798
dec273dc
CBW
1799This section is taken from JSON::XS almost verbatim. C<encode_json>
1800and C<decode_json> are exported by default.
d5424315
DG
1801
1802=head2 encode_json
1803
1804 $json_text = encode_json $perl_scalar
1805
dec273dc
CBW
1806Converts the given Perl data structure to a UTF-8 encoded, binary string
1807(that is, the string contains octets only). Croaks on error.
d5424315
DG
1808
1809This function call is functionally identical to:
1810
1811 $json_text = JSON::PP->new->utf8->encode($perl_scalar)
1812
dec273dc
CBW
1813Except being faster.
1814
d5424315
DG
1815=head2 decode_json
1816
1817 $perl_scalar = decode_json $json_text
1818
1819The opposite of C<encode_json>: expects an UTF-8 (binary) string and tries
1820to parse that as an UTF-8 encoded JSON text, returning the resulting
dec273dc 1821reference. Croaks on error.
d5424315
DG
1822
1823This function call is functionally identical to:
1824
1825 $perl_scalar = JSON::PP->new->utf8->decode($json_text)
1826
dec273dc
CBW
1827Except being faster.
1828
d5424315
DG
1829=head2 JSON::PP::is_bool
1830
1831 $is_boolean = JSON::PP::is_bool($scalar)
1832
1833Returns true if the passed scalar represents either JSON::PP::true or
1834JSON::PP::false, two constants that act like C<1> and C<0> respectively
1835and are also used to represent JSON C<true> and C<false> in Perl strings.
1836
d5424315
DG
1837See L<MAPPING>, below, for more information on how JSON values are mapped to
1838Perl.
1839
dec273dc 1840=head1 OBJECT-ORIENTED INTERFACE
d5424315 1841
dec273dc 1842This section is also taken from JSON::XS.
d5424315 1843
dec273dc
CBW
1844The object oriented interface lets you configure your own encoding or
1845decoding style, within the limits of supported formats.
d5424315
DG
1846
1847=head2 new
1848
7942a65c 1849 $json = JSON::PP->new
d5424315 1850
dec273dc 1851Creates a new JSON::PP object that can be used to de/encode JSON
5eabe055
N
1852strings. All boolean flags described below are by default I<disabled>
1853(with the exception of C<allow_nonref>, which defaults to I<enabled> since
1854version C<4.0>).
d5424315 1855
dec273dc 1856The mutators for flags all return the JSON::PP object again and thus calls can
d5424315
DG
1857be chained:
1858
1859 my $json = JSON::PP->new->utf8->space_after->encode({a => [1,2]})
1860 => {"a": [1, 2]}
1861
1862=head2 ascii
1863
1864 $json = $json->ascii([$enable])
1865
1866 $enabled = $json->get_ascii
1867
dec273dc
CBW
1868If C<$enable> is true (or missing), then the C<encode> method will not
1869generate characters outside the code range C<0..127> (which is ASCII). Any
1870Unicode characters outside that range will be escaped using either a
1871single \uXXXX (BMP characters) or a double \uHHHH\uLLLLL escape sequence,
1872as per RFC4627. The resulting encoded JSON text can be treated as a native
1873Unicode string, an ascii-encoded, latin1-encoded or UTF-8 encoded string,
1874or any other superset of ASCII.
1875
1876If C<$enable> is false, then the C<encode> method will not escape Unicode
1877characters unless required by the JSON syntax or other flags. This results
1878in a faster and more compact format.
d5424315 1879
dec273dc 1880See also the section I<ENCODING/CODESET FLAG NOTES> later in this document.
d5424315 1881
dec273dc
CBW
1882The main use for this flag is to produce JSON texts that can be
1883transmitted over a 7-bit channel, as the encoded JSON texts will not
1884contain any 8 bit characters.
d5424315
DG
1885
1886 JSON::PP->new->ascii(1)->encode([chr 0x10401])
1887 => ["\ud801\udc01"]
1888
1889=head2 latin1
1890
1891 $json = $json->latin1([$enable])
1892
1893 $enabled = $json->get_latin1
1894
dec273dc
CBW
1895If C<$enable> is true (or missing), then the C<encode> method will encode
1896the resulting JSON text as latin1 (or iso-8859-1), escaping any characters
1897outside the code range C<0..255>. The resulting string can be treated as a
1898latin1-encoded JSON text or a native Unicode string. The C<decode> method
1899will not be affected in any way by this flag, as C<decode> by default
1900expects Unicode, which is a strict superset of latin1.
d5424315 1901
dec273dc
CBW
1902If C<$enable> is false, then the C<encode> method will not escape Unicode
1903characters unless required by the JSON syntax or other flags.
d5424315 1904
dec273dc
CBW
1905See also the section I<ENCODING/CODESET FLAG NOTES> later in this document.
1906
1907The main use for this flag is efficiently encoding binary data as JSON
1908text, as most octets will not be escaped, resulting in a smaller encoded
1909size. The disadvantage is that the resulting JSON text is encoded
1910in latin1 (and must correctly be treated as such when storing and
1911transferring), a rare encoding for JSON. It is therefore most useful when
1912you want to store data structures known to contain binary data efficiently
1913in files or databases, not when talking to other JSON encoders/decoders.
d5424315 1914
dec273dc
CBW
1915 JSON::PP->new->latin1->encode (["\x{89}\x{abc}"]
1916 => ["\x{89}\\u0abc"] # (perl syntax, U+abc escaped, U+89 not)
d5424315
DG
1917
1918=head2 utf8
1919
1920 $json = $json->utf8([$enable])
1921
1922 $enabled = $json->get_utf8
1923
dec273dc
CBW
1924If C<$enable> is true (or missing), then the C<encode> method will encode
1925the JSON result into UTF-8, as required by many protocols, while the
1926C<decode> method expects to be handled an UTF-8-encoded string. Please
1927note that UTF-8-encoded strings do not contain any characters outside the
1928range C<0..255>, they are thus useful for bytewise/binary I/O. In future
1929versions, enabling this option might enable autodetection of the UTF-16
1930and UTF-32 encoding families, as described in RFC4627.
d5424315 1931
dec273dc
CBW
1932If C<$enable> is false, then the C<encode> method will return the JSON
1933string as a (non-encoded) Unicode string, while C<decode> expects thus a
1934Unicode string. Any decoding or encoding (e.g. to UTF-8 or UTF-16) needs
1935to be done yourself, e.g. using the Encode module.
d5424315 1936
dec273dc 1937See also the section I<ENCODING/CODESET FLAG NOTES> later in this document.
d5424315
DG
1938
1939Example, output UTF-16BE-encoded JSON:
1940
1941 use Encode;
1942 $jsontext = encode "UTF-16BE", JSON::PP->new->encode ($object);
1943
1944Example, decode UTF-32LE-encoded JSON:
1945
1946 use Encode;
1947 $object = JSON::PP->new->decode (decode "UTF-32LE", $jsontext);
1948
d5424315
DG
1949=head2 pretty
1950
1951 $json = $json->pretty([$enable])
1952
1953This enables (or disables) all of the C<indent>, C<space_before> and
dec273dc
CBW
1954C<space_after> (and in the future possibly more) flags in one call to
1955generate the most readable (or most compact) form possible.
d5424315
DG
1956
1957=head2 indent
1958
1959 $json = $json->indent([$enable])
1960
1961 $enabled = $json->get_indent
1962
dec273dc
CBW
1963If C<$enable> is true (or missing), then the C<encode> method will use a multiline
1964format as output, putting every array member or object/hash key-value pair
1965into its own line, indenting them properly.
1966
1967If C<$enable> is false, no newlines or indenting will be produced, and the
1968resulting JSON text is guaranteed not to contain any C<newlines>.
1969
1970This setting has no effect when decoding JSON texts.
1971
d5424315
DG
1972The default indent space length is three.
1973You can use C<indent_length> to change the length.
1974
1975=head2 space_before
1976
1977 $json = $json->space_before([$enable])
1978
1979 $enabled = $json->get_space_before
1980
1981If C<$enable> is true (or missing), then the C<encode> method will add an extra
1982optional space before the C<:> separating keys from values in JSON objects.
1983
1984If C<$enable> is false, then the C<encode> method will not add any extra
1985space at those places.
1986
dec273dc
CBW
1987This setting has no effect when decoding JSON texts. You will also
1988most likely combine this setting with C<space_after>.
d5424315
DG
1989
1990Example, space_before enabled, space_after and indent disabled:
1991
1992 {"key" :"value"}
1993
1994=head2 space_after
1995
1996 $json = $json->space_after([$enable])
1997
1998 $enabled = $json->get_space_after
1999
2000If C<$enable> is true (or missing), then the C<encode> method will add an extra
2001optional space after the C<:> separating keys from values in JSON objects
2002and extra whitespace after the C<,> separating key-value pairs and array
2003members.
2004
2005If C<$enable> is false, then the C<encode> method will not add any extra
2006space at those places.
2007
2008This setting has no effect when decoding JSON texts.
2009
2010Example, space_before and indent disabled, space_after enabled:
2011
2012 {"key": "value"}
2013
2014=head2 relaxed
2015
2016 $json = $json->relaxed([$enable])
2017
2018 $enabled = $json->get_relaxed
2019
2020If C<$enable> is true (or missing), then C<decode> will accept some
2021extensions to normal JSON syntax (see below). C<encode> will not be
2022affected in anyway. I<Be aware that this option makes you accept invalid
2023JSON texts as if they were valid!>. I suggest only to use this option to
2024parse application-specific files written by humans (configuration files,
2025resource files etc.)
2026
2027If C<$enable> is false (the default), then C<decode> will only accept
2028valid JSON texts.
2029
2030Currently accepted extensions are:
2031
2032=over 4
2033
2034=item * list items can have an end-comma
2035
2036JSON I<separates> array elements and key-value pairs with commas. This
2037can be annoying if you write JSON texts manually and want to be able to
2038quickly append elements, so this extension accepts comma at the end of
2039such items not just between them:
2040
2041 [
2042 1,
2043 2, <- this comma not normally allowed
2044 ]
2045 {
2046 "k1": "v1",
2047 "k2": "v2", <- this comma not normally allowed
2048 }
2049
2050=item * shell-style '#'-comments
2051
2052Whenever JSON allows whitespace, shell-style comments are additionally
2053allowed. They are terminated by the first carriage-return or line-feed
2054character, after which more white-space and comments are allowed.
2055
2056 [
2057 1, # this comment not allowed in JSON
2058 # neither this one...
2059 ]
2060
dec273dc
CBW
2061=item * C-style multiple-line '/* */'-comments (JSON::PP only)
2062
2063Whenever JSON allows whitespace, C-style multiple-line comments are additionally
2064allowed. Everything between C</*> and C<*/> is a comment, after which
2065more white-space and comments are allowed.
2066
2067 [
2068 1, /* this comment not allowed in JSON */
2069 /* neither this one... */
2070 ]
2071
2072=item * C++-style one-line '//'-comments (JSON::PP only)
2073
2074Whenever JSON allows whitespace, C++-style one-line comments are additionally
2075allowed. They are terminated by the first carriage-return or line-feed
2076character, after which more white-space and comments are allowed.
2077
2078 [
2079 1, // this comment not allowed in JSON
2080 // neither this one...
2081 ]
2082
5eabe055
N
2083=item * literal ASCII TAB characters in strings
2084
2085Literal ASCII TAB characters are now allowed in strings (and treated as
2086C<\t>).
2087
2088 [
2089 "Hello\tWorld",
2090 "Hello<TAB>World", # literal <TAB> would not normally be allowed
2091 ]
2092
d5424315
DG
2093=back
2094
2095=head2 canonical
2096
2097 $json = $json->canonical([$enable])
2098
2099 $enabled = $json->get_canonical
2100
2101If C<$enable> is true (or missing), then the C<encode> method will output JSON objects
2102by sorting their keys. This is adding a comparatively high overhead.
2103
2104If C<$enable> is false, then the C<encode> method will output key-value
2105pairs in the order Perl stores them (which will likely change between runs
dec273dc
CBW
2106of the same script, and can change even within the same run from 5.18
2107onwards).
d5424315
DG
2108
2109This option is useful if you want the same data structure to be encoded as
2110the same JSON text (given the same overall settings). If it is disabled,
2111the same hash might be encoded differently even if contains the same data,
2112as key-value pairs have no inherent ordering in Perl.
2113
2114This setting has no effect when decoding JSON texts.
2115
dec273dc 2116This setting has currently no effect on tied hashes.
d5424315
DG
2117
2118=head2 allow_nonref
2119
2120 $json = $json->allow_nonref([$enable])
2121
2122 $enabled = $json->get_allow_nonref
2123
5eabe055
N
2124Unlike other boolean options, this opotion is enabled by default beginning
2125with version C<4.0>.
2126
d5424315
DG
2127If C<$enable> is true (or missing), then the C<encode> method can convert a
2128non-reference into its corresponding string, number or null JSON value,
2129which is an extension to RFC4627. Likewise, C<decode> will accept those JSON
2130values instead of croaking.
2131
2132If C<$enable> is false, then the C<encode> method will croak if it isn't
2133passed an arrayref or hashref, as JSON texts must either be an object
2134or array. Likewise, C<decode> will croak if given something that is not a
2135JSON object or array.
2136
5eabe055
N
2137Example, encode a Perl scalar as JSON value without enabled C<allow_nonref>,
2138resulting in an error:
dec273dc 2139
5eabe055
N
2140 JSON::PP->new->allow_nonref(0)->encode ("Hello, World!")
2141 => hash- or arrayref expected...
d5424315
DG
2142
2143=head2 allow_unknown
2144
5eabe055 2145 $json = $json->allow_unknown([$enable])
d5424315
DG
2146
2147 $enabled = $json->get_allow_unknown
2148
dec273dc 2149If C<$enable> is true (or missing), then C<encode> will I<not> throw an
d5424315 2150exception when it encounters values it cannot represent in JSON (for
dec273dc
CBW
2151example, filehandles) but instead will encode a JSON C<null> value. Note
2152that blessed objects are not included here and are handled separately by
2153c<allow_blessed>.
d5424315 2154
dec273dc 2155If C<$enable> is false (the default), then C<encode> will throw an
d5424315
DG
2156exception when it encounters anything it cannot encode as JSON.
2157
dec273dc
CBW
2158This option does not affect C<decode> in any way, and it is recommended to
2159leave it off unless you know your communications partner.
d5424315
DG
2160
2161=head2 allow_blessed
2162
2163 $json = $json->allow_blessed([$enable])
2164
2165 $enabled = $json->get_allow_blessed
2166
dec273dc
CBW
2167See L<OBJECT SERIALISATION> for details.
2168
d5424315 2169If C<$enable> is true (or missing), then the C<encode> method will not
dec273dc
CBW
2170barf when it encounters a blessed reference that it cannot convert
2171otherwise. Instead, a JSON C<null> value is encoded instead of the object.
d5424315
DG
2172
2173If C<$enable> is false (the default), then C<encode> will throw an
dec273dc
CBW
2174exception when it encounters a blessed object that it cannot convert
2175otherwise.
2176
2177This setting has no effect on C<decode>.
d5424315
DG
2178
2179=head2 convert_blessed
2180
2181 $json = $json->convert_blessed([$enable])
2182
2183 $enabled = $json->get_convert_blessed
2184
dec273dc
CBW
2185See L<OBJECT SERIALISATION> for details.
2186
d5424315
DG
2187If C<$enable> is true (or missing), then C<encode>, upon encountering a
2188blessed object, will check for the availability of the C<TO_JSON> method
dec273dc
CBW
2189on the object's class. If found, it will be called in scalar context and
2190the resulting scalar will be encoded instead of the object.
d5424315
DG
2191
2192The C<TO_JSON> method may safely call die if it wants. If C<TO_JSON>
2193returns other blessed objects, those will be handled in the same
2194way. C<TO_JSON> must take care of not causing an endless recursion cycle
2195(== crash) in this case. The name of C<TO_JSON> was chosen because other
2196methods called by the Perl core (== not by the user of the object) are
dec273dc 2197usually in upper case letters and to avoid collisions with any C<to_json>
d5424315
DG
2198function or method.
2199
dec273dc
CBW
2200If C<$enable> is false (the default), then C<encode> will not consider
2201this type of conversion.
d5424315 2202
dec273dc 2203This setting has no effect on C<decode>.
d5424315 2204
5eabe055
N
2205=head2 allow_tags
2206
2207 $json = $json->allow_tags([$enable])
2208
2209 $enabled = $json->get_allow_tags
2210
2211See L<OBJECT SERIALISATION> for details.
2212
2213If C<$enable> is true (or missing), then C<encode>, upon encountering a
2214blessed object, will check for the availability of the C<FREEZE> method on
2215the object's class. If found, it will be used to serialise the object into
2216a nonstandard tagged JSON value (that JSON decoders cannot decode).
2217
2218It also causes C<decode> to parse such tagged JSON values and deserialise
2219them via a call to the C<THAW> method.
2220
2221If C<$enable> is false (the default), then C<encode> will not consider
2222this type of conversion, and tagged JSON values will cause a parse error
2223in C<decode>, as if tags were not part of the grammar.
2224
2225=head2 boolean_values
2226
2227 $json->boolean_values([$false, $true])
2228
2229 ($false, $true) = $json->get_boolean_values
2230
2231By default, JSON booleans will be decoded as overloaded
2232C<$JSON::PP::false> and C<$JSON::PP::true> objects.
2233
2234With this method you can specify your own boolean values for decoding -
2235on decode, JSON C<false> will be decoded as a copy of C<$false>, and JSON
2236C<true> will be decoded as C<$true> ("copy" here is the same thing as
2237assigning a value to another variable, i.e. C<$copy = $false>).
2238
2239This is useful when you want to pass a decoded data structure directly
2240to other serialisers like YAML, Data::MessagePack and so on.
2241
2242Note that this works only when you C<decode>. You can set incompatible
2243boolean objects (like L<boolean>), but when you C<encode> a data structure
2244with such boolean objects, you still need to enable C<convert_blessed>
2245(and add a C<TO_JSON> method if necessary).
2246
2247Calling this method without any arguments will reset the booleans
2248to their default values.
2249
2250C<get_boolean_values> will return both C<$false> and C<$true> values, or
2251the empty list when they are set to the default.
2252
d5424315
DG
2253=head2 filter_json_object
2254
2255 $json = $json->filter_json_object([$coderef])
2256
2257When C<$coderef> is specified, it will be called from C<decode> each
5eabe055
N
2258time it decodes a JSON object. The only argument is a reference to
2259the newly-created hash. If the code references returns a single scalar
2260(which need not be a reference), this value (or rather a copy of it) is
2261inserted into the deserialised data structure. If it returns an empty
2262list (NOTE: I<not> C<undef>, which is a valid scalar), the original
2263deserialised hash will be inserted. This setting can slow down decoding
2264considerably.
d5424315
DG
2265
2266When C<$coderef> is omitted or undefined, any existing callback will
2267be removed and C<decode> will not change the deserialised hash in any
2268way.
2269
2270Example, convert all JSON objects into the integer 5:
2271
5eabe055 2272 my $js = JSON::PP->new->filter_json_object(sub { 5 });
d5424315 2273 # returns [5]
5eabe055
N
2274 $js->decode('[{}]');
2275 # returns 5
2276 $js->decode('{"a":1, "b":2}');
d5424315
DG
2277
2278=head2 filter_json_single_key_object
2279
2280 $json = $json->filter_json_single_key_object($key [=> $coderef])
2281
2282Works remotely similar to C<filter_json_object>, but is only called for
2283JSON objects having a single key named C<$key>.
2284
2285This C<$coderef> is called before the one specified via
2286C<filter_json_object>, if any. It gets passed the single value in the JSON
2287object. If it returns a single value, it will be inserted into the data
2288structure. If it returns nothing (not even C<undef> but the empty list),
2289the callback from C<filter_json_object> will be called next, as if no
2290single-key callback were specified.
2291
2292If C<$coderef> is omitted or undefined, the corresponding callback will be
2293disabled. There can only ever be one callback for a given key.
2294
2295As this callback gets called less often then the C<filter_json_object>
2296one, decoding speed will not usually suffer as much. Therefore, single-key
2297objects make excellent targets to serialise Perl objects into, especially
2298as single-key JSON objects are as close to the type-tagged value concept
2299as JSON gets (it's basically an ID/VALUE tuple). Of course, JSON does not
2300support this in any way, so you need to make sure your data never looks
2301like a serialised Perl hash.
2302
2303Typical names for the single object key are C<__class_whatever__>, or
2304C<$__dollars_are_rarely_used__$> or C<}ugly_brace_placement>, or even
2305things like C<__class_md5sum(classname)__>, to reduce the risk of clashing
2306with real hashes.
2307
2308Example, decode JSON objects of the form C<< { "__widget__" => <id> } >>
2309into the corresponding C<< $WIDGET{<id>} >> object:
2310
2311 # return whatever is in $WIDGET{5}:
2312 JSON::PP
2313 ->new
2314 ->filter_json_single_key_object (__widget__ => sub {
2315 $WIDGET{ $_[0] }
2316 })
2317 ->decode ('{"__widget__": 5')
2318
2319 # this can be used with a TO_JSON method in some "widget" class
2320 # for serialisation to json:
2321 sub WidgetBase::TO_JSON {
2322 my ($self) = @_;
2323
2324 unless ($self->{id}) {
2325 $self->{id} = ..get..some..id..;
2326 $WIDGET{$self->{id}} = $self;
2327 }
2328
2329 { __widget__ => $self->{id} }
2330 }
2331
2332=head2 shrink
2333
2334 $json = $json->shrink([$enable])
2335
2336 $enabled = $json->get_shrink
2337
dec273dc
CBW
2338If C<$enable> is true (or missing), the string returned by C<encode> will
2339be shrunk (i.e. downgraded if possible).
d5424315 2340
dec273dc
CBW
2341The actual definition of what shrink does might change in future versions,
2342but it will always try to save space at the expense of time.
d5424315 2343
dec273dc 2344If C<$enable> is false, then JSON::PP does nothing.
d5424315
DG
2345
2346=head2 max_depth
2347
2348 $json = $json->max_depth([$maximum_nesting_depth])
2349
2350 $max_depth = $json->get_max_depth
2351
2352Sets the maximum nesting level (default C<512>) accepted while encoding
2353or decoding. If a higher nesting level is detected in JSON text or a Perl
2354data structure, then the encoder and decoder will stop and croak at that
2355point.
2356
2357Nesting level is defined by number of hash- or arrayrefs that the encoder
2358needs to traverse to reach a given point or the number of C<{> or C<[>
2359characters without their matching closing parenthesis crossed to reach a
2360given character in a string.
2361
dec273dc
CBW
2362Setting the maximum depth to one disallows any nesting, so that ensures
2363that the object is only a single hash/object or array.
2364
d5424315
DG
2365If no argument is given, the highest possible setting will be used, which
2366is rarely useful.
2367
a1e5c561 2368See L<JSON::XS/SECURITY CONSIDERATIONS> for more info on why this is useful.
d5424315 2369
d5424315
DG
2370=head2 max_size
2371
2372 $json = $json->max_size([$maximum_string_size])
2373
2374 $max_size = $json->get_max_size
2375
2376Set the maximum length a JSON text may have (in bytes) where decoding is
2377being attempted. The default is C<0>, meaning no limit. When C<decode>
2378is called on a string that is longer then this many bytes, it will not
2379attempt to decode the string but throw an exception. This setting has no
2380effect on C<encode> (yet).
2381
2382If no argument is given, the limit check will be deactivated (same as when
2383C<0> is specified).
2384
a1e5c561 2385See L<JSON::XS/SECURITY CONSIDERATIONS> for more info on why this is useful.
d5424315
DG
2386
2387=head2 encode
2388
2389 $json_text = $json->encode($perl_scalar)
2390
dec273dc
CBW
2391Converts the given Perl value or data structure to its JSON
2392representation. Croaks on error.
d5424315
DG
2393
2394=head2 decode
2395
2396 $perl_scalar = $json->decode($json_text)
2397
2398The opposite of C<encode>: expects a JSON text and tries to parse it,
2399returning the resulting simple scalar or reference. Croaks on error.
2400
d5424315
DG
2401=head2 decode_prefix
2402
2403 ($perl_scalar, $characters) = $json->decode_prefix($json_text)
2404
2405This works like the C<decode> method, but instead of raising an exception
2406when there is trailing garbage after the first JSON object, it will
2407silently stop parsing there and return the number of characters consumed
2408so far.
2409
dec273dc
CBW
2410This is useful if your JSON texts are not delimited by an outer protocol
2411and you need to know where the JSON text ends.
2412
2413 JSON::PP->new->decode_prefix ("[1] the tail")
2414 => ([1], 3)
2415
2416=head1 FLAGS FOR JSON::PP ONLY
2417
2418The following flags and properties are for JSON::PP only. If you use
2419any of these, you can't make your application run faster by replacing
2420JSON::PP with JSON::XS. If you need these and also speed boost,
5eabe055
N
2421you might want to try L<Cpanel::JSON::XS>, a fork of JSON::XS by
2422Reini Urban, which supports some of these (with a different set of
2423incompatibilities). Most of these historical flags are only kept
2424for backward compatibility, and should not be used in a new application.
dec273dc
CBW
2425
2426=head2 allow_singlequote
2427
2428 $json = $json->allow_singlequote([$enable])
2429 $enabled = $json->get_allow_singlequote
2430
2431If C<$enable> is true (or missing), then C<decode> will accept
2432invalid JSON texts that contain strings that begin and end with
5eabe055 2433single quotation marks. C<encode> will not be affected in any way.
dec273dc
CBW
2434I<Be aware that this option makes you accept invalid JSON texts
2435as if they were valid!>. I suggest only to use this option to
2436parse application-specific files written by humans (configuration
2437files, resource files etc.)
2438
2439If C<$enable> is false (the default), then C<decode> will only accept
2440valid JSON texts.
2441
2442 $json->allow_singlequote->decode(qq|{"foo":'bar'}|);
2443 $json->allow_singlequote->decode(qq|{'foo':"bar"}|);
2444 $json->allow_singlequote->decode(qq|{'foo':'bar'}|);
2445
2446=head2 allow_barekey
2447
2448 $json = $json->allow_barekey([$enable])
2449 $enabled = $json->get_allow_barekey
2450
2451If C<$enable> is true (or missing), then C<decode> will accept
2452invalid JSON texts that contain JSON objects whose names don't
2453begin and end with quotation marks. C<encode> will not be affected
5eabe055 2454in any way. I<Be aware that this option makes you accept invalid JSON
dec273dc
CBW
2455texts as if they were valid!>. I suggest only to use this option to
2456parse application-specific files written by humans (configuration
2457files, resource files etc.)
2458
2459If C<$enable> is false (the default), then C<decode> will only accept
2460valid JSON texts.
2461
2462 $json->allow_barekey->decode(qq|{foo:"bar"}|);
2463
2464=head2 allow_bignum
2465
2466 $json = $json->allow_bignum([$enable])
2467 $enabled = $json->get_allow_bignum
2468
2469If C<$enable> is true (or missing), then C<decode> will convert
2470big integers Perl cannot handle as integer into L<Math::BigInt>
2471objects and convert floating numbers into L<Math::BigFloat>
2472objects. C<encode> will convert C<Math::BigInt> and C<Math::BigFloat>
2473objects into JSON numbers.
2474
2475 $json->allow_nonref->allow_bignum;
2476 $bigfloat = $json->decode('2.000000000000000000000000001');
2477 print $json->encode($bigfloat);
2478 # => 2.000000000000000000000000001
2479
2480See also L<MAPPING>.
2481
2482=head2 loose
2483
2484 $json = $json->loose([$enable])
2485 $enabled = $json->get_loose
2486
2487If C<$enable> is true (or missing), then C<decode> will accept
2488invalid JSON texts that contain unescaped [\x00-\x1f\x22\x5c]
5eabe055 2489characters. C<encode> will not be affected in any way.
dec273dc
CBW
2490I<Be aware that this option makes you accept invalid JSON texts
2491as if they were valid!>. I suggest only to use this option to
2492parse application-specific files written by humans (configuration
2493files, resource files etc.)
2494
2495If C<$enable> is false (the default), then C<decode> will only accept
2496valid JSON texts.
2497
2498 $json->loose->decode(qq|["abc
2499 def"]|);
2500
2501=head2 escape_slash
2502
2503 $json = $json->escape_slash([$enable])
2504 $enabled = $json->get_escape_slash
2505
2506If C<$enable> is true (or missing), then C<encode> will explicitly
2507escape I<slash> (solidus; C<U+002F>) characters to reduce the risk of
2508XSS (cross site scripting) that may be caused by C<< </script> >>
2509in a JSON text, with the cost of bloating the size of JSON texts.
2510
2511This option may be useful when you embed JSON in HTML, but embedding
2512arbitrary JSON in HTML (by some HTML template toolkit or by string
2513interpolation) is risky in general. You must escape necessary
2514characters in correct order, depending on the context.
2515
5eabe055 2516C<decode> will not be affected in any way.
dec273dc
CBW
2517
2518=head2 indent_length
2519
2520 $json = $json->indent_length($number_of_spaces)
2521 $length = $json->get_indent_length
2522
2523This option is only useful when you also enable C<indent> or C<pretty>.
2524
2525JSON::XS indents with three spaces when you C<encode> (if requested
2526by C<indent> or C<pretty>), and the number cannot be changed.
2527JSON::PP allows you to change/get the number of indent spaces with these
2528mutator/accessor. The default number of spaces is three (the same as
2529JSON::XS), and the acceptable range is from C<0> (no indentation;
2530it'd be better to disable indentation by C<indent(0)>) to C<15>.
2531
2532=head2 sort_by
2533
2534 $json = $json->sort_by($code_ref)
2535 $json = $json->sort_by($subroutine_name)
2536
2537If you just want to sort keys (names) in JSON objects when you
2538C<encode>, enable C<canonical> option (see above) that allows you to
2539sort object keys alphabetically.
2540
2541If you do need to sort non-alphabetically for whatever reasons,
2542you can give a code reference (or a subroutine name) to C<sort_by>,
2543then the argument will be passed to Perl's C<sort> built-in function.
2544
2545As the sorting is done in the JSON::PP scope, you usually need to
2546prepend C<JSON::PP::> to the subroutine name, and the special variables
2547C<$a> and C<$b> used in the subrontine used by C<sort> function.
2548
2549Example:
2550
2551 my %ORDER = (id => 1, class => 2, name => 3);
2552 $json->sort_by(sub {
2553 ($ORDER{$JSON::PP::a} // 999) <=> ($ORDER{$JSON::PP::b} // 999)
2554 or $JSON::PP::a cmp $JSON::PP::b
2555 });
2556 print $json->encode([
2557 {name => 'CPAN', id => 1, href => 'http://cpan.org'}
2558 ]);
2559 # [{"id":1,"name":"CPAN","href":"http://cpan.org"}]
2560
2561Note that C<sort_by> affects all the plain hashes in the data structure.
2562If you need finer control, C<tie> necessary hashes with a module that
2563implements ordered hash (such as L<Hash::Ordered> and L<Tie::IxHash>).
2564C<canonical> and C<sort_by> don't affect the key order in C<tie>d
2565hashes.
2566
2567 use Hash::Ordered;
2568 tie my %hash, 'Hash::Ordered',
2569 (name => 'CPAN', id => 1, href => 'http://cpan.org');
2570 print $json->encode([\%hash]);
2571 # [{"name":"CPAN","id":1,"href":"http://cpan.org"}] # order is kept
d5424315
DG
2572
2573=head1 INCREMENTAL PARSING
2574
dec273dc 2575This section is also taken from JSON::XS.
d5424315 2576
dec273dc
CBW
2577In some cases, there is the need for incremental parsing of JSON
2578texts. While this module always has to keep both JSON text and resulting
2579Perl data structure in memory at one time, it does allow you to parse a
2580JSON stream incrementally. It does so by accumulating text until it has
2581a full JSON object, which it then can decode. This process is similar to
2582using C<decode_prefix> to see if a full JSON object is available, but
2583is much more efficient (and can be implemented with a minimum of method
2584calls).
d5424315 2585
dec273dc 2586JSON::PP will only attempt to parse the JSON text once it is sure it
d5424315
DG
2587has enough text to get a decisive result, using a very simple but
2588truly incremental parser. This means that it sometimes won't stop as
dec273dc
CBW
2589early as the full parser, for example, it doesn't detect mismatched
2590parentheses. The only thing it guarantees is that it starts decoding as
d5424315
DG
2591soon as a syntactically valid JSON text has been seen. This means you need
2592to set resource limits (e.g. C<max_size>) to ensure the parser will stop
2593parsing in the presence if syntax errors.
2594
2595The following methods implement this incremental parser.
2596
2597=head2 incr_parse
2598
2599 $json->incr_parse( [$string] ) # void context
2600
2601 $obj_or_undef = $json->incr_parse( [$string] ) # scalar context
2602
2603 @obj_or_empty = $json->incr_parse( [$string] ) # list context
2604
2605This is the central parsing function. It can both append new text and
2606extract objects from the stream accumulated so far (both of these
2607functions are optional).
2608
2609If C<$string> is given, then this string is appended to the already
2610existing JSON fragment stored in the C<$json> object.
2611
2612After that, if the function is called in void context, it will simply
2613return without doing anything further. This can be used to add more text
2614in as many chunks as you want.
2615
2616If the method is called in scalar context, then it will try to extract
2617exactly I<one> JSON object. If that is successful, it will return this
2618object, otherwise it will return C<undef>. If there is a parse error,
2619this method will croak just as C<decode> would do (one can then use
a1e5c561 2620C<incr_skip> to skip the erroneous part). This is the most common way of
d5424315
DG
2621using the method.
2622
2623And finally, in list context, it will try to extract as many objects
2624from the stream as it can find and return them, or the empty list
dec273dc
CBW
2625otherwise. For this to work, there must be no separators (other than
2626whitespace) between the JSON objects or arrays, instead they must be
2627concatenated back-to-back. If an error occurs, an exception will be
2628raised as in the scalar context case. Note that in this case, any
2629previously-parsed JSON texts will be lost.
d5424315 2630
dec273dc
CBW
2631Example: Parse some JSON arrays/objects in a given string and return
2632them.
d5424315 2633
dec273dc 2634 my @objs = JSON::PP->new->incr_parse ("[5][7][1,2]");
d5424315
DG
2635
2636=head2 incr_text
2637
2638 $lvalue_string = $json->incr_text
2639
2640This method returns the currently stored JSON fragment as an lvalue, that
2641is, you can manipulate it. This I<only> works when a preceding call to
2642C<incr_parse> in I<scalar context> successfully returned an object. Under
2643all other circumstances you must not call this function (I mean it.
2644although in simple tests it might actually work, it I<will> fail under
2645real world conditions). As a special exception, you can also call this
2646method before having parsed anything.
2647
dec273dc
CBW
2648That means you can only use this function to look at or manipulate text
2649before or after complete JSON objects, not while the parser is in the
2650middle of parsing a JSON object.
2651
d5424315
DG
2652This function is useful in two cases: a) finding the trailing text after a
2653JSON object or b) parsing multiple JSON objects separated by non-JSON text
2654(such as commas).
2655
d5424315
DG
2656=head2 incr_skip
2657
2658 $json->incr_skip
2659
dec273dc
CBW
2660This will reset the state of the incremental parser and will remove
2661the parsed text from the input buffer so far. This is useful after
2662C<incr_parse> died, in which case the input buffer and incremental parser
2663state is left unchanged, to skip the text parsed so far and to reset the
2664parse state.
2665
2666The difference to C<incr_reset> is that only text until the parse error
2667occurred is removed.
d5424315
DG
2668
2669=head2 incr_reset
2670
2671 $json->incr_reset
2672
2673This completely resets the incremental parser, that is, after this call,
2674it will be as if the parser had never parsed anything.
2675
a1e5c561 2676This is useful if you want to repeatedly parse JSON objects and want to
d5424315
DG
2677ignore any trailing data, which means you have to reset the parser after
2678each successful decode.
2679
d5424315
DG
2680=head1 MAPPING
2681
dec273dc
CBW
2682Most of this section is also taken from JSON::XS.
2683
2684This section describes how JSON::PP maps Perl values to JSON values and
2685vice versa. These mappings are designed to "do the right thing" in most
2686circumstances automatically, preserving round-tripping characteristics
2687(what you put in comes out as something equivalent).
d5424315 2688
dec273dc
CBW
2689For the more enlightened: note that in the following descriptions,
2690lowercase I<perl> refers to the Perl interpreter, while uppercase I<Perl>
2691refers to the abstract Perl language itself.
d5424315
DG
2692
2693=head2 JSON -> PERL
2694
2695=over 4
2696
2697=item object
2698
2699A JSON object becomes a reference to a hash in Perl. No ordering of object
dec273dc 2700keys is preserved (JSON does not preserve object key ordering itself).
d5424315
DG
2701
2702=item array
2703
2704A JSON array becomes a reference to an array in Perl.
2705
2706=item string
2707
2708A JSON string becomes a string scalar in Perl - Unicode codepoints in JSON
2709are represented by the same codepoints in the Perl string, so no manual
2710decoding is necessary.
2711
2712=item number
2713
2714A JSON number becomes either an integer, numeric (floating point) or
2715string scalar in perl, depending on its range and any fractional parts. On
2716the Perl level, there is no difference between those as Perl handles all
2717the conversion details, but an integer may take slightly less memory and
2718might represent more values exactly than floating point numbers.
2719
dec273dc 2720If the number consists of digits only, JSON::PP will try to represent
d5424315
DG
2721it as an integer value. If that fails, it will try to represent it as
2722a numeric (floating point) value if that is possible without loss of
2723precision. Otherwise it will preserve the number as a string value (in
2724which case you lose roundtripping ability, as the JSON number will be
a1e5c561 2725re-encoded to a JSON string).
d5424315
DG
2726
2727Numbers containing a fractional or exponential part will always be
2728represented as numeric (floating point) values, possibly at a loss of
2729precision (in which case you might lose perfect roundtripping ability, but
2730the JSON number will still be re-encoded as a JSON number).
2731
2732Note that precision is not accuracy - binary floating point values cannot
2733represent most decimal fractions exactly, and when converting from and to
dec273dc 2734floating point, JSON::PP only guarantees precision up to but not including
a1e5c561 2735the least significant bit.
d5424315 2736
dec273dc
CBW
2737When C<allow_bignum> is enabled, big integer values and any numeric
2738values will be converted into L<Math::BigInt> and L<Math::BigFloat>
2739objects respectively, without becoming string scalars or losing
2740precision.
d5424315
DG
2741
2742=item true, false
2743
2744These JSON atoms become C<JSON::PP::true> and C<JSON::PP::false>,
2745respectively. They are overloaded to act almost exactly like the numbers
a1e5c561 2746C<1> and C<0>. You can check whether a scalar is a JSON boolean by using
dec273dc 2747the C<JSON::PP::is_bool> function.
d5424315
DG
2748
2749=item null
2750
2751A JSON null atom becomes C<undef> in Perl.
2752
dec273dc
CBW
2753=item shell-style comments (C<< # I<text> >>)
2754
2755As a nonstandard extension to the JSON syntax that is enabled by the
2756C<relaxed> setting, shell-style comments are allowed. They can start
2757anywhere outside strings and go till the end of the line.
d5424315 2758
5eabe055
N
2759=item tagged values (C<< (I<tag>)I<value> >>).
2760
2761Another nonstandard extension to the JSON syntax, enabled with the
2762C<allow_tags> setting, are tagged values. In this implementation, the
2763I<tag> must be a perl package/class name encoded as a JSON string, and the
2764I<value> must be a JSON array encoding optional constructor arguments.
2765
2766See L<OBJECT SERIALISATION>, below, for details.
2767
d5424315
DG
2768=back
2769
2770
2771=head2 PERL -> JSON
2772
2773The mapping from Perl to JSON is slightly more difficult, as Perl is a
2774truly typeless language, so we can only guess which JSON type is meant by
2775a Perl value.
2776
2777=over 4
2778
2779=item hash references
2780
dec273dc
CBW
2781Perl hash references become JSON objects. As there is no inherent
2782ordering in hash keys (or JSON objects), they will usually be encoded
2783in a pseudo-random order. JSON::PP can optionally sort the hash keys
2784(determined by the I<canonical> flag and/or I<sort_by> property), so
2785the same data structure will serialise to the same JSON text (given
2786same settings and version of JSON::PP), but this incurs a runtime
2787overhead and is only rarely useful, e.g. when you want to compare some
2788JSON text against another for equality.
d5424315
DG
2789
2790=item array references
2791
2792Perl array references become JSON arrays.
2793
2794=item other references
2795
2796Other unblessed references are generally not allowed and will cause an
2797exception to be thrown, except for references to the integers C<0> and
2798C<1>, which get turned into C<false> and C<true> atoms in JSON. You can
dec273dc
CBW
2799also use C<JSON::PP::false> and C<JSON::PP::true> to improve
2800readability.
d5424315 2801
dec273dc 2802 to_json [\0, JSON::PP::true] # yields [false,true]
d5424315 2803
dec273dc 2804=item JSON::PP::true, JSON::PP::false
d5424315
DG
2805
2806These special values become JSON true and JSON false values,
2807respectively. You can also use C<\1> and C<\0> directly if you want.
2808
dec273dc 2809=item JSON::PP::null
d5424315 2810
dec273dc 2811This special value becomes JSON null.
d5424315 2812
dec273dc 2813=item blessed objects
d5424315 2814
dec273dc
CBW
2815Blessed objects are not directly representable in JSON, but C<JSON::PP>
2816allows various ways of handling objects. See L<OBJECT SERIALISATION>,
2817below, for details.
d5424315
DG
2818
2819=item simple scalars
2820
2821Simple Perl scalars (any scalar that is not a reference) are the most
dec273dc 2822difficult objects to encode: JSON::PP will encode undefined scalars as
d5424315
DG
2823JSON C<null> values, scalars that have last been used in a string context
2824before encoding as JSON strings, and anything else as number value:
2825
2826 # dump as number
2827 encode_json [2] # yields [2]
2828 encode_json [-3.0e17] # yields [-3e+17]
2829 my $value = 5; encode_json [$value] # yields [5]
2830
2831 # used as string, so dump as string
2832 print $value;
2833 encode_json [$value] # yields ["5"]
2834
2835 # undef becomes null
2836 encode_json [undef] # yields [null]
2837
5eabe055 2838You can force the type to be a JSON string by stringifying it:
d5424315
DG
2839
2840 my $x = 3.1; # some variable containing a number
2841 "$x"; # stringified
2842 $x .= ""; # another, more awkward way to stringify
2843 print $x; # perl does it for you, too, quite often
dec273dc 2844 # (but for older perls)
d5424315 2845
5eabe055 2846You can force the type to be a JSON number by numifying it:
d5424315
DG
2847
2848 my $x = "3"; # some variable containing a string
2849 $x += 0; # numify it, ensuring it will be dumped as a number
a1e5c561 2850 $x *= 1; # same thing, the choice is yours.
d5424315 2851
5eabe055
N
2852You can not currently force the type in other, less obscure, ways.
2853
2854Since version 2.91_01, JSON::PP uses a different number detection logic
2855that converts a scalar that is possible to turn into a number safely.
2856The new logic is slightly faster, and tends to help people who use older
2857perl or who want to encode complicated data structure. However, this may
2858results in a different JSON text from the one JSON::XS encodes (and
2859thus may break tests that compare entire JSON texts). If you do
2860need the previous behavior for compatibility or for finer control,
2861set PERL_JSON_PP_USE_B environmental variable to true before you
2862C<use> JSON::PP (or JSON.pm).
d5424315
DG
2863
2864Note that numerical precision has the same meaning as under Perl (so
2865binary to decimal conversion follows the same rules as in Perl, which
2866can differ to other languages). Also, your perl interpreter might expose
2867extensions to the floating point numbers of your platform, such as
2868infinities or NaN's - these cannot be represented in JSON, and it is an
2869error to pass those in.
2870
dec273dc
CBW
2871JSON::PP (and JSON::XS) trusts what you pass to C<encode> method
2872(or C<encode_json> function) is a clean, validated data structure with
2873values that can be represented as valid JSON values only, because it's
2874not from an external data source (as opposed to JSON texts you pass to
2875C<decode> or C<decode_json>, which JSON::PP considers tainted and
2876doesn't trust). As JSON::PP doesn't know exactly what you and consumers
2877of your JSON texts want the unexpected values to be (you may want to
2878convert them into null, or to stringify them with or without
2879normalisation (string representation of infinities/NaN may vary
2880depending on platforms), or to croak without conversion), you're advised
2881to do what you and your consumers need before you encode, and also not
2882to numify values that may start with values that look like a number
2883(including infinities/NaN), without validating.
d5424315
DG
2884
2885=back
2886
dec273dc 2887=head2 OBJECT SERIALISATION
d5424315 2888
5eabe055
N
2889As JSON cannot directly represent Perl objects, you have to choose between
2890a pure JSON representation (without the ability to deserialise the object
2891automatically again), and a nonstandard extension to the JSON syntax,
2892tagged values.
d5424315 2893
dec273dc 2894=head3 SERIALISATION
d5424315 2895
dec273dc 2896What happens when C<JSON::PP> encounters a Perl object depends on the
5eabe055
N
2897C<allow_blessed>, C<convert_blessed>, C<allow_tags> and C<allow_bignum>
2898settings, which are used in this order:
d5424315 2899
dec273dc 2900=over 4
d5424315 2901
5eabe055
N
2902=item 1. C<allow_tags> is enabled and the object has a C<FREEZE> method.
2903
2904In this case, C<JSON::PP> creates a tagged JSON value, using a nonstandard
2905extension to the JSON syntax.
2906
2907This works by invoking the C<FREEZE> method on the object, with the first
2908argument being the object to serialise, and the second argument being the
2909constant string C<JSON> to distinguish it from other serialisers.
2910
2911The C<FREEZE> method can return any number of values (i.e. zero or
2912more). These values and the paclkage/classname of the object will then be
2913encoded as a tagged JSON value in the following format:
2914
2915 ("classname")[FREEZE return values...]
2916
2917e.g.:
2918
2919 ("URI")["http://www.google.com/"]
2920 ("MyDate")[2013,10,29]
2921 ("ImageData::JPEG")["Z3...VlCg=="]
2922
2923For example, the hypothetical C<My::Object> C<FREEZE> method might use the
2924objects C<type> and C<id> members to encode the object:
2925
2926 sub My::Object::FREEZE {
2927 my ($self, $serialiser) = @_;
2928
2929 ($self->{type}, $self->{id})
2930 }
2931
2932=item 2. C<convert_blessed> is enabled and the object has a C<TO_JSON> method.
d5424315 2933
dec273dc
CBW
2934In this case, the C<TO_JSON> method of the object is invoked in scalar
2935context. It must return a single scalar that can be directly encoded into
2936JSON. This scalar replaces the object in the JSON text.
d5424315 2937
dec273dc
CBW
2938For example, the following C<TO_JSON> method will convert all L<URI>
2939objects to JSON strings when serialised. The fact that these values
2940originally were L<URI> objects is lost.
d5424315 2941
dec273dc
CBW
2942 sub URI::TO_JSON {
2943 my ($uri) = @_;
2944 $uri->as_string
2945 }
d5424315 2946
5eabe055 2947=item 3. C<allow_bignum> is enabled and the object is a C<Math::BigInt> or C<Math::BigFloat>.
d5424315 2948
dec273dc 2949The object will be serialised as a JSON number value.
d5424315 2950
5eabe055 2951=item 4. C<allow_blessed> is enabled.
d5424315 2952
dec273dc 2953The object will be serialised as a JSON null value.
d5424315 2954
5eabe055 2955=item 5. none of the above
d5424315 2956
dec273dc
CBW
2957If none of the settings are enabled or the respective methods are missing,
2958C<JSON::PP> throws an exception.
d5424315 2959
dec273dc 2960=back
d5424315 2961
5eabe055
N
2962=head3 DESERIALISATION
2963
2964For deserialisation there are only two cases to consider: either
2965nonstandard tagging was used, in which case C<allow_tags> decides,
2966or objects cannot be automatically be deserialised, in which
2967case you can use postprocessing or the C<filter_json_object> or
2968C<filter_json_single_key_object> callbacks to get some real objects our of
2969your JSON.
2970
2971This section only considers the tagged value case: a tagged JSON object
2972is encountered during decoding and C<allow_tags> is disabled, a parse
2973error will result (as if tagged values were not part of the grammar).
2974
2975If C<allow_tags> is enabled, C<JSON::PP> will look up the C<THAW> method
2976of the package/classname used during serialisation (it will not attempt
2977to load the package as a Perl module). If there is no such method, the
2978decoding will fail with an error.
2979
2980Otherwise, the C<THAW> method is invoked with the classname as first
2981argument, the constant string C<JSON> as second argument, and all the
2982values from the JSON array (the values originally returned by the
2983C<FREEZE> method) as remaining arguments.
2984
2985The method must then return the object. While technically you can return
2986any Perl scalar, you might have to enable the C<allow_nonref> setting to
2987make that work in all cases, so better return an actual blessed reference.
2988
2989As an example, let's implement a C<THAW> function that regenerates the
2990C<My::Object> from the C<FREEZE> example earlier:
2991
2992 sub My::Object::THAW {
2993 my ($class, $serialiser, $type, $id) = @_;
2994
2995 $class->new (type => $type, id => $id)
2996 }
2997
2998
dec273dc 2999=head1 ENCODING/CODESET FLAG NOTES
d5424315 3000
dec273dc 3001This section is taken from JSON::XS.
d5424315 3002
dec273dc
CBW
3003The interested reader might have seen a number of flags that signify
3004encodings or codesets - C<utf8>, C<latin1> and C<ascii>. There seems to be
3005some confusion on what these do, so here is a short comparison:
d5424315 3006
dec273dc
CBW
3007C<utf8> controls whether the JSON text created by C<encode> (and expected
3008by C<decode>) is UTF-8 encoded or not, while C<latin1> and C<ascii> only
3009control whether C<encode> escapes character values outside their respective
3010codeset range. Neither of these flags conflict with each other, although
3011some combinations make less sense than others.
d5424315 3012
dec273dc
CBW
3013Care has been taken to make all flags symmetrical with respect to
3014C<encode> and C<decode>, that is, texts encoded with any combination of
3015these flag values will be correctly decoded when the same flags are used
3016- in general, if you use different flag settings while encoding vs. when
3017decoding you likely have a bug somewhere.
d5424315 3018
dec273dc
CBW
3019Below comes a verbose discussion of these flags. Note that a "codeset" is
3020simply an abstract set of character-codepoint pairs, while an encoding
3021takes those codepoint numbers and I<encodes> them, in our case into
3022octets. Unicode is (among other things) a codeset, UTF-8 is an encoding,
3023and ISO-8859-1 (= latin 1) and ASCII are both codesets I<and> encodings at
3024the same time, which can be confusing.
d5424315 3025
dec273dc 3026=over 4
d5424315 3027
dec273dc
CBW
3028=item C<utf8> flag disabled
3029
3030When C<utf8> is disabled (the default), then C<encode>/C<decode> generate
3031and expect Unicode strings, that is, characters with high ordinal Unicode
3032values (> 255) will be encoded as such characters, and likewise such
3033characters are decoded as-is, no changes to them will be done, except
3034"(re-)interpreting" them as Unicode codepoints or Unicode characters,
3035respectively (to Perl, these are the same thing in strings unless you do
3036funny/weird/dumb stuff).
3037
3038This is useful when you want to do the encoding yourself (e.g. when you
3039want to have UTF-16 encoded JSON texts) or when some other layer does
3040the encoding for you (for example, when printing to a terminal using a
3041filehandle that transparently encodes to UTF-8 you certainly do NOT want
3042to UTF-8 encode your data first and have Perl encode it another time).
3043
3044=item C<utf8> flag enabled
3045
3046If the C<utf8>-flag is enabled, C<encode>/C<decode> will encode all
3047characters using the corresponding UTF-8 multi-byte sequence, and will
3048expect your input strings to be encoded as UTF-8, that is, no "character"
3049of the input string must have any value > 255, as UTF-8 does not allow
3050that.
3051
3052The C<utf8> flag therefore switches between two modes: disabled means you
3053will get a Unicode string in Perl, enabled means you get an UTF-8 encoded
3054octet/binary string in Perl.
3055
3056=item C<latin1> or C<ascii> flags enabled
3057
3058With C<latin1> (or C<ascii>) enabled, C<encode> will escape characters
3059with ordinal values > 255 (> 127 with C<ascii>) and encode the remaining
3060characters as specified by the C<utf8> flag.
3061
3062If C<utf8> is disabled, then the result is also correctly encoded in those
3063character sets (as both are proper subsets of Unicode, meaning that a
3064Unicode string with all character values < 256 is the same thing as a
3065ISO-8859-1 string, and a Unicode string with all character values < 128 is
3066the same thing as an ASCII string in Perl).
3067
3068If C<utf8> is enabled, you still get a correct UTF-8-encoded string,
3069regardless of these flags, just some more characters will be escaped using
3070C<\uXXXX> then before.
3071
3072Note that ISO-8859-1-I<encoded> strings are not compatible with UTF-8
3073encoding, while ASCII-encoded strings are. That is because the ISO-8859-1
3074encoding is NOT a subset of UTF-8 (despite the ISO-8859-1 I<codeset> being
3075a subset of Unicode), while ASCII is.
3076
3077Surprisingly, C<decode> will ignore these flags and so treat all input
3078values as governed by the C<utf8> flag. If it is disabled, this allows you
3079to decode ISO-8859-1- and ASCII-encoded strings, as both strict subsets of
3080Unicode. If it is enabled, you can correctly decode UTF-8 encoded strings.
3081
3082So neither C<latin1> nor C<ascii> are incompatible with the C<utf8> flag -
3083they only govern when the JSON output engine escapes a character or not.
3084
3085The main use for C<latin1> is to relatively efficiently store binary data
3086as JSON, at the expense of breaking compatibility with most JSON decoders.
3087
3088The main use for C<ascii> is to force the output to not contain characters
3089with values > 127, which means you can interpret the resulting string
3090as UTF-8, ISO-8859-1, ASCII, KOI8-R or most about any character set and
30918-bit-encoding, and still get the same data structure back. This is useful
3092when your channel for JSON transfer is not 8-bit clean or the encoding
3093might be mangled in between (e.g. in mail), and works because ASCII is a
3094proper subset of most 8-bit and multibyte encodings in use in the world.
d5424315
DG
3095
3096=back
3097
5eabe055
N
3098=head1 BUGS
3099
3100Please report bugs on a specific behavior of this module to RT or GitHub
3101issues (preferred):
3102
3103L<https://github.com/makamaka/JSON-PP/issues>
3104
3105L<https://rt.cpan.org/Public/Dist/Display.html?Queue=JSON-PP>
3106
3107As for new features and requests to change common behaviors, please
3108ask the author of JSON::XS (Marc Lehmann, E<lt>schmorp[at]schmorp.deE<gt>)
3109first, by email (important!), to keep compatibility among JSON.pm backends.
3110
3111Generally speaking, if you need something special for you, you are advised
3112to create a new module, maybe based on L<JSON::Tiny>, which is smaller and
3113written in a much cleaner way than this module.
3114
d5424315
DG
3115=head1 SEE ALSO
3116
dec273dc
CBW
3117The F<json_pp> command line utility for quick experiments.
3118
3119L<JSON::XS>, L<Cpanel::JSON::XS>, and L<JSON::Tiny> for faster alternatives.
3120L<JSON> and L<JSON::MaybeXS> for easy migration.
d5424315 3121
dec273dc 3122L<JSON::PP::Compat5005> and L<JSON::PP::Compat5006> for older perl users.
d5424315
DG
3123
3124RFC4627 (L<http://www.ietf.org/rfc/rfc4627.txt>)
3125
5eabe055
N
3126RFC7159 (L<http://www.ietf.org/rfc/rfc7159.txt>)
3127
3128RFC8259 (L<http://www.ietf.org/rfc/rfc8259.txt>)
3129
d5424315
DG
3130=head1 AUTHOR
3131
3132Makamaka Hannyaharamitu, E<lt>makamaka[at]cpan.orgE<gt>
3133
5eabe055
N
3134=head1 CURRENT MAINTAINER
3135
3136Kenichi Ishigaki, E<lt>ishigaki[at]cpan.orgE<gt>
d5424315
DG
3137
3138=head1 COPYRIGHT AND LICENSE
3139
a1e5c561 3140Copyright 2007-2016 by Makamaka Hannyaharamitu
d5424315 3141
5eabe055
N
3142Most of the documentation is taken from JSON::XS by Marc Lehmann
3143
d5424315
DG
3144This library is free software; you can redistribute it and/or modify
3145it under the same terms as Perl itself.
3146
3147=cut