This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Update podlators to CPAN version 4.10
[perl5.git] / cpan / JSON-PP / lib / JSON / PP.pm
CommitLineData
d5424315
DG
1package JSON::PP;
2
3# JSON-2.0
4
5use 5.005;
6use strict;
dec273dc
CBW
7
8use Exporter ();
9BEGIN { @JSON::PP::ISA = ('Exporter') }
10
d5424315 11use overload ();
dec273dc 12use JSON::PP::Boolean;
d5424315
DG
13
14use Carp ();
d5424315
DG
15#use Devel::Peek;
16
9c399ae2 17$JSON::PP::VERSION = '2.97000';
d5424315
DG
18
19@JSON::PP::EXPORT = qw(encode_json decode_json from_json to_json);
20
21# instead of hash-access, i tried index-access for speed.
22# but this method is not faster than what i expected. so it will be changed.
23
24use constant P_ASCII => 0;
25use constant P_LATIN1 => 1;
26use constant P_UTF8 => 2;
27use constant P_INDENT => 3;
28use constant P_CANONICAL => 4;
29use constant P_SPACE_BEFORE => 5;
30use constant P_SPACE_AFTER => 6;
31use constant P_ALLOW_NONREF => 7;
32use constant P_SHRINK => 8;
33use constant P_ALLOW_BLESSED => 9;
34use constant P_CONVERT_BLESSED => 10;
35use constant P_RELAXED => 11;
36
37use constant P_LOOSE => 12;
38use constant P_ALLOW_BIGNUM => 13;
39use constant P_ALLOW_BAREKEY => 14;
40use constant P_ALLOW_SINGLEQUOTE => 15;
41use constant P_ESCAPE_SLASH => 16;
42use constant P_AS_NONBLESSED => 17;
43
44use constant P_ALLOW_UNKNOWN => 18;
45
46use constant OLD_PERL => $] < 5.008 ? 1 : 0;
dec273dc
CBW
47use constant USE_B => 0;
48
49BEGIN {
50if (USE_B) {
51 require B;
52}
53}
d5424315
DG
54
55BEGIN {
56 my @xs_compati_bit_properties = qw(
57 latin1 ascii utf8 indent canonical space_before space_after allow_nonref shrink
58 allow_blessed convert_blessed relaxed allow_unknown
59 );
60 my @pp_bit_properties = qw(
61 allow_singlequote allow_bignum loose
62 allow_barekey escape_slash as_nonblessed
63 );
64
a1e5c561 65 # Perl version check, Unicode handling is enabled?
d5424315 66 # Helper module sets @JSON::PP::_properties.
dec273dc 67 if ( OLD_PERL ) {
d5424315
DG
68 my $helper = $] >= 5.006 ? 'JSON::PP::Compat5006' : 'JSON::PP::Compat5005';
69 eval qq| require $helper |;
70 if ($@) { Carp::croak $@; }
71 }
72
73 for my $name (@xs_compati_bit_properties, @pp_bit_properties) {
dec273dc 74 my $property_id = 'P_' . uc($name);
d5424315
DG
75
76 eval qq/
77 sub $name {
78 my \$enable = defined \$_[1] ? \$_[1] : 1;
79
80 if (\$enable) {
dec273dc 81 \$_[0]->{PROPS}->[$property_id] = 1;
d5424315
DG
82 }
83 else {
dec273dc 84 \$_[0]->{PROPS}->[$property_id] = 0;
d5424315
DG
85 }
86
87 \$_[0];
88 }
89
90 sub get_$name {
dec273dc 91 \$_[0]->{PROPS}->[$property_id] ? 1 : '';
d5424315
DG
92 }
93 /;
94 }
95
96}
97
98
99
100# Functions
101
d5424315
DG
102my $JSON; # cache
103
104sub encode_json ($) { # encode
105 ($JSON ||= __PACKAGE__->new->utf8)->encode(@_);
106}
107
108
109sub decode_json { # decode
110 ($JSON ||= __PACKAGE__->new->utf8)->decode(@_);
111}
112
113# Obsoleted
114
115sub to_json($) {
116 Carp::croak ("JSON::PP::to_json has been renamed to encode_json.");
117}
118
119
120sub from_json($) {
121 Carp::croak ("JSON::PP::from_json has been renamed to decode_json.");
122}
123
124
125# Methods
126
127sub new {
128 my $class = shift;
129 my $self = {
130 max_depth => 512,
131 max_size => 0,
d5424315
DG
132 indent_length => 3,
133 };
134
135 bless $self, $class;
136}
137
138
139sub encode {
140 return $_[0]->PP_encode_json($_[1]);
141}
142
143
144sub decode {
145 return $_[0]->PP_decode_json($_[1], 0x00000000);
146}
147
148
149sub decode_prefix {
150 return $_[0]->PP_decode_json($_[1], 0x00000001);
151}
152
153
154# accessor
155
156
157# pretty printing
158
159sub pretty {
160 my ($self, $v) = @_;
161 my $enable = defined $v ? $v : 1;
162
163 if ($enable) { # indent_length(3) for JSON::XS compatibility
dec273dc 164 $self->indent(1)->space_before(1)->space_after(1);
d5424315
DG
165 }
166 else {
167 $self->indent(0)->space_before(0)->space_after(0);
168 }
169
170 $self;
171}
172
173# etc
174
175sub max_depth {
176 my $max = defined $_[1] ? $_[1] : 0x80000000;
177 $_[0]->{max_depth} = $max;
178 $_[0];
179}
180
181
182sub get_max_depth { $_[0]->{max_depth}; }
183
184
185sub max_size {
186 my $max = defined $_[1] ? $_[1] : 0;
187 $_[0]->{max_size} = $max;
188 $_[0];
189}
190
191
192sub get_max_size { $_[0]->{max_size}; }
193
194
195sub filter_json_object {
dec273dc
CBW
196 if (defined $_[1] and ref $_[1] eq 'CODE') {
197 $_[0]->{cb_object} = $_[1];
198 } else {
199 delete $_[0]->{cb_object};
200 }
d5424315
DG
201 $_[0]->{F_HOOK} = ($_[0]->{cb_object} or $_[0]->{cb_sk_object}) ? 1 : 0;
202 $_[0];
203}
204
205sub filter_json_single_key_object {
dec273dc
CBW
206 if (@_ == 1 or @_ > 3) {
207 Carp::croak("Usage: JSON::PP::filter_json_single_key_object(self, key, callback = undef)");
208 }
209 if (defined $_[2] and ref $_[2] eq 'CODE') {
d5424315 210 $_[0]->{cb_sk_object}->{$_[1]} = $_[2];
dec273dc
CBW
211 } else {
212 delete $_[0]->{cb_sk_object}->{$_[1]};
213 delete $_[0]->{cb_sk_object} unless %{$_[0]->{cb_sk_object} || {}};
d5424315
DG
214 }
215 $_[0]->{F_HOOK} = ($_[0]->{cb_object} or $_[0]->{cb_sk_object}) ? 1 : 0;
216 $_[0];
217}
218
219sub indent_length {
220 if (!defined $_[1] or $_[1] > 15 or $_[1] < 0) {
221 Carp::carp "The acceptable range of indent_length() is 0 to 15.";
222 }
223 else {
224 $_[0]->{indent_length} = $_[1];
225 }
226 $_[0];
227}
228
229sub get_indent_length {
230 $_[0]->{indent_length};
231}
232
233sub sort_by {
234 $_[0]->{sort_by} = defined $_[1] ? $_[1] : 1;
235 $_[0];
236}
237
238sub allow_bigint {
dec273dc
CBW
239 Carp::carp("allow_bigint() is obsoleted. use allow_bignum() instead.");
240 $_[0]->allow_bignum;
d5424315
DG
241}
242
243###############################
244
245###
246### Perl => JSON
247###
248
249
250{ # Convert
251
252 my $max_depth;
253 my $indent;
254 my $ascii;
255 my $latin1;
256 my $utf8;
257 my $space_before;
258 my $space_after;
259 my $canonical;
260 my $allow_blessed;
261 my $convert_blessed;
262
263 my $indent_length;
264 my $escape_slash;
265 my $bignum;
266 my $as_nonblessed;
267
268 my $depth;
269 my $indent_count;
270 my $keysort;
271
272
273 sub PP_encode_json {
274 my $self = shift;
275 my $obj = shift;
276
277 $indent_count = 0;
278 $depth = 0;
279
dec273dc 280 my $props = $self->{PROPS};
d5424315
DG
281
282 ($ascii, $latin1, $utf8, $indent, $canonical, $space_before, $space_after, $allow_blessed,
283 $convert_blessed, $escape_slash, $bignum, $as_nonblessed)
dec273dc 284 = @{$props}[P_ASCII .. P_SPACE_AFTER, P_ALLOW_BLESSED, P_CONVERT_BLESSED,
d5424315
DG
285 P_ESCAPE_SLASH, P_ALLOW_BIGNUM, P_AS_NONBLESSED];
286
287 ($max_depth, $indent_length) = @{$self}{qw/max_depth indent_length/};
288
289 $keysort = $canonical ? sub { $a cmp $b } : undef;
290
291 if ($self->{sort_by}) {
292 $keysort = ref($self->{sort_by}) eq 'CODE' ? $self->{sort_by}
293 : $self->{sort_by} =~ /\D+/ ? $self->{sort_by}
294 : sub { $a cmp $b };
295 }
296
297 encode_error("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)")
dec273dc 298 if(!ref $obj and !$props->[ P_ALLOW_NONREF ]);
d5424315
DG
299
300 my $str = $self->object_to_json($obj);
301
302 $str .= "\n" if ( $indent ); # JSON::XS 2.26 compatible
303
304 unless ($ascii or $latin1 or $utf8) {
305 utf8::upgrade($str);
306 }
307
dec273dc 308 if ($props->[ P_SHRINK ]) {
d5424315
DG
309 utf8::downgrade($str, 1);
310 }
311
312 return $str;
313 }
314
315
316 sub object_to_json {
317 my ($self, $obj) = @_;
318 my $type = ref($obj);
319
320 if($type eq 'HASH'){
321 return $self->hash_to_json($obj);
322 }
323 elsif($type eq 'ARRAY'){
324 return $self->array_to_json($obj);
325 }
326 elsif ($type) { # blessed object?
327 if (blessed($obj)) {
328
329 return $self->value_to_json($obj) if ( $obj->isa('JSON::PP::Boolean') );
330
331 if ( $convert_blessed and $obj->can('TO_JSON') ) {
332 my $result = $obj->TO_JSON();
05bd2440
CBW
333 if ( defined $result and ref( $result ) ) {
334 if ( refaddr( $obj ) eq refaddr( $result ) ) {
d5424315
DG
335 encode_error( sprintf(
336 "%s::TO_JSON method returned same object as was passed instead of a new one",
337 ref $obj
338 ) );
339 }
340 }
341
342 return $self->object_to_json( $result );
343 }
344
345 return "$obj" if ( $bignum and _is_bignum($obj) );
d5424315 346
dec273dc
CBW
347 if ($allow_blessed) {
348 return $self->blessed_to_json($obj) if ($as_nonblessed); # will be removed.
349 return 'null';
350 }
d5424315
DG
351 encode_error( sprintf("encountered object '%s', but neither allow_blessed "
352 . "nor convert_blessed settings are enabled", $obj)
dec273dc 353 );
d5424315
DG
354 }
355 else {
356 return $self->value_to_json($obj);
357 }
358 }
359 else{
360 return $self->value_to_json($obj);
361 }
362 }
363
364
365 sub hash_to_json {
366 my ($self, $obj) = @_;
367 my @res;
368
369 encode_error("json text or perl structure exceeds maximum nesting level (max_depth set too low?)")
370 if (++$depth > $max_depth);
371
372 my ($pre, $post) = $indent ? $self->_up_indent() : ('', '');
373 my $del = ($space_before ? ' ' : '') . ':' . ($space_after ? ' ' : '');
374
375 for my $k ( _sort( $obj ) ) {
376 if ( OLD_PERL ) { utf8::decode($k) } # key for Perl 5.6 / be optimized
dec273dc 377 push @res, $self->string_to_json( $k )
d5424315 378 . $del
dec273dc 379 . ( ref $obj->{$k} ? $self->object_to_json( $obj->{$k} ) : $self->value_to_json( $obj->{$k} ) );
d5424315
DG
380 }
381
382 --$depth;
383 $self->_down_indent() if ($indent);
384
dec273dc
CBW
385 return '{}' unless @res;
386 return '{' . $pre . join( ",$pre", @res ) . $post . '}';
d5424315
DG
387 }
388
389
390 sub array_to_json {
391 my ($self, $obj) = @_;
392 my @res;
393
394 encode_error("json text or perl structure exceeds maximum nesting level (max_depth set too low?)")
395 if (++$depth > $max_depth);
396
397 my ($pre, $post) = $indent ? $self->_up_indent() : ('', '');
398
399 for my $v (@$obj){
dec273dc 400 push @res, ref($v) ? $self->object_to_json($v) : $self->value_to_json($v);
d5424315
DG
401 }
402
403 --$depth;
404 $self->_down_indent() if ($indent);
405
dec273dc
CBW
406 return '[]' unless @res;
407 return '[' . $pre . join( ",$pre", @res ) . $post . ']';
d5424315
DG
408 }
409
dec273dc
CBW
410 sub _looks_like_number {
411 my $value = shift;
412 if (USE_B) {
413 my $b_obj = B::svref_2object(\$value);
414 my $flags = $b_obj->FLAGS;
415 return 1 if $flags & ( B::SVp_IOK() | B::SVp_NOK() ) and !( $flags & B::SVp_POK() );
416 return;
417 } else {
418 no warnings 'numeric';
419 # detect numbers
420 # string & "" -> ""
421 # number & "" -> 0 (with warning)
422 # nan and inf can detect as numbers, so check with * 0
423 return unless length((my $dummy = "") & $value);
424 return unless 0 + $value eq $value;
425 return 1 if $value * 0 == 0;
426 return -1; # inf/nan
427 }
428 }
d5424315
DG
429
430 sub value_to_json {
431 my ($self, $value) = @_;
432
433 return 'null' if(!defined $value);
434
d5424315
DG
435 my $type = ref($value);
436
dec273dc
CBW
437 if (!$type) {
438 if (_looks_like_number($value)) {
439 return $value;
440 }
441 return $self->string_to_json($value);
d5424315
DG
442 }
443 elsif( blessed($value) and $value->isa('JSON::PP::Boolean') ){
444 return $$value == 1 ? 'true' : 'false';
445 }
dec273dc 446 else {
d5424315
DG
447 if ((overload::StrVal($value) =~ /=(\w+)/)[0]) {
448 return $self->value_to_json("$value");
449 }
450
451 if ($type eq 'SCALAR' and defined $$value) {
452 return $$value eq '1' ? 'true'
453 : $$value eq '0' ? 'false'
454 : $self->{PROPS}->[ P_ALLOW_UNKNOWN ] ? 'null'
455 : encode_error("cannot encode reference to scalar");
456 }
457
dec273dc
CBW
458 if ( $self->{PROPS}->[ P_ALLOW_UNKNOWN ] ) {
459 return 'null';
460 }
461 else {
462 if ( $type eq 'SCALAR' or $type eq 'REF' ) {
d5424315 463 encode_error("cannot encode reference to scalar");
dec273dc
CBW
464 }
465 else {
d5424315 466 encode_error("encountered $value, but JSON can only represent references to arrays or hashes");
dec273dc
CBW
467 }
468 }
d5424315
DG
469
470 }
d5424315
DG
471 }
472
473
474 my %esc = (
475 "\n" => '\n',
476 "\r" => '\r',
477 "\t" => '\t',
478 "\f" => '\f',
479 "\b" => '\b',
480 "\"" => '\"',
481 "\\" => '\\\\',
482 "\'" => '\\\'',
483 );
484
485
486 sub string_to_json {
487 my ($self, $arg) = @_;
488
489 $arg =~ s/([\x22\x5c\n\r\t\f\b])/$esc{$1}/g;
490 $arg =~ s/\//\\\//g if ($escape_slash);
491 $arg =~ s/([\x00-\x08\x0b\x0e-\x1f])/'\\u00' . unpack('H2', $1)/eg;
492
493 if ($ascii) {
494 $arg = JSON_PP_encode_ascii($arg);
495 }
496
497 if ($latin1) {
498 $arg = JSON_PP_encode_latin1($arg);
499 }
500
501 if ($utf8) {
502 utf8::encode($arg);
503 }
504
505 return '"' . $arg . '"';
506 }
507
508
509 sub blessed_to_json {
510 my $reftype = reftype($_[1]) || '';
511 if ($reftype eq 'HASH') {
512 return $_[0]->hash_to_json($_[1]);
513 }
514 elsif ($reftype eq 'ARRAY') {
515 return $_[0]->array_to_json($_[1]);
516 }
517 else {
518 return 'null';
519 }
520 }
521
522
523 sub encode_error {
524 my $error = shift;
525 Carp::croak "$error";
526 }
527
528
529 sub _sort {
530 defined $keysort ? (sort $keysort (keys %{$_[0]})) : keys %{$_[0]};
531 }
532
533
534 sub _up_indent {
535 my $self = shift;
536 my $space = ' ' x $indent_length;
537
538 my ($pre,$post) = ('','');
539
540 $post = "\n" . $space x $indent_count;
541
542 $indent_count++;
543
544 $pre = "\n" . $space x $indent_count;
545
546 return ($pre,$post);
547 }
548
549
550 sub _down_indent { $indent_count--; }
551
552
553 sub PP_encode_box {
554 {
555 depth => $depth,
556 indent_count => $indent_count,
557 };
558 }
559
560} # Convert
561
562
563sub _encode_ascii {
564 join('',
565 map {
566 $_ <= 127 ?
567 chr($_) :
568 $_ <= 65535 ?
569 sprintf('\u%04x', $_) : sprintf('\u%x\u%x', _encode_surrogates($_));
570 } unpack('U*', $_[0])
571 );
572}
573
574
575sub _encode_latin1 {
576 join('',
577 map {
578 $_ <= 255 ?
579 chr($_) :
580 $_ <= 65535 ?
581 sprintf('\u%04x', $_) : sprintf('\u%x\u%x', _encode_surrogates($_));
582 } unpack('U*', $_[0])
583 );
584}
585
586
587sub _encode_surrogates { # from perlunicode
588 my $uni = $_[0] - 0x10000;
589 return ($uni / 0x400 + 0xD800, $uni % 0x400 + 0xDC00);
590}
591
592
593sub _is_bignum {
594 $_[0]->isa('Math::BigInt') or $_[0]->isa('Math::BigFloat');
595}
596
597
598
599#
600# JSON => Perl
601#
602
603my $max_intsize;
604
605BEGIN {
606 my $checkint = 1111;
607 for my $d (5..64) {
608 $checkint .= 1;
609 my $int = eval qq| $checkint |;
610 if ($int =~ /[eE]/) {
611 $max_intsize = $d - 1;
612 last;
613 }
614 }
615}
616
617{ # PARSE
618
619 my %escapes = ( # by Jeremy Muhlich <jmuhlich [at] bitflood.org>
620 b => "\x8",
621 t => "\x9",
622 n => "\xA",
623 f => "\xC",
624 r => "\xD",
625 '\\' => '\\',
626 '"' => '"',
627 '/' => '/',
628 );
629
630 my $text; # json data
631 my $at; # offset
a1e5c561 632 my $ch; # first character
d5424315
DG
633 my $len; # text length (changed according to UTF8 or NON UTF8)
634 # INTERNAL
635 my $depth; # nest counter
636 my $encoding; # json text encoding
637 my $is_valid_utf8; # temp variable
638 my $utf8_len; # utf8 byte length
639 # FLAGS
640 my $utf8; # must be utf8
a1e5c561 641 my $max_depth; # max nest number of objects and arrays
d5424315
DG
642 my $max_size;
643 my $relaxed;
644 my $cb_object;
645 my $cb_sk_object;
646
647 my $F_HOOK;
648
dec273dc 649 my $allow_bignum; # using Math::BigInt/BigFloat
d5424315
DG
650 my $singlequote; # loosely quoting
651 my $loose; #
652 my $allow_barekey; # bareKey
653
dec273dc
CBW
654 sub _detect_utf_encoding {
655 my $text = shift;
656 my @octets = unpack('C4', $text);
657 return 'unknown' unless defined $octets[3];
658 return ( $octets[0] and $octets[1]) ? 'UTF-8'
659 : (!$octets[0] and $octets[1]) ? 'UTF-16BE'
660 : (!$octets[0] and !$octets[1]) ? 'UTF-32BE'
661 : ( $octets[2] ) ? 'UTF-16LE'
662 : (!$octets[2] ) ? 'UTF-32LE'
663 : 'unknown';
664 }
d5424315
DG
665
666 sub PP_decode_json {
dec273dc 667 my ($self, $want_offset);
d5424315 668
dec273dc 669 ($self, $text, $want_offset) = @_;
d5424315
DG
670
671 ($at, $ch, $depth) = (0, '', 0);
672
673 if ( !defined $text or ref $text ) {
674 decode_error("malformed JSON string, neither array, object, number, string or atom");
675 }
676
dec273dc 677 my $props = $self->{PROPS};
d5424315 678
dec273dc
CBW
679 ($utf8, $relaxed, $loose, $allow_bignum, $allow_barekey, $singlequote)
680 = @{$props}[P_UTF8, P_RELAXED, P_LOOSE .. P_ALLOW_SINGLEQUOTE];
d5424315
DG
681
682 if ( $utf8 ) {
dec273dc
CBW
683 $encoding = _detect_utf_encoding($text);
684 if ($encoding ne 'UTF-8' and $encoding ne 'unknown') {
685 require Encode;
686 Encode::from_to($text, $encoding, 'utf-8');
687 } else {
688 utf8::downgrade( $text, 1 ) or Carp::croak("Wide character in subroutine entry");
689 }
d5424315
DG
690 }
691 else {
692 utf8::upgrade( $text );
87f3ebe4 693 utf8::encode( $text );
d5424315
DG
694 }
695
696 $len = length $text;
697
698 ($max_depth, $max_size, $cb_object, $cb_sk_object, $F_HOOK)
699 = @{$self}{qw/max_depth max_size cb_object cb_sk_object F_HOOK/};
700
701 if ($max_size > 1) {
702 use bytes;
703 my $bytes = length $text;
704 decode_error(
705 sprintf("attempted decode of JSON text of %s bytes size, but max_size is set to %s"
706 , $bytes, $max_size), 1
707 ) if ($bytes > $max_size);
708 }
709
d5424315
DG
710 white(); # remove head white space
711
dec273dc 712 decode_error("malformed JSON string, neither array, object, number, string or atom") unless defined $ch; # Is there a first character for JSON structure?
d5424315
DG
713
714 my $result = value();
715
dec273dc 716 if ( !$props->[ P_ALLOW_NONREF ] and !ref $result ) {
d5424315
DG
717 decode_error(
718 'JSON text must be an object or array (but found number, string, true, false or null,'
719 . ' use allow_nonref to allow this)', 1);
720 }
721
722 Carp::croak('something wrong.') if $len < $at; # we won't arrive here.
723
724 my $consumed = defined $ch ? $at - 1 : $at; # consumed JSON text length
725
726 white(); # remove tail white space
727
dec273dc
CBW
728 return ( $result, $consumed ) if $want_offset; # all right if decode_prefix
729
730 decode_error("garbage after JSON object") if defined $ch;
d5424315 731
dec273dc 732 $result;
d5424315
DG
733 }
734
735
736 sub next_chr {
737 return $ch = undef if($at >= $len);
738 $ch = substr($text, $at++, 1);
739 }
740
741
742 sub value {
743 white();
744 return if(!defined $ch);
745 return object() if($ch eq '{');
746 return array() if($ch eq '[');
747 return string() if($ch eq '"' or ($singlequote and $ch eq "'"));
748 return number() if($ch =~ /[0-9]/ or $ch eq '-');
749 return word();
750 }
751
752 sub string {
d5424315
DG
753 my $utf16;
754 my $is_utf8;
755
756 ($is_valid_utf8, $utf8_len) = ('', 0);
757
dec273dc 758 my $s = ''; # basically UTF8 flag on
d5424315
DG
759
760 if($ch eq '"' or ($singlequote and $ch eq "'")){
761 my $boundChar = $ch;
762
763 OUTER: while( defined(next_chr()) ){
764
765 if($ch eq $boundChar){
766 next_chr();
767
768 if ($utf16) {
769 decode_error("missing low surrogate character in surrogate pair");
770 }
771
772 utf8::decode($s) if($is_utf8);
773
774 return $s;
775 }
776 elsif($ch eq '\\'){
777 next_chr();
778 if(exists $escapes{$ch}){
779 $s .= $escapes{$ch};
780 }
781 elsif($ch eq 'u'){ # UNICODE handling
782 my $u = '';
783
784 for(1..4){
785 $ch = next_chr();
786 last OUTER if($ch !~ /[0-9a-fA-F]/);
787 $u .= $ch;
788 }
789
790 # U+D800 - U+DBFF
791 if ($u =~ /^[dD][89abAB][0-9a-fA-F]{2}/) { # UTF-16 high surrogate?
792 $utf16 = $u;
793 }
794 # U+DC00 - U+DFFF
795 elsif ($u =~ /^[dD][c-fC-F][0-9a-fA-F]{2}/) { # UTF-16 low surrogate?
796 unless (defined $utf16) {
797 decode_error("missing high surrogate character in surrogate pair");
798 }
799 $is_utf8 = 1;
800 $s .= JSON_PP_decode_surrogates($utf16, $u) || next;
801 $utf16 = undef;
802 }
803 else {
804 if (defined $utf16) {
805 decode_error("surrogate pair expected");
806 }
807
808 if ( ( my $hex = hex( $u ) ) > 127 ) {
809 $is_utf8 = 1;
810 $s .= JSON_PP_decode_unicode($u) || next;
811 }
812 else {
813 $s .= chr $hex;
814 }
815 }
816
817 }
818 else{
819 unless ($loose) {
820 $at -= 2;
821 decode_error('illegal backslash escape sequence in string');
822 }
823 $s .= $ch;
824 }
825 }
826 else{
827
828 if ( ord $ch > 127 ) {
87f3ebe4
CBW
829 unless( $ch = is_valid_utf8($ch) ) {
830 $at -= 1;
831 decode_error("malformed UTF-8 character in JSON string");
d5424315
DG
832 }
833 else {
87f3ebe4 834 $at += $utf8_len - 1;
d5424315
DG
835 }
836
837 $is_utf8 = 1;
838 }
839
840 if (!$loose) {
841 if ($ch =~ /[\x00-\x1f\x22\x5c]/) { # '/' ok
842 $at--;
843 decode_error('invalid character encountered while parsing JSON string');
844 }
845 }
846
847 $s .= $ch;
848 }
849 }
850 }
851
852 decode_error("unexpected end of string while parsing JSON string");
853 }
854
855
856 sub white {
857 while( defined $ch ){
dec273dc 858 if($ch eq '' or $ch =~ /\A[ \t\r\n]\z/){
d5424315
DG
859 next_chr();
860 }
dec273dc 861 elsif($relaxed and $ch eq '/'){
d5424315
DG
862 next_chr();
863 if(defined $ch and $ch eq '/'){
864 1 while(defined(next_chr()) and $ch ne "\n" and $ch ne "\r");
865 }
866 elsif(defined $ch and $ch eq '*'){
867 next_chr();
868 while(1){
869 if(defined $ch){
870 if($ch eq '*'){
871 if(defined(next_chr()) and $ch eq '/'){
872 next_chr();
873 last;
874 }
875 }
876 else{
877 next_chr();
878 }
879 }
880 else{
881 decode_error("Unterminated comment");
882 }
883 }
884 next;
885 }
886 else{
887 $at--;
888 decode_error("malformed JSON string, neither array, object, number, string or atom");
889 }
890 }
891 else{
892 if ($relaxed and $ch eq '#') { # correctly?
893 pos($text) = $at;
894 $text =~ /\G([^\n]*(?:\r\n|\r|\n|$))/g;
895 $at = pos($text);
896 next_chr;
897 next;
898 }
899
900 last;
901 }
902 }
903 }
904
905
906 sub array {
05bd2440 907 my $a = $_[0] || []; # you can use this code to use another array ref object.
d5424315
DG
908
909 decode_error('json text or perl structure exceeds maximum nesting level (max_depth set too low?)')
910 if (++$depth > $max_depth);
911
912 next_chr();
913 white();
914
915 if(defined $ch and $ch eq ']'){
916 --$depth;
917 next_chr();
918 return $a;
919 }
920 else {
921 while(defined($ch)){
922 push @$a, value();
923
924 white();
925
926 if (!defined $ch) {
927 last;
928 }
929
930 if($ch eq ']'){
931 --$depth;
932 next_chr();
933 return $a;
934 }
935
936 if($ch ne ','){
937 last;
938 }
939
940 next_chr();
941 white();
942
943 if ($relaxed and $ch eq ']') {
944 --$depth;
945 next_chr();
946 return $a;
947 }
948
949 }
950 }
951
dec273dc 952 $at-- if defined $ch and $ch ne '';
d5424315
DG
953 decode_error(", or ] expected while parsing array");
954 }
955
956
957 sub object {
05bd2440 958 my $o = $_[0] || {}; # you can use this code to use another hash ref object.
d5424315
DG
959 my $k;
960
961 decode_error('json text or perl structure exceeds maximum nesting level (max_depth set too low?)')
962 if (++$depth > $max_depth);
963 next_chr();
964 white();
965
966 if(defined $ch and $ch eq '}'){
967 --$depth;
968 next_chr();
969 if ($F_HOOK) {
970 return _json_object_hook($o);
971 }
972 return $o;
973 }
974 else {
975 while (defined $ch) {
976 $k = ($allow_barekey and $ch ne '"' and $ch ne "'") ? bareKey() : string();
977 white();
978
979 if(!defined $ch or $ch ne ':'){
980 $at--;
981 decode_error("':' expected");
982 }
983
984 next_chr();
985 $o->{$k} = value();
986 white();
987
988 last if (!defined $ch);
989
990 if($ch eq '}'){
991 --$depth;
992 next_chr();
993 if ($F_HOOK) {
994 return _json_object_hook($o);
995 }
996 return $o;
997 }
998
999 if($ch ne ','){
1000 last;
1001 }
1002
1003 next_chr();
1004 white();
1005
1006 if ($relaxed and $ch eq '}') {
1007 --$depth;
1008 next_chr();
1009 if ($F_HOOK) {
1010 return _json_object_hook($o);
1011 }
1012 return $o;
1013 }
1014
1015 }
1016
1017 }
1018
dec273dc 1019 $at-- if defined $ch and $ch ne '';
d5424315
DG
1020 decode_error(", or } expected while parsing object/hash");
1021 }
1022
1023
1024 sub bareKey { # doesn't strictly follow Standard ECMA-262 3rd Edition
1025 my $key;
1026 while($ch =~ /[^\x00-\x23\x25-\x2F\x3A-\x40\x5B-\x5E\x60\x7B-\x7F]/){
1027 $key .= $ch;
1028 next_chr();
1029 }
1030 return $key;
1031 }
1032
1033
1034 sub word {
1035 my $word = substr($text,$at-1,4);
1036
1037 if($word eq 'true'){
1038 $at += 3;
1039 next_chr;
1040 return $JSON::PP::true;
1041 }
1042 elsif($word eq 'null'){
1043 $at += 3;
1044 next_chr;
1045 return undef;
1046 }
1047 elsif($word eq 'fals'){
1048 $at += 3;
1049 if(substr($text,$at,1) eq 'e'){
1050 $at++;
1051 next_chr;
1052 return $JSON::PP::false;
1053 }
1054 }
1055
1056 $at--; # for decode_error report
1057
1058 decode_error("'null' expected") if ($word =~ /^n/);
1059 decode_error("'true' expected") if ($word =~ /^t/);
1060 decode_error("'false' expected") if ($word =~ /^f/);
1061 decode_error("malformed JSON string, neither array, object, number, string or atom");
1062 }
1063
1064
1065 sub number {
1066 my $n = '';
1067 my $v;
a1e5c561 1068 my $is_dec;
dec273dc 1069 my $is_exp;
d5424315
DG
1070
1071 if($ch eq '-'){
1072 $n = '-';
1073 next_chr;
1074 if (!defined $ch or $ch !~ /\d/) {
1075 decode_error("malformed number (no digits after initial minus)");
1076 }
1077 }
1078
dec273dc
CBW
1079 # According to RFC4627, hex or oct digits are invalid.
1080 if($ch eq '0'){
1081 my $peek = substr($text,$at,1);
1082 if($peek =~ /^[0-9a-dfA-DF]/){ # e may be valid (exponential)
1083 decode_error("malformed number (leading zero must not be followed by another digit)");
1084 }
1085 $n .= $ch;
1086 next_chr;
1087 }
1088
d5424315
DG
1089 while(defined $ch and $ch =~ /\d/){
1090 $n .= $ch;
1091 next_chr;
1092 }
1093
1094 if(defined $ch and $ch eq '.'){
1095 $n .= '.';
a1e5c561 1096 $is_dec = 1;
d5424315
DG
1097
1098 next_chr;
1099 if (!defined $ch or $ch !~ /\d/) {
1100 decode_error("malformed number (no digits after decimal point)");
1101 }
1102 else {
1103 $n .= $ch;
1104 }
1105
1106 while(defined(next_chr) and $ch =~ /\d/){
1107 $n .= $ch;
1108 }
1109 }
1110
1111 if(defined $ch and ($ch eq 'e' or $ch eq 'E')){
1112 $n .= $ch;
dec273dc 1113 $is_exp = 1;
d5424315
DG
1114 next_chr;
1115
1116 if(defined($ch) and ($ch eq '+' or $ch eq '-')){
1117 $n .= $ch;
1118 next_chr;
1119 if (!defined $ch or $ch =~ /\D/) {
1120 decode_error("malformed number (no digits after exp sign)");
1121 }
1122 $n .= $ch;
1123 }
1124 elsif(defined($ch) and $ch =~ /\d/){
1125 $n .= $ch;
1126 }
1127 else {
1128 decode_error("malformed number (no digits after exp sign)");
1129 }
1130
1131 while(defined(next_chr) and $ch =~ /\d/){
1132 $n .= $ch;
1133 }
1134
1135 }
1136
1137 $v .= $n;
1138
dec273dc
CBW
1139 if ($is_dec or $is_exp) {
1140 if ($allow_bignum) {
1141 require Math::BigFloat;
1142 return Math::BigFloat->new($v);
d5424315 1143 }
dec273dc
CBW
1144 } else {
1145 if (length $v > $max_intsize) {
1146 if ($allow_bignum) { # from Adam Sussman
1147 require Math::BigInt;
1148 return Math::BigInt->new($v);
1149 }
1150 else {
1151 return "$v";
1152 }
d5424315
DG
1153 }
1154 }
d5424315 1155
a1e5c561 1156 return $is_dec ? $v/1.0 : 0+$v;
d5424315
DG
1157 }
1158
1159
1160 sub is_valid_utf8 {
1161
1162 $utf8_len = $_[0] =~ /[\x00-\x7F]/ ? 1
1163 : $_[0] =~ /[\xC2-\xDF]/ ? 2
1164 : $_[0] =~ /[\xE0-\xEF]/ ? 3
1165 : $_[0] =~ /[\xF0-\xF4]/ ? 4
1166 : 0
1167 ;
1168
1169 return unless $utf8_len;
1170
1171 my $is_valid_utf8 = substr($text, $at - 1, $utf8_len);
1172
1173 return ( $is_valid_utf8 =~ /^(?:
1174 [\x00-\x7F]
1175 |[\xC2-\xDF][\x80-\xBF]
1176 |[\xE0][\xA0-\xBF][\x80-\xBF]
1177 |[\xE1-\xEC][\x80-\xBF][\x80-\xBF]
1178 |[\xED][\x80-\x9F][\x80-\xBF]
1179 |[\xEE-\xEF][\x80-\xBF][\x80-\xBF]
1180 |[\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF]
1181 |[\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]
1182 |[\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF]
1183 )$/x ) ? $is_valid_utf8 : '';
1184 }
1185
1186
1187 sub decode_error {
1188 my $error = shift;
1189 my $no_rep = shift;
1190 my $str = defined $text ? substr($text, $at) : '';
1191 my $mess = '';
dec273dc
CBW
1192 my $type = 'U*';
1193
1194 if ( OLD_PERL ) {
1195 my $type = $] < 5.006 ? 'C*'
1196 : utf8::is_utf8( $str ) ? 'U*' # 5.6
1197 : 'C*'
1198 ;
1199 }
d5424315
DG
1200
1201 for my $c ( unpack( $type, $str ) ) { # emulate pv_uni_display() ?
1202 $mess .= $c == 0x07 ? '\a'
1203 : $c == 0x09 ? '\t'
1204 : $c == 0x0a ? '\n'
1205 : $c == 0x0d ? '\r'
1206 : $c == 0x0c ? '\f'
1207 : $c < 0x20 ? sprintf('\x{%x}', $c)
1208 : $c == 0x5c ? '\\\\'
1209 : $c < 0x80 ? chr($c)
1210 : sprintf('\x{%x}', $c)
1211 ;
1212 if ( length $mess >= 20 ) {
1213 $mess .= '...';
1214 last;
1215 }
1216 }
1217
1218 unless ( length $mess ) {
1219 $mess = '(end of string)';
1220 }
1221
1222 Carp::croak (
1223 $no_rep ? "$error" : "$error, at character offset $at (before \"$mess\")"
1224 );
1225
1226 }
1227
1228
1229 sub _json_object_hook {
1230 my $o = $_[0];
1231 my @ks = keys %{$o};
1232
1233 if ( $cb_sk_object and @ks == 1 and exists $cb_sk_object->{ $ks[0] } and ref $cb_sk_object->{ $ks[0] } ) {
1234 my @val = $cb_sk_object->{ $ks[0] }->( $o->{$ks[0]} );
1235 if (@val == 1) {
1236 return $val[0];
1237 }
1238 }
1239
1240 my @val = $cb_object->($o) if ($cb_object);
1241 if (@val == 0 or @val > 1) {
1242 return $o;
1243 }
1244 else {
1245 return $val[0];
1246 }
1247 }
1248
1249
1250 sub PP_decode_box {
1251 {
1252 text => $text,
1253 at => $at,
1254 ch => $ch,
1255 len => $len,
1256 depth => $depth,
1257 encoding => $encoding,
1258 is_valid_utf8 => $is_valid_utf8,
1259 };
1260 }
1261
1262} # PARSE
1263
1264
1265sub _decode_surrogates { # from perlunicode
1266 my $uni = 0x10000 + (hex($_[0]) - 0xD800) * 0x400 + (hex($_[1]) - 0xDC00);
1267 my $un = pack('U*', $uni);
1268 utf8::encode( $un );
1269 return $un;
1270}
1271
1272
1273sub _decode_unicode {
1274 my $un = pack('U', hex shift);
1275 utf8::encode( $un );
1276 return $un;
1277}
1278
1279#
1280# Setup for various Perl versions (the code from JSON::PP58)
1281#
1282
1283BEGIN {
1284
1285 unless ( defined &utf8::is_utf8 ) {
1286 require Encode;
1287 *utf8::is_utf8 = *Encode::is_utf8;
1288 }
1289
dec273dc 1290 if ( !OLD_PERL ) {
d5424315
DG
1291 *JSON::PP::JSON_PP_encode_ascii = \&_encode_ascii;
1292 *JSON::PP::JSON_PP_encode_latin1 = \&_encode_latin1;
1293 *JSON::PP::JSON_PP_decode_surrogates = \&_decode_surrogates;
1294 *JSON::PP::JSON_PP_decode_unicode = \&_decode_unicode;
d5424315 1295
dec273dc
CBW
1296 if ($] < 5.008003) { # join() in 5.8.0 - 5.8.2 is broken.
1297 package JSON::PP;
1298 require subs;
1299 subs->import('join');
1300 eval q|
1301 sub join {
1302 return '' if (@_ < 2);
1303 my $j = shift;
1304 my $str = shift;
1305 for (@_) { $str .= $j . $_; }
1306 return $str;
1307 }
1308 |;
1309 }
d5424315
DG
1310 }
1311
1312
1313 sub JSON::PP::incr_parse {
1314 local $Carp::CarpLevel = 1;
1315 ( $_[0]->{_incr_parser} ||= JSON::PP::IncrParser->new )->incr_parse( @_ );
1316 }
1317
1318
1319 sub JSON::PP::incr_skip {
1320 ( $_[0]->{_incr_parser} ||= JSON::PP::IncrParser->new )->incr_skip;
1321 }
1322
1323
1324 sub JSON::PP::incr_reset {
1325 ( $_[0]->{_incr_parser} ||= JSON::PP::IncrParser->new )->incr_reset;
1326 }
1327
1328 eval q{
1329 sub JSON::PP::incr_text : lvalue {
1330 $_[0]->{_incr_parser} ||= JSON::PP::IncrParser->new;
1331
dec273dc 1332 if ( $_[0]->{_incr_parser}->{incr_pos} ) {
a1e5c561 1333 Carp::croak("incr_text cannot be called when the incremental parser already started parsing");
d5424315
DG
1334 }
1335 $_[0]->{_incr_parser}->{incr_text};
1336 }
1337 } if ( $] >= 5.006 );
1338
1339} # Setup for various Perl versions (the code from JSON::PP58)
1340
1341
1342###############################
1343# Utilities
1344#
1345
1346BEGIN {
1347 eval 'require Scalar::Util';
1348 unless($@){
1349 *JSON::PP::blessed = \&Scalar::Util::blessed;
1350 *JSON::PP::reftype = \&Scalar::Util::reftype;
05bd2440 1351 *JSON::PP::refaddr = \&Scalar::Util::refaddr;
d5424315 1352 }
dec273dc 1353 else{ # This code is from Scalar::Util.
d5424315
DG
1354 # warn $@;
1355 eval 'sub UNIVERSAL::a_sub_not_likely_to_be_here { ref($_[0]) }';
1356 *JSON::PP::blessed = sub {
1357 local($@, $SIG{__DIE__}, $SIG{__WARN__});
1358 ref($_[0]) ? eval { $_[0]->a_sub_not_likely_to_be_here } : undef;
1359 };
dec273dc 1360 require B;
d5424315
DG
1361 my %tmap = qw(
1362 B::NULL SCALAR
1363 B::HV HASH
1364 B::AV ARRAY
1365 B::CV CODE
1366 B::IO IO
1367 B::GV GLOB
1368 B::REGEXP REGEXP
1369 );
1370 *JSON::PP::reftype = sub {
1371 my $r = shift;
1372
1373 return undef unless length(ref($r));
1374
1375 my $t = ref(B::svref_2object($r));
1376
1377 return
1378 exists $tmap{$t} ? $tmap{$t}
1379 : length(ref($$r)) ? 'REF'
1380 : 'SCALAR';
1381 };
05bd2440
CBW
1382 *JSON::PP::refaddr = sub {
1383 return undef unless length(ref($_[0]));
1384
1385 my $addr;
1386 if(defined(my $pkg = blessed($_[0]))) {
1387 $addr .= bless $_[0], 'Scalar::Util::Fake';
1388 bless $_[0], $pkg;
1389 }
1390 else {
1391 $addr .= $_[0]
1392 }
1393
1394 $addr =~ /0x(\w+)/;
1395 local $^W;
1396 #no warnings 'portable';
1397 hex($1);
1398 }
d5424315
DG
1399 }
1400}
1401
1402
a1e5c561 1403# shamelessly copied and modified from JSON::XS code.
d5424315
DG
1404
1405$JSON::PP::true = do { bless \(my $dummy = 1), "JSON::PP::Boolean" };
1406$JSON::PP::false = do { bless \(my $dummy = 0), "JSON::PP::Boolean" };
1407
9c399ae2 1408sub is_bool { blessed $_[0] and $_[0]->isa("JSON::PP::Boolean"); }
d5424315
DG
1409
1410sub true { $JSON::PP::true }
1411sub false { $JSON::PP::false }
1412sub null { undef; }
1413
1414###############################
1415
d5424315
DG
1416package JSON::PP::IncrParser;
1417
1418use strict;
1419
1420use constant INCR_M_WS => 0; # initial whitespace skipping
1421use constant INCR_M_STR => 1; # inside string
1422use constant INCR_M_BS => 2; # inside backslash
1423use constant INCR_M_JSON => 3; # outside anything, count nesting
1424use constant INCR_M_C0 => 4;
1425use constant INCR_M_C1 => 5;
1426
1427$JSON::PP::IncrParser::VERSION = '1.01';
1428
d5424315
DG
1429sub new {
1430 my ( $class ) = @_;
1431
1432 bless {
1433 incr_nest => 0,
1434 incr_text => undef,
dec273dc
CBW
1435 incr_pos => 0,
1436 incr_mode => 0,
d5424315
DG
1437 }, $class;
1438}
1439
1440
1441sub incr_parse {
1442 my ( $self, $coder, $text ) = @_;
1443
1444 $self->{incr_text} = '' unless ( defined $self->{incr_text} );
1445
1446 if ( defined $text ) {
1447 if ( utf8::is_utf8( $text ) and !utf8::is_utf8( $self->{incr_text} ) ) {
1448 utf8::upgrade( $self->{incr_text} ) ;
1449 utf8::decode( $self->{incr_text} ) ;
1450 }
1451 $self->{incr_text} .= $text;
1452 }
1453
d5424315 1454 if ( defined wantarray ) {
dec273dc
CBW
1455 my $max_size = $coder->get_max_size;
1456 my $p = $self->{incr_pos};
1457 my @ret;
1458 {
d5424315 1459 do {
dec273dc
CBW
1460 unless ( $self->{incr_nest} <= 0 and $self->{incr_mode} == INCR_M_JSON ) {
1461 $self->_incr_parse( $coder );
d5424315 1462
dec273dc
CBW
1463 if ( $max_size and $self->{incr_pos} > $max_size ) {
1464 Carp::croak("attempted decode of JSON text of $self->{incr_pos} bytes size, but max_size is set to $max_size");
1465 }
1466 unless ( $self->{incr_nest} <= 0 and $self->{incr_mode} == INCR_M_JSON ) {
1467 # as an optimisation, do not accumulate white space in the incr buffer
1468 if ( $self->{incr_mode} == INCR_M_WS and $self->{incr_pos} ) {
1469 $self->{incr_pos} = 0;
1470 $self->{incr_text} = '';
1471 }
1472 last;
1473 }
d5424315
DG
1474 }
1475
dec273dc
CBW
1476 my ($obj, $offset) = $coder->PP_decode_json( $self->{incr_text}, 0x00000001 );
1477 push @ret, $obj;
1478 use bytes;
1479 $self->{incr_text} = substr( $self->{incr_text}, $offset || 0 );
1480 $self->{incr_pos} = 0;
1481 $self->{incr_nest} = 0;
1482 $self->{incr_mode} = 0;
1483 last unless wantarray;
1484 } while ( wantarray );
1485 }
d5424315 1486
dec273dc 1487 if ( wantarray ) {
d5424315
DG
1488 return @ret;
1489 }
1490 else { # in scalar context
dec273dc 1491 return $ret[0] ? $ret[0] : undef;
d5424315 1492 }
d5424315 1493 }
d5424315
DG
1494}
1495
1496
1497sub _incr_parse {
dec273dc
CBW
1498 my ($self, $coder) = @_;
1499 my $text = $self->{incr_text};
d5424315 1500 my $len = length $text;
dec273dc 1501 my $p = $self->{incr_pos};
d5424315 1502
dec273dc 1503INCR_PARSE:
d5424315 1504 while ( $len > $p ) {
dec273dc
CBW
1505 my $s = substr( $text, $p, 1 );
1506 last INCR_PARSE unless defined $s;
1507 my $mode = $self->{incr_mode};
1508
1509 if ( $mode == INCR_M_WS ) {
1510 while ( $len > $p ) {
1511 $s = substr( $text, $p, 1 );
1512 last INCR_PARSE unless defined $s;
1513 if ( ord($s) > 0x20 ) {
1514 if ( $s eq '#' ) {
1515 $self->{incr_mode} = INCR_M_C0;
1516 redo INCR_PARSE;
1517 } else {
1518 $self->{incr_mode} = INCR_M_JSON;
1519 redo INCR_PARSE;
1520 }
1521 }
1522 $p++;
d5424315 1523 }
dec273dc
CBW
1524 } elsif ( $mode == INCR_M_BS ) {
1525 $p++;
1526 $self->{incr_mode} = INCR_M_STR;
1527 redo INCR_PARSE;
1528 } elsif ( $mode == INCR_M_C0 or $mode == INCR_M_C1 ) {
1529 while ( $len > $p ) {
1530 $s = substr( $text, $p, 1 );
1531 last INCR_PARSE unless defined $s;
1532 if ( $s eq "\n" ) {
1533 $self->{incr_mode} = $self->{incr_mode} == INCR_M_C0 ? INCR_M_WS : INCR_M_JSON;
d5424315
DG
1534 last;
1535 }
dec273dc 1536 $p++;
d5424315 1537 }
dec273dc
CBW
1538 next;
1539 } elsif ( $mode == INCR_M_STR ) {
1540 while ( $len > $p ) {
1541 $s = substr( $text, $p, 1 );
1542 last INCR_PARSE unless defined $s;
1543 if ( $s eq '"' ) {
1544 $p++;
1545 $self->{incr_mode} = INCR_M_JSON;
1546
1547 last INCR_PARSE unless $self->{incr_nest};
1548 redo INCR_PARSE;
d5424315 1549 }
dec273dc
CBW
1550 elsif ( $s eq '\\' ) {
1551 $p++;
1552 if ( !defined substr($text, $p, 1) ) {
1553 $self->{incr_mode} = INCR_M_BS;
1554 last INCR_PARSE;
1555 }
1556 }
1557 $p++;
d5424315 1558 }
dec273dc
CBW
1559 } elsif ( $mode == INCR_M_JSON ) {
1560 while ( $len > $p ) {
1561 $s = substr( $text, $p++, 1 );
1562 if ( $s eq "\x00" ) {
1563 $p--;
1564 last INCR_PARSE;
1565 } elsif ( $s eq "\x09" or $s eq "\x0a" or $s eq "\x0d" or $s eq "\x20" ) {
1566 if ( !$self->{incr_nest} ) {
1567 $p--; # do not eat the whitespace, let the next round do it
1568 last INCR_PARSE;
1569 }
1570 next;
1571 } elsif ( $s eq '"' ) {
1572 $self->{incr_mode} = INCR_M_STR;
1573 redo INCR_PARSE;
1574 } elsif ( $s eq '[' or $s eq '{' ) {
1575 if ( ++$self->{incr_nest} > $coder->get_max_depth ) {
1576 Carp::croak('json text or perl structure exceeds maximum nesting level (max_depth set too low?)');
1577 }
1578 next;
1579 } elsif ( $s eq ']' or $s eq '}' ) {
1580 if ( --$self->{incr_nest} <= 0 ) {
1581 last INCR_PARSE;
1582 }
1583 } elsif ( $s eq '#' ) {
1584 $self->{incr_mode} = INCR_M_C1;
1585 redo INCR_PARSE;
d5424315
DG
1586 }
1587 }
d5424315 1588 }
d5424315
DG
1589 }
1590
dec273dc
CBW
1591 $self->{incr_pos} = $p;
1592 $self->{incr_parsing} = $p ? 1 : 0; # for backward compatibility
d5424315
DG
1593}
1594
1595
1596sub incr_text {
dec273dc 1597 if ( $_[0]->{incr_pos} ) {
a1e5c561 1598 Carp::croak("incr_text cannot be called when the incremental parser already started parsing");
d5424315
DG
1599 }
1600 $_[0]->{incr_text};
1601}
1602
1603
1604sub incr_skip {
1605 my $self = shift;
dec273dc
CBW
1606 $self->{incr_text} = substr( $self->{incr_text}, $self->{incr_pos} );
1607 $self->{incr_pos} = 0;
1608 $self->{incr_mode} = 0;
1609 $self->{incr_nest} = 0;
d5424315
DG
1610}
1611
1612
1613sub incr_reset {
1614 my $self = shift;
1615 $self->{incr_text} = undef;
dec273dc 1616 $self->{incr_pos} = 0;
d5424315
DG
1617 $self->{incr_mode} = 0;
1618 $self->{incr_nest} = 0;
d5424315
DG
1619}
1620
1621###############################
1622
1623
16241;
1625__END__
1626=pod
1627
1628=head1 NAME
1629
1630JSON::PP - JSON::XS compatible pure-Perl module.
1631
1632=head1 SYNOPSIS
1633
1634 use JSON::PP;
1635
1636 # exported functions, they croak on error
1637 # and expect/generate UTF-8
1638
1639 $utf8_encoded_json_text = encode_json $perl_hash_or_arrayref;
1640 $perl_hash_or_arrayref = decode_json $utf8_encoded_json_text;
1641
1642 # OO-interface
1643
dec273dc 1644 $json = JSON::PP->new->ascii->pretty->allow_nonref;
d5424315 1645
dec273dc 1646 $pretty_printed_json_text = $json->encode( $perl_scalar );
d5424315
DG
1647 $perl_scalar = $json->decode( $json_text );
1648
d5424315
DG
1649 # Note that JSON version 2.0 and above will automatically use
1650 # JSON::XS or JSON::PP, so you should be able to just:
1651
1652 use JSON;
1653
1654
1655=head1 VERSION
1656
9c399ae2 1657 2.97000
d5424315
DG
1658
1659=head1 DESCRIPTION
1660
dec273dc
CBW
1661JSON::PP is a pure perl JSON decoder/encoder (as of RFC4627, which
1662we know is obsolete but we still stick to; see below for an option
1663to support part of RFC7159), and (almost) compatible to much
1664faster L<JSON::XS> written by Marc Lehmann in C. JSON::PP works as
1665a fallback module when you use L<JSON> module without having
1666installed JSON::XS.
1667
1668Because of this fallback feature of JSON.pm, JSON::PP tries not to
1669be more JavaScript-friendly than JSON::XS (i.e. not to escape extra
1670characters such as U+2028 and U+2029 nor support RFC7159/ECMA-404),
1671in order for you not to lose such JavaScript-friendliness silently
1672when you use JSON.pm and install JSON::XS for speed or by accident.
1673If you need JavaScript-friendly RFC7159-compliant pure perl module,
1674try L<JSON::Tiny>, which is derived from L<Mojolicious> web
1675framework and is also smaller and faster than JSON::PP.
1676
1677JSON::PP has been in the Perl core since Perl 5.14, mainly for
1678CPAN toolchain modules to parse META.json.
d5424315
DG
1679
1680=head1 FUNCTIONAL INTERFACE
1681
dec273dc
CBW
1682This section is taken from JSON::XS almost verbatim. C<encode_json>
1683and C<decode_json> are exported by default.
d5424315
DG
1684
1685=head2 encode_json
1686
1687 $json_text = encode_json $perl_scalar
1688
dec273dc
CBW
1689Converts the given Perl data structure to a UTF-8 encoded, binary string
1690(that is, the string contains octets only). Croaks on error.
d5424315
DG
1691
1692This function call is functionally identical to:
1693
1694 $json_text = JSON::PP->new->utf8->encode($perl_scalar)
1695
dec273dc
CBW
1696Except being faster.
1697
d5424315
DG
1698=head2 decode_json
1699
1700 $perl_scalar = decode_json $json_text
1701
1702The opposite of C<encode_json>: expects an UTF-8 (binary) string and tries
1703to parse that as an UTF-8 encoded JSON text, returning the resulting
dec273dc 1704reference. Croaks on error.
d5424315
DG
1705
1706This function call is functionally identical to:
1707
1708 $perl_scalar = JSON::PP->new->utf8->decode($json_text)
1709
dec273dc
CBW
1710Except being faster.
1711
d5424315
DG
1712=head2 JSON::PP::is_bool
1713
1714 $is_boolean = JSON::PP::is_bool($scalar)
1715
1716Returns true if the passed scalar represents either JSON::PP::true or
1717JSON::PP::false, two constants that act like C<1> and C<0> respectively
1718and are also used to represent JSON C<true> and C<false> in Perl strings.
1719
d5424315
DG
1720See L<MAPPING>, below, for more information on how JSON values are mapped to
1721Perl.
1722
dec273dc 1723=head1 OBJECT-ORIENTED INTERFACE
d5424315 1724
dec273dc 1725This section is also taken from JSON::XS.
d5424315 1726
dec273dc
CBW
1727The object oriented interface lets you configure your own encoding or
1728decoding style, within the limits of supported formats.
d5424315
DG
1729
1730=head2 new
1731
7942a65c 1732 $json = JSON::PP->new
d5424315 1733
dec273dc
CBW
1734Creates a new JSON::PP object that can be used to de/encode JSON
1735strings. All boolean flags described below are by default I<disabled>.
d5424315 1736
dec273dc 1737The mutators for flags all return the JSON::PP object again and thus calls can
d5424315
DG
1738be chained:
1739
1740 my $json = JSON::PP->new->utf8->space_after->encode({a => [1,2]})
1741 => {"a": [1, 2]}
1742
1743=head2 ascii
1744
1745 $json = $json->ascii([$enable])
1746
1747 $enabled = $json->get_ascii
1748
dec273dc
CBW
1749If C<$enable> is true (or missing), then the C<encode> method will not
1750generate characters outside the code range C<0..127> (which is ASCII). Any
1751Unicode characters outside that range will be escaped using either a
1752single \uXXXX (BMP characters) or a double \uHHHH\uLLLLL escape sequence,
1753as per RFC4627. The resulting encoded JSON text can be treated as a native
1754Unicode string, an ascii-encoded, latin1-encoded or UTF-8 encoded string,
1755or any other superset of ASCII.
1756
1757If C<$enable> is false, then the C<encode> method will not escape Unicode
1758characters unless required by the JSON syntax or other flags. This results
1759in a faster and more compact format.
d5424315 1760
dec273dc 1761See also the section I<ENCODING/CODESET FLAG NOTES> later in this document.
d5424315 1762
dec273dc
CBW
1763The main use for this flag is to produce JSON texts that can be
1764transmitted over a 7-bit channel, as the encoded JSON texts will not
1765contain any 8 bit characters.
d5424315
DG
1766
1767 JSON::PP->new->ascii(1)->encode([chr 0x10401])
1768 => ["\ud801\udc01"]
1769
1770=head2 latin1
1771
1772 $json = $json->latin1([$enable])
1773
1774 $enabled = $json->get_latin1
1775
dec273dc
CBW
1776If C<$enable> is true (or missing), then the C<encode> method will encode
1777the resulting JSON text as latin1 (or iso-8859-1), escaping any characters
1778outside the code range C<0..255>. The resulting string can be treated as a
1779latin1-encoded JSON text or a native Unicode string. The C<decode> method
1780will not be affected in any way by this flag, as C<decode> by default
1781expects Unicode, which is a strict superset of latin1.
d5424315 1782
dec273dc
CBW
1783If C<$enable> is false, then the C<encode> method will not escape Unicode
1784characters unless required by the JSON syntax or other flags.
d5424315 1785
dec273dc
CBW
1786See also the section I<ENCODING/CODESET FLAG NOTES> later in this document.
1787
1788The main use for this flag is efficiently encoding binary data as JSON
1789text, as most octets will not be escaped, resulting in a smaller encoded
1790size. The disadvantage is that the resulting JSON text is encoded
1791in latin1 (and must correctly be treated as such when storing and
1792transferring), a rare encoding for JSON. It is therefore most useful when
1793you want to store data structures known to contain binary data efficiently
1794in files or databases, not when talking to other JSON encoders/decoders.
d5424315 1795
dec273dc
CBW
1796 JSON::PP->new->latin1->encode (["\x{89}\x{abc}"]
1797 => ["\x{89}\\u0abc"] # (perl syntax, U+abc escaped, U+89 not)
d5424315
DG
1798
1799=head2 utf8
1800
1801 $json = $json->utf8([$enable])
1802
1803 $enabled = $json->get_utf8
1804
dec273dc
CBW
1805If C<$enable> is true (or missing), then the C<encode> method will encode
1806the JSON result into UTF-8, as required by many protocols, while the
1807C<decode> method expects to be handled an UTF-8-encoded string. Please
1808note that UTF-8-encoded strings do not contain any characters outside the
1809range C<0..255>, they are thus useful for bytewise/binary I/O. In future
1810versions, enabling this option might enable autodetection of the UTF-16
1811and UTF-32 encoding families, as described in RFC4627.
d5424315 1812
dec273dc
CBW
1813If C<$enable> is false, then the C<encode> method will return the JSON
1814string as a (non-encoded) Unicode string, while C<decode> expects thus a
1815Unicode string. Any decoding or encoding (e.g. to UTF-8 or UTF-16) needs
1816to be done yourself, e.g. using the Encode module.
d5424315 1817
dec273dc 1818See also the section I<ENCODING/CODESET FLAG NOTES> later in this document.
d5424315
DG
1819
1820Example, output UTF-16BE-encoded JSON:
1821
1822 use Encode;
1823 $jsontext = encode "UTF-16BE", JSON::PP->new->encode ($object);
1824
1825Example, decode UTF-32LE-encoded JSON:
1826
1827 use Encode;
1828 $object = JSON::PP->new->decode (decode "UTF-32LE", $jsontext);
1829
d5424315
DG
1830=head2 pretty
1831
1832 $json = $json->pretty([$enable])
1833
1834This enables (or disables) all of the C<indent>, C<space_before> and
dec273dc
CBW
1835C<space_after> (and in the future possibly more) flags in one call to
1836generate the most readable (or most compact) form possible.
d5424315
DG
1837
1838=head2 indent
1839
1840 $json = $json->indent([$enable])
1841
1842 $enabled = $json->get_indent
1843
dec273dc
CBW
1844If C<$enable> is true (or missing), then the C<encode> method will use a multiline
1845format as output, putting every array member or object/hash key-value pair
1846into its own line, indenting them properly.
1847
1848If C<$enable> is false, no newlines or indenting will be produced, and the
1849resulting JSON text is guaranteed not to contain any C<newlines>.
1850
1851This setting has no effect when decoding JSON texts.
1852
d5424315
DG
1853The default indent space length is three.
1854You can use C<indent_length> to change the length.
1855
1856=head2 space_before
1857
1858 $json = $json->space_before([$enable])
1859
1860 $enabled = $json->get_space_before
1861
1862If C<$enable> is true (or missing), then the C<encode> method will add an extra
1863optional space before the C<:> separating keys from values in JSON objects.
1864
1865If C<$enable> is false, then the C<encode> method will not add any extra
1866space at those places.
1867
dec273dc
CBW
1868This setting has no effect when decoding JSON texts. You will also
1869most likely combine this setting with C<space_after>.
d5424315
DG
1870
1871Example, space_before enabled, space_after and indent disabled:
1872
1873 {"key" :"value"}
1874
1875=head2 space_after
1876
1877 $json = $json->space_after([$enable])
1878
1879 $enabled = $json->get_space_after
1880
1881If C<$enable> is true (or missing), then the C<encode> method will add an extra
1882optional space after the C<:> separating keys from values in JSON objects
1883and extra whitespace after the C<,> separating key-value pairs and array
1884members.
1885
1886If C<$enable> is false, then the C<encode> method will not add any extra
1887space at those places.
1888
1889This setting has no effect when decoding JSON texts.
1890
1891Example, space_before and indent disabled, space_after enabled:
1892
1893 {"key": "value"}
1894
1895=head2 relaxed
1896
1897 $json = $json->relaxed([$enable])
1898
1899 $enabled = $json->get_relaxed
1900
1901If C<$enable> is true (or missing), then C<decode> will accept some
1902extensions to normal JSON syntax (see below). C<encode> will not be
1903affected in anyway. I<Be aware that this option makes you accept invalid
1904JSON texts as if they were valid!>. I suggest only to use this option to
1905parse application-specific files written by humans (configuration files,
1906resource files etc.)
1907
1908If C<$enable> is false (the default), then C<decode> will only accept
1909valid JSON texts.
1910
1911Currently accepted extensions are:
1912
1913=over 4
1914
1915=item * list items can have an end-comma
1916
1917JSON I<separates> array elements and key-value pairs with commas. This
1918can be annoying if you write JSON texts manually and want to be able to
1919quickly append elements, so this extension accepts comma at the end of
1920such items not just between them:
1921
1922 [
1923 1,
1924 2, <- this comma not normally allowed
1925 ]
1926 {
1927 "k1": "v1",
1928 "k2": "v2", <- this comma not normally allowed
1929 }
1930
1931=item * shell-style '#'-comments
1932
1933Whenever JSON allows whitespace, shell-style comments are additionally
1934allowed. They are terminated by the first carriage-return or line-feed
1935character, after which more white-space and comments are allowed.
1936
1937 [
1938 1, # this comment not allowed in JSON
1939 # neither this one...
1940 ]
1941
dec273dc
CBW
1942=item * C-style multiple-line '/* */'-comments (JSON::PP only)
1943
1944Whenever JSON allows whitespace, C-style multiple-line comments are additionally
1945allowed. Everything between C</*> and C<*/> is a comment, after which
1946more white-space and comments are allowed.
1947
1948 [
1949 1, /* this comment not allowed in JSON */
1950 /* neither this one... */
1951 ]
1952
1953=item * C++-style one-line '//'-comments (JSON::PP only)
1954
1955Whenever JSON allows whitespace, C++-style one-line comments are additionally
1956allowed. They are terminated by the first carriage-return or line-feed
1957character, after which more white-space and comments are allowed.
1958
1959 [
1960 1, // this comment not allowed in JSON
1961 // neither this one...
1962 ]
1963
d5424315
DG
1964=back
1965
1966=head2 canonical
1967
1968 $json = $json->canonical([$enable])
1969
1970 $enabled = $json->get_canonical
1971
1972If C<$enable> is true (or missing), then the C<encode> method will output JSON objects
1973by sorting their keys. This is adding a comparatively high overhead.
1974
1975If C<$enable> is false, then the C<encode> method will output key-value
1976pairs in the order Perl stores them (which will likely change between runs
dec273dc
CBW
1977of the same script, and can change even within the same run from 5.18
1978onwards).
d5424315
DG
1979
1980This option is useful if you want the same data structure to be encoded as
1981the same JSON text (given the same overall settings). If it is disabled,
1982the same hash might be encoded differently even if contains the same data,
1983as key-value pairs have no inherent ordering in Perl.
1984
1985This setting has no effect when decoding JSON texts.
1986
dec273dc 1987This setting has currently no effect on tied hashes.
d5424315
DG
1988
1989=head2 allow_nonref
1990
1991 $json = $json->allow_nonref([$enable])
1992
1993 $enabled = $json->get_allow_nonref
1994
1995If C<$enable> is true (or missing), then the C<encode> method can convert a
1996non-reference into its corresponding string, number or null JSON value,
1997which is an extension to RFC4627. Likewise, C<decode> will accept those JSON
1998values instead of croaking.
1999
2000If C<$enable> is false, then the C<encode> method will croak if it isn't
2001passed an arrayref or hashref, as JSON texts must either be an object
2002or array. Likewise, C<decode> will croak if given something that is not a
2003JSON object or array.
2004
dec273dc
CBW
2005Example, encode a Perl scalar as JSON value with enabled C<allow_nonref>,
2006resulting in an invalid JSON text:
2007
d5424315
DG
2008 JSON::PP->new->allow_nonref->encode ("Hello, World!")
2009 => "Hello, World!"
2010
2011=head2 allow_unknown
2012
2013 $json = $json->allow_unknown ([$enable])
2014
2015 $enabled = $json->get_allow_unknown
2016
dec273dc 2017If C<$enable> is true (or missing), then C<encode> will I<not> throw an
d5424315 2018exception when it encounters values it cannot represent in JSON (for
dec273dc
CBW
2019example, filehandles) but instead will encode a JSON C<null> value. Note
2020that blessed objects are not included here and are handled separately by
2021c<allow_blessed>.
d5424315 2022
dec273dc 2023If C<$enable> is false (the default), then C<encode> will throw an
d5424315
DG
2024exception when it encounters anything it cannot encode as JSON.
2025
dec273dc
CBW
2026This option does not affect C<decode> in any way, and it is recommended to
2027leave it off unless you know your communications partner.
d5424315
DG
2028
2029=head2 allow_blessed
2030
2031 $json = $json->allow_blessed([$enable])
2032
2033 $enabled = $json->get_allow_blessed
2034
dec273dc
CBW
2035See L<OBJECT SERIALISATION> for details.
2036
d5424315 2037If C<$enable> is true (or missing), then the C<encode> method will not
dec273dc
CBW
2038barf when it encounters a blessed reference that it cannot convert
2039otherwise. Instead, a JSON C<null> value is encoded instead of the object.
d5424315
DG
2040
2041If C<$enable> is false (the default), then C<encode> will throw an
dec273dc
CBW
2042exception when it encounters a blessed object that it cannot convert
2043otherwise.
2044
2045This setting has no effect on C<decode>.
d5424315
DG
2046
2047=head2 convert_blessed
2048
2049 $json = $json->convert_blessed([$enable])
2050
2051 $enabled = $json->get_convert_blessed
2052
dec273dc
CBW
2053See L<OBJECT SERIALISATION> for details.
2054
d5424315
DG
2055If C<$enable> is true (or missing), then C<encode>, upon encountering a
2056blessed object, will check for the availability of the C<TO_JSON> method
dec273dc
CBW
2057on the object's class. If found, it will be called in scalar context and
2058the resulting scalar will be encoded instead of the object.
d5424315
DG
2059
2060The C<TO_JSON> method may safely call die if it wants. If C<TO_JSON>
2061returns other blessed objects, those will be handled in the same
2062way. C<TO_JSON> must take care of not causing an endless recursion cycle
2063(== crash) in this case. The name of C<TO_JSON> was chosen because other
2064methods called by the Perl core (== not by the user of the object) are
dec273dc 2065usually in upper case letters and to avoid collisions with any C<to_json>
d5424315
DG
2066function or method.
2067
dec273dc
CBW
2068If C<$enable> is false (the default), then C<encode> will not consider
2069this type of conversion.
d5424315 2070
dec273dc 2071This setting has no effect on C<decode>.
d5424315
DG
2072
2073=head2 filter_json_object
2074
2075 $json = $json->filter_json_object([$coderef])
2076
2077When C<$coderef> is specified, it will be called from C<decode> each
dec273dc
CBW
2078time it decodes a JSON object. The only argument is a reference to the
2079newly-created hash. If the code references returns a single scalar (which
2080need not be a reference), this value (i.e. a copy of that scalar to avoid
2081aliasing) is inserted into the deserialised data structure. If it returns
2082an empty list (NOTE: I<not> C<undef>, which is a valid scalar), the
2083original deserialised hash will be inserted. This setting can slow down
2084decoding considerably.
d5424315
DG
2085
2086When C<$coderef> is omitted or undefined, any existing callback will
2087be removed and C<decode> will not change the deserialised hash in any
2088way.
2089
2090Example, convert all JSON objects into the integer 5:
2091
2092 my $js = JSON::PP->new->filter_json_object (sub { 5 });
2093 # returns [5]
2094 $js->decode ('[{}]'); # the given subroutine takes a hash reference.
2095 # throw an exception because allow_nonref is not enabled
2096 # so a lone 5 is not allowed.
2097 $js->decode ('{"a":1, "b":2}');
2098
2099=head2 filter_json_single_key_object
2100
2101 $json = $json->filter_json_single_key_object($key [=> $coderef])
2102
2103Works remotely similar to C<filter_json_object>, but is only called for
2104JSON objects having a single key named C<$key>.
2105
2106This C<$coderef> is called before the one specified via
2107C<filter_json_object>, if any. It gets passed the single value in the JSON
2108object. If it returns a single value, it will be inserted into the data
2109structure. If it returns nothing (not even C<undef> but the empty list),
2110the callback from C<filter_json_object> will be called next, as if no
2111single-key callback were specified.
2112
2113If C<$coderef> is omitted or undefined, the corresponding callback will be
2114disabled. There can only ever be one callback for a given key.
2115
2116As this callback gets called less often then the C<filter_json_object>
2117one, decoding speed will not usually suffer as much. Therefore, single-key
2118objects make excellent targets to serialise Perl objects into, especially
2119as single-key JSON objects are as close to the type-tagged value concept
2120as JSON gets (it's basically an ID/VALUE tuple). Of course, JSON does not
2121support this in any way, so you need to make sure your data never looks
2122like a serialised Perl hash.
2123
2124Typical names for the single object key are C<__class_whatever__>, or
2125C<$__dollars_are_rarely_used__$> or C<}ugly_brace_placement>, or even
2126things like C<__class_md5sum(classname)__>, to reduce the risk of clashing
2127with real hashes.
2128
2129Example, decode JSON objects of the form C<< { "__widget__" => <id> } >>
2130into the corresponding C<< $WIDGET{<id>} >> object:
2131
2132 # return whatever is in $WIDGET{5}:
2133 JSON::PP
2134 ->new
2135 ->filter_json_single_key_object (__widget__ => sub {
2136 $WIDGET{ $_[0] }
2137 })
2138 ->decode ('{"__widget__": 5')
2139
2140 # this can be used with a TO_JSON method in some "widget" class
2141 # for serialisation to json:
2142 sub WidgetBase::TO_JSON {
2143 my ($self) = @_;
2144
2145 unless ($self->{id}) {
2146 $self->{id} = ..get..some..id..;
2147 $WIDGET{$self->{id}} = $self;
2148 }
2149
2150 { __widget__ => $self->{id} }
2151 }
2152
2153=head2 shrink
2154
2155 $json = $json->shrink([$enable])
2156
2157 $enabled = $json->get_shrink
2158
dec273dc
CBW
2159If C<$enable> is true (or missing), the string returned by C<encode> will
2160be shrunk (i.e. downgraded if possible).
d5424315 2161
dec273dc
CBW
2162The actual definition of what shrink does might change in future versions,
2163but it will always try to save space at the expense of time.
d5424315 2164
dec273dc 2165If C<$enable> is false, then JSON::PP does nothing.
d5424315
DG
2166
2167=head2 max_depth
2168
2169 $json = $json->max_depth([$maximum_nesting_depth])
2170
2171 $max_depth = $json->get_max_depth
2172
2173Sets the maximum nesting level (default C<512>) accepted while encoding
2174or decoding. If a higher nesting level is detected in JSON text or a Perl
2175data structure, then the encoder and decoder will stop and croak at that
2176point.
2177
2178Nesting level is defined by number of hash- or arrayrefs that the encoder
2179needs to traverse to reach a given point or the number of C<{> or C<[>
2180characters without their matching closing parenthesis crossed to reach a
2181given character in a string.
2182
dec273dc
CBW
2183Setting the maximum depth to one disallows any nesting, so that ensures
2184that the object is only a single hash/object or array.
2185
d5424315
DG
2186If no argument is given, the highest possible setting will be used, which
2187is rarely useful.
2188
a1e5c561 2189See L<JSON::XS/SECURITY CONSIDERATIONS> for more info on why this is useful.
d5424315 2190
d5424315
DG
2191=head2 max_size
2192
2193 $json = $json->max_size([$maximum_string_size])
2194
2195 $max_size = $json->get_max_size
2196
2197Set the maximum length a JSON text may have (in bytes) where decoding is
2198being attempted. The default is C<0>, meaning no limit. When C<decode>
2199is called on a string that is longer then this many bytes, it will not
2200attempt to decode the string but throw an exception. This setting has no
2201effect on C<encode> (yet).
2202
2203If no argument is given, the limit check will be deactivated (same as when
2204C<0> is specified).
2205
a1e5c561 2206See L<JSON::XS/SECURITY CONSIDERATIONS> for more info on why this is useful.
d5424315
DG
2207
2208=head2 encode
2209
2210 $json_text = $json->encode($perl_scalar)
2211
dec273dc
CBW
2212Converts the given Perl value or data structure to its JSON
2213representation. Croaks on error.
d5424315
DG
2214
2215=head2 decode
2216
2217 $perl_scalar = $json->decode($json_text)
2218
2219The opposite of C<encode>: expects a JSON text and tries to parse it,
2220returning the resulting simple scalar or reference. Croaks on error.
2221
d5424315
DG
2222=head2 decode_prefix
2223
2224 ($perl_scalar, $characters) = $json->decode_prefix($json_text)
2225
2226This works like the C<decode> method, but instead of raising an exception
2227when there is trailing garbage after the first JSON object, it will
2228silently stop parsing there and return the number of characters consumed
2229so far.
2230
dec273dc
CBW
2231This is useful if your JSON texts are not delimited by an outer protocol
2232and you need to know where the JSON text ends.
2233
2234 JSON::PP->new->decode_prefix ("[1] the tail")
2235 => ([1], 3)
2236
2237=head1 FLAGS FOR JSON::PP ONLY
2238
2239The following flags and properties are for JSON::PP only. If you use
2240any of these, you can't make your application run faster by replacing
2241JSON::PP with JSON::XS. If you need these and also speed boost,
2242try L<Cpanel::JSON::XS>, a fork of JSON::XS by Reini Urban, which
2243supports some of these.
2244
2245=head2 allow_singlequote
2246
2247 $json = $json->allow_singlequote([$enable])
2248 $enabled = $json->get_allow_singlequote
2249
2250If C<$enable> is true (or missing), then C<decode> will accept
2251invalid JSON texts that contain strings that begin and end with
2252single quotation marks. C<encode> will not be affected in anyway.
2253I<Be aware that this option makes you accept invalid JSON texts
2254as if they were valid!>. I suggest only to use this option to
2255parse application-specific files written by humans (configuration
2256files, resource files etc.)
2257
2258If C<$enable> is false (the default), then C<decode> will only accept
2259valid JSON texts.
2260
2261 $json->allow_singlequote->decode(qq|{"foo":'bar'}|);
2262 $json->allow_singlequote->decode(qq|{'foo':"bar"}|);
2263 $json->allow_singlequote->decode(qq|{'foo':'bar'}|);
2264
2265=head2 allow_barekey
2266
2267 $json = $json->allow_barekey([$enable])
2268 $enabled = $json->get_allow_barekey
2269
2270If C<$enable> is true (or missing), then C<decode> will accept
2271invalid JSON texts that contain JSON objects whose names don't
2272begin and end with quotation marks. C<encode> will not be affected
2273in anyway. I<Be aware that this option makes you accept invalid JSON
2274texts as if they were valid!>. I suggest only to use this option to
2275parse application-specific files written by humans (configuration
2276files, resource files etc.)
2277
2278If C<$enable> is false (the default), then C<decode> will only accept
2279valid JSON texts.
2280
2281 $json->allow_barekey->decode(qq|{foo:"bar"}|);
2282
2283=head2 allow_bignum
2284
2285 $json = $json->allow_bignum([$enable])
2286 $enabled = $json->get_allow_bignum
2287
2288If C<$enable> is true (or missing), then C<decode> will convert
2289big integers Perl cannot handle as integer into L<Math::BigInt>
2290objects and convert floating numbers into L<Math::BigFloat>
2291objects. C<encode> will convert C<Math::BigInt> and C<Math::BigFloat>
2292objects into JSON numbers.
2293
2294 $json->allow_nonref->allow_bignum;
2295 $bigfloat = $json->decode('2.000000000000000000000000001');
2296 print $json->encode($bigfloat);
2297 # => 2.000000000000000000000000001
2298
2299See also L<MAPPING>.
2300
2301=head2 loose
2302
2303 $json = $json->loose([$enable])
2304 $enabled = $json->get_loose
2305
2306If C<$enable> is true (or missing), then C<decode> will accept
2307invalid JSON texts that contain unescaped [\x00-\x1f\x22\x5c]
2308characters. C<encode> will not be affected in anyway.
2309I<Be aware that this option makes you accept invalid JSON texts
2310as if they were valid!>. I suggest only to use this option to
2311parse application-specific files written by humans (configuration
2312files, resource files etc.)
2313
2314If C<$enable> is false (the default), then C<decode> will only accept
2315valid JSON texts.
2316
2317 $json->loose->decode(qq|["abc
2318 def"]|);
2319
2320=head2 escape_slash
2321
2322 $json = $json->escape_slash([$enable])
2323 $enabled = $json->get_escape_slash
2324
2325If C<$enable> is true (or missing), then C<encode> will explicitly
2326escape I<slash> (solidus; C<U+002F>) characters to reduce the risk of
2327XSS (cross site scripting) that may be caused by C<< </script> >>
2328in a JSON text, with the cost of bloating the size of JSON texts.
2329
2330This option may be useful when you embed JSON in HTML, but embedding
2331arbitrary JSON in HTML (by some HTML template toolkit or by string
2332interpolation) is risky in general. You must escape necessary
2333characters in correct order, depending on the context.
2334
2335C<decode> will not be affected in anyway.
2336
2337=head2 indent_length
2338
2339 $json = $json->indent_length($number_of_spaces)
2340 $length = $json->get_indent_length
2341
2342This option is only useful when you also enable C<indent> or C<pretty>.
2343
2344JSON::XS indents with three spaces when you C<encode> (if requested
2345by C<indent> or C<pretty>), and the number cannot be changed.
2346JSON::PP allows you to change/get the number of indent spaces with these
2347mutator/accessor. The default number of spaces is three (the same as
2348JSON::XS), and the acceptable range is from C<0> (no indentation;
2349it'd be better to disable indentation by C<indent(0)>) to C<15>.
2350
2351=head2 sort_by
2352
2353 $json = $json->sort_by($code_ref)
2354 $json = $json->sort_by($subroutine_name)
2355
2356If you just want to sort keys (names) in JSON objects when you
2357C<encode>, enable C<canonical> option (see above) that allows you to
2358sort object keys alphabetically.
2359
2360If you do need to sort non-alphabetically for whatever reasons,
2361you can give a code reference (or a subroutine name) to C<sort_by>,
2362then the argument will be passed to Perl's C<sort> built-in function.
2363
2364As the sorting is done in the JSON::PP scope, you usually need to
2365prepend C<JSON::PP::> to the subroutine name, and the special variables
2366C<$a> and C<$b> used in the subrontine used by C<sort> function.
2367
2368Example:
2369
2370 my %ORDER = (id => 1, class => 2, name => 3);
2371 $json->sort_by(sub {
2372 ($ORDER{$JSON::PP::a} // 999) <=> ($ORDER{$JSON::PP::b} // 999)
2373 or $JSON::PP::a cmp $JSON::PP::b
2374 });
2375 print $json->encode([
2376 {name => 'CPAN', id => 1, href => 'http://cpan.org'}
2377 ]);
2378 # [{"id":1,"name":"CPAN","href":"http://cpan.org"}]
2379
2380Note that C<sort_by> affects all the plain hashes in the data structure.
2381If you need finer control, C<tie> necessary hashes with a module that
2382implements ordered hash (such as L<Hash::Ordered> and L<Tie::IxHash>).
2383C<canonical> and C<sort_by> don't affect the key order in C<tie>d
2384hashes.
2385
2386 use Hash::Ordered;
2387 tie my %hash, 'Hash::Ordered',
2388 (name => 'CPAN', id => 1, href => 'http://cpan.org');
2389 print $json->encode([\%hash]);
2390 # [{"name":"CPAN","id":1,"href":"http://cpan.org"}] # order is kept
d5424315
DG
2391
2392=head1 INCREMENTAL PARSING
2393
dec273dc 2394This section is also taken from JSON::XS.
d5424315 2395
dec273dc
CBW
2396In some cases, there is the need for incremental parsing of JSON
2397texts. While this module always has to keep both JSON text and resulting
2398Perl data structure in memory at one time, it does allow you to parse a
2399JSON stream incrementally. It does so by accumulating text until it has
2400a full JSON object, which it then can decode. This process is similar to
2401using C<decode_prefix> to see if a full JSON object is available, but
2402is much more efficient (and can be implemented with a minimum of method
2403calls).
d5424315 2404
dec273dc 2405JSON::PP will only attempt to parse the JSON text once it is sure it
d5424315
DG
2406has enough text to get a decisive result, using a very simple but
2407truly incremental parser. This means that it sometimes won't stop as
dec273dc
CBW
2408early as the full parser, for example, it doesn't detect mismatched
2409parentheses. The only thing it guarantees is that it starts decoding as
d5424315
DG
2410soon as a syntactically valid JSON text has been seen. This means you need
2411to set resource limits (e.g. C<max_size>) to ensure the parser will stop
2412parsing in the presence if syntax errors.
2413
2414The following methods implement this incremental parser.
2415
2416=head2 incr_parse
2417
2418 $json->incr_parse( [$string] ) # void context
2419
2420 $obj_or_undef = $json->incr_parse( [$string] ) # scalar context
2421
2422 @obj_or_empty = $json->incr_parse( [$string] ) # list context
2423
2424This is the central parsing function. It can both append new text and
2425extract objects from the stream accumulated so far (both of these
2426functions are optional).
2427
2428If C<$string> is given, then this string is appended to the already
2429existing JSON fragment stored in the C<$json> object.
2430
2431After that, if the function is called in void context, it will simply
2432return without doing anything further. This can be used to add more text
2433in as many chunks as you want.
2434
2435If the method is called in scalar context, then it will try to extract
2436exactly I<one> JSON object. If that is successful, it will return this
2437object, otherwise it will return C<undef>. If there is a parse error,
2438this method will croak just as C<decode> would do (one can then use
a1e5c561 2439C<incr_skip> to skip the erroneous part). This is the most common way of
d5424315
DG
2440using the method.
2441
2442And finally, in list context, it will try to extract as many objects
2443from the stream as it can find and return them, or the empty list
dec273dc
CBW
2444otherwise. For this to work, there must be no separators (other than
2445whitespace) between the JSON objects or arrays, instead they must be
2446concatenated back-to-back. If an error occurs, an exception will be
2447raised as in the scalar context case. Note that in this case, any
2448previously-parsed JSON texts will be lost.
d5424315 2449
dec273dc
CBW
2450Example: Parse some JSON arrays/objects in a given string and return
2451them.
d5424315 2452
dec273dc 2453 my @objs = JSON::PP->new->incr_parse ("[5][7][1,2]");
d5424315
DG
2454
2455=head2 incr_text
2456
2457 $lvalue_string = $json->incr_text
2458
2459This method returns the currently stored JSON fragment as an lvalue, that
2460is, you can manipulate it. This I<only> works when a preceding call to
2461C<incr_parse> in I<scalar context> successfully returned an object. Under
2462all other circumstances you must not call this function (I mean it.
2463although in simple tests it might actually work, it I<will> fail under
2464real world conditions). As a special exception, you can also call this
2465method before having parsed anything.
2466
dec273dc
CBW
2467That means you can only use this function to look at or manipulate text
2468before or after complete JSON objects, not while the parser is in the
2469middle of parsing a JSON object.
2470
d5424315
DG
2471This function is useful in two cases: a) finding the trailing text after a
2472JSON object or b) parsing multiple JSON objects separated by non-JSON text
2473(such as commas).
2474
d5424315
DG
2475=head2 incr_skip
2476
2477 $json->incr_skip
2478
dec273dc
CBW
2479This will reset the state of the incremental parser and will remove
2480the parsed text from the input buffer so far. This is useful after
2481C<incr_parse> died, in which case the input buffer and incremental parser
2482state is left unchanged, to skip the text parsed so far and to reset the
2483parse state.
2484
2485The difference to C<incr_reset> is that only text until the parse error
2486occurred is removed.
d5424315
DG
2487
2488=head2 incr_reset
2489
2490 $json->incr_reset
2491
2492This completely resets the incremental parser, that is, after this call,
2493it will be as if the parser had never parsed anything.
2494
a1e5c561 2495This is useful if you want to repeatedly parse JSON objects and want to
d5424315
DG
2496ignore any trailing data, which means you have to reset the parser after
2497each successful decode.
2498
d5424315
DG
2499=head1 MAPPING
2500
dec273dc
CBW
2501Most of this section is also taken from JSON::XS.
2502
2503This section describes how JSON::PP maps Perl values to JSON values and
2504vice versa. These mappings are designed to "do the right thing" in most
2505circumstances automatically, preserving round-tripping characteristics
2506(what you put in comes out as something equivalent).
d5424315 2507
dec273dc
CBW
2508For the more enlightened: note that in the following descriptions,
2509lowercase I<perl> refers to the Perl interpreter, while uppercase I<Perl>
2510refers to the abstract Perl language itself.
d5424315
DG
2511
2512=head2 JSON -> PERL
2513
2514=over 4
2515
2516=item object
2517
2518A JSON object becomes a reference to a hash in Perl. No ordering of object
dec273dc 2519keys is preserved (JSON does not preserve object key ordering itself).
d5424315
DG
2520
2521=item array
2522
2523A JSON array becomes a reference to an array in Perl.
2524
2525=item string
2526
2527A JSON string becomes a string scalar in Perl - Unicode codepoints in JSON
2528are represented by the same codepoints in the Perl string, so no manual
2529decoding is necessary.
2530
2531=item number
2532
2533A JSON number becomes either an integer, numeric (floating point) or
2534string scalar in perl, depending on its range and any fractional parts. On
2535the Perl level, there is no difference between those as Perl handles all
2536the conversion details, but an integer may take slightly less memory and
2537might represent more values exactly than floating point numbers.
2538
dec273dc 2539If the number consists of digits only, JSON::PP will try to represent
d5424315
DG
2540it as an integer value. If that fails, it will try to represent it as
2541a numeric (floating point) value if that is possible without loss of
2542precision. Otherwise it will preserve the number as a string value (in
2543which case you lose roundtripping ability, as the JSON number will be
a1e5c561 2544re-encoded to a JSON string).
d5424315
DG
2545
2546Numbers containing a fractional or exponential part will always be
2547represented as numeric (floating point) values, possibly at a loss of
2548precision (in which case you might lose perfect roundtripping ability, but
2549the JSON number will still be re-encoded as a JSON number).
2550
2551Note that precision is not accuracy - binary floating point values cannot
2552represent most decimal fractions exactly, and when converting from and to
dec273dc 2553floating point, JSON::PP only guarantees precision up to but not including
a1e5c561 2554the least significant bit.
d5424315 2555
dec273dc
CBW
2556When C<allow_bignum> is enabled, big integer values and any numeric
2557values will be converted into L<Math::BigInt> and L<Math::BigFloat>
2558objects respectively, without becoming string scalars or losing
2559precision.
d5424315
DG
2560
2561=item true, false
2562
2563These JSON atoms become C<JSON::PP::true> and C<JSON::PP::false>,
2564respectively. They are overloaded to act almost exactly like the numbers
a1e5c561 2565C<1> and C<0>. You can check whether a scalar is a JSON boolean by using
dec273dc 2566the C<JSON::PP::is_bool> function.
d5424315
DG
2567
2568=item null
2569
2570A JSON null atom becomes C<undef> in Perl.
2571
dec273dc
CBW
2572=item shell-style comments (C<< # I<text> >>)
2573
2574As a nonstandard extension to the JSON syntax that is enabled by the
2575C<relaxed> setting, shell-style comments are allowed. They can start
2576anywhere outside strings and go till the end of the line.
d5424315
DG
2577
2578=back
2579
2580
2581=head2 PERL -> JSON
2582
2583The mapping from Perl to JSON is slightly more difficult, as Perl is a
2584truly typeless language, so we can only guess which JSON type is meant by
2585a Perl value.
2586
2587=over 4
2588
2589=item hash references
2590
dec273dc
CBW
2591Perl hash references become JSON objects. As there is no inherent
2592ordering in hash keys (or JSON objects), they will usually be encoded
2593in a pseudo-random order. JSON::PP can optionally sort the hash keys
2594(determined by the I<canonical> flag and/or I<sort_by> property), so
2595the same data structure will serialise to the same JSON text (given
2596same settings and version of JSON::PP), but this incurs a runtime
2597overhead and is only rarely useful, e.g. when you want to compare some
2598JSON text against another for equality.
d5424315
DG
2599
2600=item array references
2601
2602Perl array references become JSON arrays.
2603
2604=item other references
2605
2606Other unblessed references are generally not allowed and will cause an
2607exception to be thrown, except for references to the integers C<0> and
2608C<1>, which get turned into C<false> and C<true> atoms in JSON. You can
dec273dc
CBW
2609also use C<JSON::PP::false> and C<JSON::PP::true> to improve
2610readability.
d5424315 2611
dec273dc 2612 to_json [\0, JSON::PP::true] # yields [false,true]
d5424315 2613
dec273dc 2614=item JSON::PP::true, JSON::PP::false
d5424315
DG
2615
2616These special values become JSON true and JSON false values,
2617respectively. You can also use C<\1> and C<\0> directly if you want.
2618
dec273dc 2619=item JSON::PP::null
d5424315 2620
dec273dc 2621This special value becomes JSON null.
d5424315 2622
dec273dc 2623=item blessed objects
d5424315 2624
dec273dc
CBW
2625Blessed objects are not directly representable in JSON, but C<JSON::PP>
2626allows various ways of handling objects. See L<OBJECT SERIALISATION>,
2627below, for details.
d5424315
DG
2628
2629=item simple scalars
2630
2631Simple Perl scalars (any scalar that is not a reference) are the most
dec273dc 2632difficult objects to encode: JSON::PP will encode undefined scalars as
d5424315
DG
2633JSON C<null> values, scalars that have last been used in a string context
2634before encoding as JSON strings, and anything else as number value:
2635
2636 # dump as number
2637 encode_json [2] # yields [2]
2638 encode_json [-3.0e17] # yields [-3e+17]
2639 my $value = 5; encode_json [$value] # yields [5]
2640
2641 # used as string, so dump as string
2642 print $value;
2643 encode_json [$value] # yields ["5"]
2644
2645 # undef becomes null
2646 encode_json [undef] # yields [null]
2647
2648You can force the type to be a string by stringifying it:
2649
2650 my $x = 3.1; # some variable containing a number
2651 "$x"; # stringified
2652 $x .= ""; # another, more awkward way to stringify
2653 print $x; # perl does it for you, too, quite often
dec273dc 2654 # (but for older perls)
d5424315
DG
2655
2656You can force the type to be a number by numifying it:
2657
2658 my $x = "3"; # some variable containing a string
2659 $x += 0; # numify it, ensuring it will be dumped as a number
a1e5c561 2660 $x *= 1; # same thing, the choice is yours.
d5424315 2661
a1e5c561 2662You cannot currently force the type in other, less obscure, ways.
d5424315
DG
2663
2664Note that numerical precision has the same meaning as under Perl (so
2665binary to decimal conversion follows the same rules as in Perl, which
2666can differ to other languages). Also, your perl interpreter might expose
2667extensions to the floating point numbers of your platform, such as
2668infinities or NaN's - these cannot be represented in JSON, and it is an
2669error to pass those in.
2670
dec273dc
CBW
2671JSON::PP (and JSON::XS) trusts what you pass to C<encode> method
2672(or C<encode_json> function) is a clean, validated data structure with
2673values that can be represented as valid JSON values only, because it's
2674not from an external data source (as opposed to JSON texts you pass to
2675C<decode> or C<decode_json>, which JSON::PP considers tainted and
2676doesn't trust). As JSON::PP doesn't know exactly what you and consumers
2677of your JSON texts want the unexpected values to be (you may want to
2678convert them into null, or to stringify them with or without
2679normalisation (string representation of infinities/NaN may vary
2680depending on platforms), or to croak without conversion), you're advised
2681to do what you and your consumers need before you encode, and also not
2682to numify values that may start with values that look like a number
2683(including infinities/NaN), without validating.
d5424315
DG
2684
2685=back
2686
dec273dc 2687=head2 OBJECT SERIALISATION
d5424315 2688
dec273dc 2689As for Perl objects, JSON::PP only supports a pure JSON representation (without the ability to deserialise the object automatically again).
d5424315 2690
dec273dc 2691=head3 SERIALISATION
d5424315 2692
dec273dc
CBW
2693What happens when C<JSON::PP> encounters a Perl object depends on the
2694C<allow_blessed>, C<convert_blessed> and C<allow_bignum> settings, which are
2695used in this order:
d5424315 2696
dec273dc 2697=over 4
d5424315 2698
dec273dc 2699=item 1. C<convert_blessed> is enabled and the object has a C<TO_JSON> method.
d5424315 2700
dec273dc
CBW
2701In this case, the C<TO_JSON> method of the object is invoked in scalar
2702context. It must return a single scalar that can be directly encoded into
2703JSON. This scalar replaces the object in the JSON text.
d5424315 2704
dec273dc
CBW
2705For example, the following C<TO_JSON> method will convert all L<URI>
2706objects to JSON strings when serialised. The fact that these values
2707originally were L<URI> objects is lost.
d5424315 2708
dec273dc
CBW
2709 sub URI::TO_JSON {
2710 my ($uri) = @_;
2711 $uri->as_string
2712 }
d5424315 2713
dec273dc 2714=item 2. C<allow_bignum> is enabled and the object is a C<Math::BigInt> or C<Math::BigFloat>.
d5424315 2715
dec273dc 2716The object will be serialised as a JSON number value.
d5424315 2717
dec273dc 2718=item 3. C<allow_blessed> is enabled.
d5424315 2719
dec273dc 2720The object will be serialised as a JSON null value.
d5424315 2721
dec273dc 2722=item 4. none of the above
d5424315 2723
dec273dc
CBW
2724If none of the settings are enabled or the respective methods are missing,
2725C<JSON::PP> throws an exception.
d5424315 2726
dec273dc 2727=back
d5424315 2728
dec273dc 2729=head1 ENCODING/CODESET FLAG NOTES
d5424315 2730
dec273dc 2731This section is taken from JSON::XS.
d5424315 2732
dec273dc
CBW
2733The interested reader might have seen a number of flags that signify
2734encodings or codesets - C<utf8>, C<latin1> and C<ascii>. There seems to be
2735some confusion on what these do, so here is a short comparison:
d5424315 2736
dec273dc
CBW
2737C<utf8> controls whether the JSON text created by C<encode> (and expected
2738by C<decode>) is UTF-8 encoded or not, while C<latin1> and C<ascii> only
2739control whether C<encode> escapes character values outside their respective
2740codeset range. Neither of these flags conflict with each other, although
2741some combinations make less sense than others.
d5424315 2742
dec273dc
CBW
2743Care has been taken to make all flags symmetrical with respect to
2744C<encode> and C<decode>, that is, texts encoded with any combination of
2745these flag values will be correctly decoded when the same flags are used
2746- in general, if you use different flag settings while encoding vs. when
2747decoding you likely have a bug somewhere.
d5424315 2748
dec273dc
CBW
2749Below comes a verbose discussion of these flags. Note that a "codeset" is
2750simply an abstract set of character-codepoint pairs, while an encoding
2751takes those codepoint numbers and I<encodes> them, in our case into
2752octets. Unicode is (among other things) a codeset, UTF-8 is an encoding,
2753and ISO-8859-1 (= latin 1) and ASCII are both codesets I<and> encodings at
2754the same time, which can be confusing.
d5424315 2755
dec273dc 2756=over 4
d5424315 2757
dec273dc
CBW
2758=item C<utf8> flag disabled
2759
2760When C<utf8> is disabled (the default), then C<encode>/C<decode> generate
2761and expect Unicode strings, that is, characters with high ordinal Unicode
2762values (> 255) will be encoded as such characters, and likewise such
2763characters are decoded as-is, no changes to them will be done, except
2764"(re-)interpreting" them as Unicode codepoints or Unicode characters,
2765respectively (to Perl, these are the same thing in strings unless you do
2766funny/weird/dumb stuff).
2767
2768This is useful when you want to do the encoding yourself (e.g. when you
2769want to have UTF-16 encoded JSON texts) or when some other layer does
2770the encoding for you (for example, when printing to a terminal using a
2771filehandle that transparently encodes to UTF-8 you certainly do NOT want
2772to UTF-8 encode your data first and have Perl encode it another time).
2773
2774=item C<utf8> flag enabled
2775
2776If the C<utf8>-flag is enabled, C<encode>/C<decode> will encode all
2777characters using the corresponding UTF-8 multi-byte sequence, and will
2778expect your input strings to be encoded as UTF-8, that is, no "character"
2779of the input string must have any value > 255, as UTF-8 does not allow
2780that.
2781
2782The C<utf8> flag therefore switches between two modes: disabled means you
2783will get a Unicode string in Perl, enabled means you get an UTF-8 encoded
2784octet/binary string in Perl.
2785
2786=item C<latin1> or C<ascii> flags enabled
2787
2788With C<latin1> (or C<ascii>) enabled, C<encode> will escape characters
2789with ordinal values > 255 (> 127 with C<ascii>) and encode the remaining
2790characters as specified by the C<utf8> flag.
2791
2792If C<utf8> is disabled, then the result is also correctly encoded in those
2793character sets (as both are proper subsets of Unicode, meaning that a
2794Unicode string with all character values < 256 is the same thing as a
2795ISO-8859-1 string, and a Unicode string with all character values < 128 is
2796the same thing as an ASCII string in Perl).
2797
2798If C<utf8> is enabled, you still get a correct UTF-8-encoded string,
2799regardless of these flags, just some more characters will be escaped using
2800C<\uXXXX> then before.
2801
2802Note that ISO-8859-1-I<encoded> strings are not compatible with UTF-8
2803encoding, while ASCII-encoded strings are. That is because the ISO-8859-1
2804encoding is NOT a subset of UTF-8 (despite the ISO-8859-1 I<codeset> being
2805a subset of Unicode), while ASCII is.
2806
2807Surprisingly, C<decode> will ignore these flags and so treat all input
2808values as governed by the C<utf8> flag. If it is disabled, this allows you
2809to decode ISO-8859-1- and ASCII-encoded strings, as both strict subsets of
2810Unicode. If it is enabled, you can correctly decode UTF-8 encoded strings.
2811
2812So neither C<latin1> nor C<ascii> are incompatible with the C<utf8> flag -
2813they only govern when the JSON output engine escapes a character or not.
2814
2815The main use for C<latin1> is to relatively efficiently store binary data
2816as JSON, at the expense of breaking compatibility with most JSON decoders.
2817
2818The main use for C<ascii> is to force the output to not contain characters
2819with values > 127, which means you can interpret the resulting string
2820as UTF-8, ISO-8859-1, ASCII, KOI8-R or most about any character set and
28218-bit-encoding, and still get the same data structure back. This is useful
2822when your channel for JSON transfer is not 8-bit clean or the encoding
2823might be mangled in between (e.g. in mail), and works because ASCII is a
2824proper subset of most 8-bit and multibyte encodings in use in the world.
d5424315
DG
2825
2826=back
2827
d5424315
DG
2828=head1 SEE ALSO
2829
dec273dc
CBW
2830The F<json_pp> command line utility for quick experiments.
2831
2832L<JSON::XS>, L<Cpanel::JSON::XS>, and L<JSON::Tiny> for faster alternatives.
2833L<JSON> and L<JSON::MaybeXS> for easy migration.
d5424315 2834
dec273dc 2835L<JSON::PP::Compat5005> and L<JSON::PP::Compat5006> for older perl users.
d5424315
DG
2836
2837RFC4627 (L<http://www.ietf.org/rfc/rfc4627.txt>)
2838
2839=head1 AUTHOR
2840
2841Makamaka Hannyaharamitu, E<lt>makamaka[at]cpan.orgE<gt>
2842
2843
2844=head1 COPYRIGHT AND LICENSE
2845
a1e5c561 2846Copyright 2007-2016 by Makamaka Hannyaharamitu
d5424315
DG
2847
2848This library is free software; you can redistribute it and/or modify
2849it under the same terms as Perl itself.
2850
2851=cut