This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Update JSON-PP to CPAN version 2.97001
[perl5.git] / cpan / JSON-PP / lib / JSON / PP.pm
CommitLineData
d5424315
DG
1package JSON::PP;
2
3# JSON-2.0
4
5use 5.005;
6use strict;
dec273dc
CBW
7
8use Exporter ();
9BEGIN { @JSON::PP::ISA = ('Exporter') }
10
d5424315 11use overload ();
dec273dc 12use JSON::PP::Boolean;
d5424315
DG
13
14use Carp ();
d5424315
DG
15#use Devel::Peek;
16
9bccc4c2 17$JSON::PP::VERSION = '2.97001';
d5424315
DG
18
19@JSON::PP::EXPORT = qw(encode_json decode_json from_json to_json);
20
21# instead of hash-access, i tried index-access for speed.
22# but this method is not faster than what i expected. so it will be changed.
23
24use constant P_ASCII => 0;
25use constant P_LATIN1 => 1;
26use constant P_UTF8 => 2;
27use constant P_INDENT => 3;
28use constant P_CANONICAL => 4;
29use constant P_SPACE_BEFORE => 5;
30use constant P_SPACE_AFTER => 6;
31use constant P_ALLOW_NONREF => 7;
32use constant P_SHRINK => 8;
33use constant P_ALLOW_BLESSED => 9;
34use constant P_CONVERT_BLESSED => 10;
35use constant P_RELAXED => 11;
36
37use constant P_LOOSE => 12;
38use constant P_ALLOW_BIGNUM => 13;
39use constant P_ALLOW_BAREKEY => 14;
40use constant P_ALLOW_SINGLEQUOTE => 15;
41use constant P_ESCAPE_SLASH => 16;
42use constant P_AS_NONBLESSED => 17;
43
44use constant P_ALLOW_UNKNOWN => 18;
45
46use constant OLD_PERL => $] < 5.008 ? 1 : 0;
dec273dc
CBW
47use constant USE_B => 0;
48
49BEGIN {
50if (USE_B) {
51 require B;
52}
53}
d5424315
DG
54
55BEGIN {
56 my @xs_compati_bit_properties = qw(
57 latin1 ascii utf8 indent canonical space_before space_after allow_nonref shrink
58 allow_blessed convert_blessed relaxed allow_unknown
59 );
60 my @pp_bit_properties = qw(
61 allow_singlequote allow_bignum loose
62 allow_barekey escape_slash as_nonblessed
63 );
64
a1e5c561 65 # Perl version check, Unicode handling is enabled?
d5424315 66 # Helper module sets @JSON::PP::_properties.
dec273dc 67 if ( OLD_PERL ) {
d5424315
DG
68 my $helper = $] >= 5.006 ? 'JSON::PP::Compat5006' : 'JSON::PP::Compat5005';
69 eval qq| require $helper |;
70 if ($@) { Carp::croak $@; }
71 }
72
73 for my $name (@xs_compati_bit_properties, @pp_bit_properties) {
dec273dc 74 my $property_id = 'P_' . uc($name);
d5424315
DG
75
76 eval qq/
77 sub $name {
78 my \$enable = defined \$_[1] ? \$_[1] : 1;
79
80 if (\$enable) {
dec273dc 81 \$_[0]->{PROPS}->[$property_id] = 1;
d5424315
DG
82 }
83 else {
dec273dc 84 \$_[0]->{PROPS}->[$property_id] = 0;
d5424315
DG
85 }
86
87 \$_[0];
88 }
89
90 sub get_$name {
dec273dc 91 \$_[0]->{PROPS}->[$property_id] ? 1 : '';
d5424315
DG
92 }
93 /;
94 }
95
96}
97
98
99
100# Functions
101
d5424315
DG
102my $JSON; # cache
103
104sub encode_json ($) { # encode
105 ($JSON ||= __PACKAGE__->new->utf8)->encode(@_);
106}
107
108
109sub decode_json { # decode
110 ($JSON ||= __PACKAGE__->new->utf8)->decode(@_);
111}
112
113# Obsoleted
114
115sub to_json($) {
116 Carp::croak ("JSON::PP::to_json has been renamed to encode_json.");
117}
118
119
120sub from_json($) {
121 Carp::croak ("JSON::PP::from_json has been renamed to decode_json.");
122}
123
124
125# Methods
126
127sub new {
128 my $class = shift;
129 my $self = {
130 max_depth => 512,
131 max_size => 0,
d5424315
DG
132 indent_length => 3,
133 };
134
135 bless $self, $class;
136}
137
138
139sub encode {
140 return $_[0]->PP_encode_json($_[1]);
141}
142
143
144sub decode {
145 return $_[0]->PP_decode_json($_[1], 0x00000000);
146}
147
148
149sub decode_prefix {
150 return $_[0]->PP_decode_json($_[1], 0x00000001);
151}
152
153
154# accessor
155
156
157# pretty printing
158
159sub pretty {
160 my ($self, $v) = @_;
161 my $enable = defined $v ? $v : 1;
162
163 if ($enable) { # indent_length(3) for JSON::XS compatibility
dec273dc 164 $self->indent(1)->space_before(1)->space_after(1);
d5424315
DG
165 }
166 else {
167 $self->indent(0)->space_before(0)->space_after(0);
168 }
169
170 $self;
171}
172
173# etc
174
175sub max_depth {
176 my $max = defined $_[1] ? $_[1] : 0x80000000;
177 $_[0]->{max_depth} = $max;
178 $_[0];
179}
180
181
182sub get_max_depth { $_[0]->{max_depth}; }
183
184
185sub max_size {
186 my $max = defined $_[1] ? $_[1] : 0;
187 $_[0]->{max_size} = $max;
188 $_[0];
189}
190
191
192sub get_max_size { $_[0]->{max_size}; }
193
194
195sub filter_json_object {
dec273dc
CBW
196 if (defined $_[1] and ref $_[1] eq 'CODE') {
197 $_[0]->{cb_object} = $_[1];
198 } else {
199 delete $_[0]->{cb_object};
200 }
d5424315
DG
201 $_[0]->{F_HOOK} = ($_[0]->{cb_object} or $_[0]->{cb_sk_object}) ? 1 : 0;
202 $_[0];
203}
204
205sub filter_json_single_key_object {
dec273dc
CBW
206 if (@_ == 1 or @_ > 3) {
207 Carp::croak("Usage: JSON::PP::filter_json_single_key_object(self, key, callback = undef)");
208 }
209 if (defined $_[2] and ref $_[2] eq 'CODE') {
d5424315 210 $_[0]->{cb_sk_object}->{$_[1]} = $_[2];
dec273dc
CBW
211 } else {
212 delete $_[0]->{cb_sk_object}->{$_[1]};
213 delete $_[0]->{cb_sk_object} unless %{$_[0]->{cb_sk_object} || {}};
d5424315
DG
214 }
215 $_[0]->{F_HOOK} = ($_[0]->{cb_object} or $_[0]->{cb_sk_object}) ? 1 : 0;
216 $_[0];
217}
218
219sub indent_length {
220 if (!defined $_[1] or $_[1] > 15 or $_[1] < 0) {
221 Carp::carp "The acceptable range of indent_length() is 0 to 15.";
222 }
223 else {
224 $_[0]->{indent_length} = $_[1];
225 }
226 $_[0];
227}
228
229sub get_indent_length {
230 $_[0]->{indent_length};
231}
232
233sub sort_by {
234 $_[0]->{sort_by} = defined $_[1] ? $_[1] : 1;
235 $_[0];
236}
237
238sub allow_bigint {
dec273dc
CBW
239 Carp::carp("allow_bigint() is obsoleted. use allow_bignum() instead.");
240 $_[0]->allow_bignum;
d5424315
DG
241}
242
243###############################
244
245###
246### Perl => JSON
247###
248
249
250{ # Convert
251
252 my $max_depth;
253 my $indent;
254 my $ascii;
255 my $latin1;
256 my $utf8;
257 my $space_before;
258 my $space_after;
259 my $canonical;
260 my $allow_blessed;
261 my $convert_blessed;
262
263 my $indent_length;
264 my $escape_slash;
265 my $bignum;
266 my $as_nonblessed;
267
268 my $depth;
269 my $indent_count;
270 my $keysort;
271
272
273 sub PP_encode_json {
274 my $self = shift;
275 my $obj = shift;
276
277 $indent_count = 0;
278 $depth = 0;
279
dec273dc 280 my $props = $self->{PROPS};
d5424315
DG
281
282 ($ascii, $latin1, $utf8, $indent, $canonical, $space_before, $space_after, $allow_blessed,
283 $convert_blessed, $escape_slash, $bignum, $as_nonblessed)
dec273dc 284 = @{$props}[P_ASCII .. P_SPACE_AFTER, P_ALLOW_BLESSED, P_CONVERT_BLESSED,
d5424315
DG
285 P_ESCAPE_SLASH, P_ALLOW_BIGNUM, P_AS_NONBLESSED];
286
287 ($max_depth, $indent_length) = @{$self}{qw/max_depth indent_length/};
288
289 $keysort = $canonical ? sub { $a cmp $b } : undef;
290
291 if ($self->{sort_by}) {
292 $keysort = ref($self->{sort_by}) eq 'CODE' ? $self->{sort_by}
293 : $self->{sort_by} =~ /\D+/ ? $self->{sort_by}
294 : sub { $a cmp $b };
295 }
296
297 encode_error("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)")
dec273dc 298 if(!ref $obj and !$props->[ P_ALLOW_NONREF ]);
d5424315
DG
299
300 my $str = $self->object_to_json($obj);
301
302 $str .= "\n" if ( $indent ); # JSON::XS 2.26 compatible
303
304 unless ($ascii or $latin1 or $utf8) {
305 utf8::upgrade($str);
306 }
307
dec273dc 308 if ($props->[ P_SHRINK ]) {
d5424315
DG
309 utf8::downgrade($str, 1);
310 }
311
312 return $str;
313 }
314
315
316 sub object_to_json {
317 my ($self, $obj) = @_;
318 my $type = ref($obj);
319
320 if($type eq 'HASH'){
321 return $self->hash_to_json($obj);
322 }
323 elsif($type eq 'ARRAY'){
324 return $self->array_to_json($obj);
325 }
326 elsif ($type) { # blessed object?
327 if (blessed($obj)) {
328
329 return $self->value_to_json($obj) if ( $obj->isa('JSON::PP::Boolean') );
330
331 if ( $convert_blessed and $obj->can('TO_JSON') ) {
332 my $result = $obj->TO_JSON();
05bd2440
CBW
333 if ( defined $result and ref( $result ) ) {
334 if ( refaddr( $obj ) eq refaddr( $result ) ) {
d5424315
DG
335 encode_error( sprintf(
336 "%s::TO_JSON method returned same object as was passed instead of a new one",
337 ref $obj
338 ) );
339 }
340 }
341
342 return $self->object_to_json( $result );
343 }
344
345 return "$obj" if ( $bignum and _is_bignum($obj) );
d5424315 346
dec273dc
CBW
347 if ($allow_blessed) {
348 return $self->blessed_to_json($obj) if ($as_nonblessed); # will be removed.
349 return 'null';
350 }
d5424315
DG
351 encode_error( sprintf("encountered object '%s', but neither allow_blessed "
352 . "nor convert_blessed settings are enabled", $obj)
dec273dc 353 );
d5424315
DG
354 }
355 else {
356 return $self->value_to_json($obj);
357 }
358 }
359 else{
360 return $self->value_to_json($obj);
361 }
362 }
363
364
365 sub hash_to_json {
366 my ($self, $obj) = @_;
367 my @res;
368
369 encode_error("json text or perl structure exceeds maximum nesting level (max_depth set too low?)")
370 if (++$depth > $max_depth);
371
372 my ($pre, $post) = $indent ? $self->_up_indent() : ('', '');
373 my $del = ($space_before ? ' ' : '') . ':' . ($space_after ? ' ' : '');
374
375 for my $k ( _sort( $obj ) ) {
376 if ( OLD_PERL ) { utf8::decode($k) } # key for Perl 5.6 / be optimized
dec273dc 377 push @res, $self->string_to_json( $k )
d5424315 378 . $del
dec273dc 379 . ( ref $obj->{$k} ? $self->object_to_json( $obj->{$k} ) : $self->value_to_json( $obj->{$k} ) );
d5424315
DG
380 }
381
382 --$depth;
383 $self->_down_indent() if ($indent);
384
dec273dc
CBW
385 return '{}' unless @res;
386 return '{' . $pre . join( ",$pre", @res ) . $post . '}';
d5424315
DG
387 }
388
389
390 sub array_to_json {
391 my ($self, $obj) = @_;
392 my @res;
393
394 encode_error("json text or perl structure exceeds maximum nesting level (max_depth set too low?)")
395 if (++$depth > $max_depth);
396
397 my ($pre, $post) = $indent ? $self->_up_indent() : ('', '');
398
399 for my $v (@$obj){
dec273dc 400 push @res, ref($v) ? $self->object_to_json($v) : $self->value_to_json($v);
d5424315
DG
401 }
402
403 --$depth;
404 $self->_down_indent() if ($indent);
405
dec273dc
CBW
406 return '[]' unless @res;
407 return '[' . $pre . join( ",$pre", @res ) . $post . ']';
d5424315
DG
408 }
409
dec273dc
CBW
410 sub _looks_like_number {
411 my $value = shift;
412 if (USE_B) {
413 my $b_obj = B::svref_2object(\$value);
414 my $flags = $b_obj->FLAGS;
415 return 1 if $flags & ( B::SVp_IOK() | B::SVp_NOK() ) and !( $flags & B::SVp_POK() );
416 return;
417 } else {
418 no warnings 'numeric';
9bccc4c2
CBW
419 # if the utf8 flag is on, it almost certainly started as a string
420 return if utf8::is_utf8($value);
dec273dc
CBW
421 # detect numbers
422 # string & "" -> ""
423 # number & "" -> 0 (with warning)
424 # nan and inf can detect as numbers, so check with * 0
425 return unless length((my $dummy = "") & $value);
426 return unless 0 + $value eq $value;
427 return 1 if $value * 0 == 0;
428 return -1; # inf/nan
429 }
430 }
d5424315
DG
431
432 sub value_to_json {
433 my ($self, $value) = @_;
434
435 return 'null' if(!defined $value);
436
d5424315
DG
437 my $type = ref($value);
438
dec273dc
CBW
439 if (!$type) {
440 if (_looks_like_number($value)) {
441 return $value;
442 }
443 return $self->string_to_json($value);
d5424315
DG
444 }
445 elsif( blessed($value) and $value->isa('JSON::PP::Boolean') ){
446 return $$value == 1 ? 'true' : 'false';
447 }
dec273dc 448 else {
d5424315
DG
449 if ((overload::StrVal($value) =~ /=(\w+)/)[0]) {
450 return $self->value_to_json("$value");
451 }
452
453 if ($type eq 'SCALAR' and defined $$value) {
454 return $$value eq '1' ? 'true'
455 : $$value eq '0' ? 'false'
456 : $self->{PROPS}->[ P_ALLOW_UNKNOWN ] ? 'null'
457 : encode_error("cannot encode reference to scalar");
458 }
459
dec273dc
CBW
460 if ( $self->{PROPS}->[ P_ALLOW_UNKNOWN ] ) {
461 return 'null';
462 }
463 else {
464 if ( $type eq 'SCALAR' or $type eq 'REF' ) {
d5424315 465 encode_error("cannot encode reference to scalar");
dec273dc
CBW
466 }
467 else {
d5424315 468 encode_error("encountered $value, but JSON can only represent references to arrays or hashes");
dec273dc
CBW
469 }
470 }
d5424315
DG
471
472 }
d5424315
DG
473 }
474
475
476 my %esc = (
477 "\n" => '\n',
478 "\r" => '\r',
479 "\t" => '\t',
480 "\f" => '\f',
481 "\b" => '\b',
482 "\"" => '\"',
483 "\\" => '\\\\',
484 "\'" => '\\\'',
485 );
486
487
488 sub string_to_json {
489 my ($self, $arg) = @_;
490
491 $arg =~ s/([\x22\x5c\n\r\t\f\b])/$esc{$1}/g;
492 $arg =~ s/\//\\\//g if ($escape_slash);
493 $arg =~ s/([\x00-\x08\x0b\x0e-\x1f])/'\\u00' . unpack('H2', $1)/eg;
494
495 if ($ascii) {
496 $arg = JSON_PP_encode_ascii($arg);
497 }
498
499 if ($latin1) {
500 $arg = JSON_PP_encode_latin1($arg);
501 }
502
503 if ($utf8) {
504 utf8::encode($arg);
505 }
506
507 return '"' . $arg . '"';
508 }
509
510
511 sub blessed_to_json {
512 my $reftype = reftype($_[1]) || '';
513 if ($reftype eq 'HASH') {
514 return $_[0]->hash_to_json($_[1]);
515 }
516 elsif ($reftype eq 'ARRAY') {
517 return $_[0]->array_to_json($_[1]);
518 }
519 else {
520 return 'null';
521 }
522 }
523
524
525 sub encode_error {
526 my $error = shift;
527 Carp::croak "$error";
528 }
529
530
531 sub _sort {
532 defined $keysort ? (sort $keysort (keys %{$_[0]})) : keys %{$_[0]};
533 }
534
535
536 sub _up_indent {
537 my $self = shift;
538 my $space = ' ' x $indent_length;
539
540 my ($pre,$post) = ('','');
541
542 $post = "\n" . $space x $indent_count;
543
544 $indent_count++;
545
546 $pre = "\n" . $space x $indent_count;
547
548 return ($pre,$post);
549 }
550
551
552 sub _down_indent { $indent_count--; }
553
554
555 sub PP_encode_box {
556 {
557 depth => $depth,
558 indent_count => $indent_count,
559 };
560 }
561
562} # Convert
563
564
565sub _encode_ascii {
566 join('',
567 map {
568 $_ <= 127 ?
569 chr($_) :
570 $_ <= 65535 ?
571 sprintf('\u%04x', $_) : sprintf('\u%x\u%x', _encode_surrogates($_));
572 } unpack('U*', $_[0])
573 );
574}
575
576
577sub _encode_latin1 {
578 join('',
579 map {
580 $_ <= 255 ?
581 chr($_) :
582 $_ <= 65535 ?
583 sprintf('\u%04x', $_) : sprintf('\u%x\u%x', _encode_surrogates($_));
584 } unpack('U*', $_[0])
585 );
586}
587
588
589sub _encode_surrogates { # from perlunicode
590 my $uni = $_[0] - 0x10000;
591 return ($uni / 0x400 + 0xD800, $uni % 0x400 + 0xDC00);
592}
593
594
595sub _is_bignum {
596 $_[0]->isa('Math::BigInt') or $_[0]->isa('Math::BigFloat');
597}
598
599
600
601#
602# JSON => Perl
603#
604
605my $max_intsize;
606
607BEGIN {
608 my $checkint = 1111;
609 for my $d (5..64) {
610 $checkint .= 1;
611 my $int = eval qq| $checkint |;
612 if ($int =~ /[eE]/) {
613 $max_intsize = $d - 1;
614 last;
615 }
616 }
617}
618
619{ # PARSE
620
621 my %escapes = ( # by Jeremy Muhlich <jmuhlich [at] bitflood.org>
622 b => "\x8",
623 t => "\x9",
624 n => "\xA",
625 f => "\xC",
626 r => "\xD",
627 '\\' => '\\',
628 '"' => '"',
629 '/' => '/',
630 );
631
632 my $text; # json data
633 my $at; # offset
a1e5c561 634 my $ch; # first character
d5424315
DG
635 my $len; # text length (changed according to UTF8 or NON UTF8)
636 # INTERNAL
637 my $depth; # nest counter
638 my $encoding; # json text encoding
639 my $is_valid_utf8; # temp variable
640 my $utf8_len; # utf8 byte length
641 # FLAGS
642 my $utf8; # must be utf8
a1e5c561 643 my $max_depth; # max nest number of objects and arrays
d5424315
DG
644 my $max_size;
645 my $relaxed;
646 my $cb_object;
647 my $cb_sk_object;
648
649 my $F_HOOK;
650
dec273dc 651 my $allow_bignum; # using Math::BigInt/BigFloat
d5424315
DG
652 my $singlequote; # loosely quoting
653 my $loose; #
654 my $allow_barekey; # bareKey
655
dec273dc
CBW
656 sub _detect_utf_encoding {
657 my $text = shift;
658 my @octets = unpack('C4', $text);
659 return 'unknown' unless defined $octets[3];
660 return ( $octets[0] and $octets[1]) ? 'UTF-8'
661 : (!$octets[0] and $octets[1]) ? 'UTF-16BE'
662 : (!$octets[0] and !$octets[1]) ? 'UTF-32BE'
663 : ( $octets[2] ) ? 'UTF-16LE'
664 : (!$octets[2] ) ? 'UTF-32LE'
665 : 'unknown';
666 }
d5424315
DG
667
668 sub PP_decode_json {
dec273dc 669 my ($self, $want_offset);
d5424315 670
dec273dc 671 ($self, $text, $want_offset) = @_;
d5424315
DG
672
673 ($at, $ch, $depth) = (0, '', 0);
674
675 if ( !defined $text or ref $text ) {
676 decode_error("malformed JSON string, neither array, object, number, string or atom");
677 }
678
dec273dc 679 my $props = $self->{PROPS};
d5424315 680
dec273dc
CBW
681 ($utf8, $relaxed, $loose, $allow_bignum, $allow_barekey, $singlequote)
682 = @{$props}[P_UTF8, P_RELAXED, P_LOOSE .. P_ALLOW_SINGLEQUOTE];
d5424315
DG
683
684 if ( $utf8 ) {
dec273dc
CBW
685 $encoding = _detect_utf_encoding($text);
686 if ($encoding ne 'UTF-8' and $encoding ne 'unknown') {
687 require Encode;
688 Encode::from_to($text, $encoding, 'utf-8');
689 } else {
690 utf8::downgrade( $text, 1 ) or Carp::croak("Wide character in subroutine entry");
691 }
d5424315
DG
692 }
693 else {
694 utf8::upgrade( $text );
87f3ebe4 695 utf8::encode( $text );
d5424315
DG
696 }
697
698 $len = length $text;
699
700 ($max_depth, $max_size, $cb_object, $cb_sk_object, $F_HOOK)
701 = @{$self}{qw/max_depth max_size cb_object cb_sk_object F_HOOK/};
702
703 if ($max_size > 1) {
704 use bytes;
705 my $bytes = length $text;
706 decode_error(
707 sprintf("attempted decode of JSON text of %s bytes size, but max_size is set to %s"
708 , $bytes, $max_size), 1
709 ) if ($bytes > $max_size);
710 }
711
d5424315
DG
712 white(); # remove head white space
713
dec273dc 714 decode_error("malformed JSON string, neither array, object, number, string or atom") unless defined $ch; # Is there a first character for JSON structure?
d5424315
DG
715
716 my $result = value();
717
dec273dc 718 if ( !$props->[ P_ALLOW_NONREF ] and !ref $result ) {
d5424315
DG
719 decode_error(
720 'JSON text must be an object or array (but found number, string, true, false or null,'
721 . ' use allow_nonref to allow this)', 1);
722 }
723
724 Carp::croak('something wrong.') if $len < $at; # we won't arrive here.
725
726 my $consumed = defined $ch ? $at - 1 : $at; # consumed JSON text length
727
728 white(); # remove tail white space
729
dec273dc
CBW
730 return ( $result, $consumed ) if $want_offset; # all right if decode_prefix
731
732 decode_error("garbage after JSON object") if defined $ch;
d5424315 733
dec273dc 734 $result;
d5424315
DG
735 }
736
737
738 sub next_chr {
739 return $ch = undef if($at >= $len);
740 $ch = substr($text, $at++, 1);
741 }
742
743
744 sub value {
745 white();
746 return if(!defined $ch);
747 return object() if($ch eq '{');
748 return array() if($ch eq '[');
749 return string() if($ch eq '"' or ($singlequote and $ch eq "'"));
750 return number() if($ch =~ /[0-9]/ or $ch eq '-');
751 return word();
752 }
753
754 sub string {
d5424315
DG
755 my $utf16;
756 my $is_utf8;
757
758 ($is_valid_utf8, $utf8_len) = ('', 0);
759
dec273dc 760 my $s = ''; # basically UTF8 flag on
d5424315
DG
761
762 if($ch eq '"' or ($singlequote and $ch eq "'")){
763 my $boundChar = $ch;
764
765 OUTER: while( defined(next_chr()) ){
766
767 if($ch eq $boundChar){
768 next_chr();
769
770 if ($utf16) {
771 decode_error("missing low surrogate character in surrogate pair");
772 }
773
774 utf8::decode($s) if($is_utf8);
775
776 return $s;
777 }
778 elsif($ch eq '\\'){
779 next_chr();
780 if(exists $escapes{$ch}){
781 $s .= $escapes{$ch};
782 }
783 elsif($ch eq 'u'){ # UNICODE handling
784 my $u = '';
785
786 for(1..4){
787 $ch = next_chr();
788 last OUTER if($ch !~ /[0-9a-fA-F]/);
789 $u .= $ch;
790 }
791
792 # U+D800 - U+DBFF
793 if ($u =~ /^[dD][89abAB][0-9a-fA-F]{2}/) { # UTF-16 high surrogate?
794 $utf16 = $u;
795 }
796 # U+DC00 - U+DFFF
797 elsif ($u =~ /^[dD][c-fC-F][0-9a-fA-F]{2}/) { # UTF-16 low surrogate?
798 unless (defined $utf16) {
799 decode_error("missing high surrogate character in surrogate pair");
800 }
801 $is_utf8 = 1;
802 $s .= JSON_PP_decode_surrogates($utf16, $u) || next;
803 $utf16 = undef;
804 }
805 else {
806 if (defined $utf16) {
807 decode_error("surrogate pair expected");
808 }
809
810 if ( ( my $hex = hex( $u ) ) > 127 ) {
811 $is_utf8 = 1;
812 $s .= JSON_PP_decode_unicode($u) || next;
813 }
814 else {
815 $s .= chr $hex;
816 }
817 }
818
819 }
820 else{
821 unless ($loose) {
822 $at -= 2;
823 decode_error('illegal backslash escape sequence in string');
824 }
825 $s .= $ch;
826 }
827 }
828 else{
829
830 if ( ord $ch > 127 ) {
87f3ebe4
CBW
831 unless( $ch = is_valid_utf8($ch) ) {
832 $at -= 1;
833 decode_error("malformed UTF-8 character in JSON string");
d5424315
DG
834 }
835 else {
87f3ebe4 836 $at += $utf8_len - 1;
d5424315
DG
837 }
838
839 $is_utf8 = 1;
840 }
841
842 if (!$loose) {
843 if ($ch =~ /[\x00-\x1f\x22\x5c]/) { # '/' ok
844 $at--;
845 decode_error('invalid character encountered while parsing JSON string');
846 }
847 }
848
849 $s .= $ch;
850 }
851 }
852 }
853
854 decode_error("unexpected end of string while parsing JSON string");
855 }
856
857
858 sub white {
859 while( defined $ch ){
dec273dc 860 if($ch eq '' or $ch =~ /\A[ \t\r\n]\z/){
d5424315
DG
861 next_chr();
862 }
dec273dc 863 elsif($relaxed and $ch eq '/'){
d5424315
DG
864 next_chr();
865 if(defined $ch and $ch eq '/'){
866 1 while(defined(next_chr()) and $ch ne "\n" and $ch ne "\r");
867 }
868 elsif(defined $ch and $ch eq '*'){
869 next_chr();
870 while(1){
871 if(defined $ch){
872 if($ch eq '*'){
873 if(defined(next_chr()) and $ch eq '/'){
874 next_chr();
875 last;
876 }
877 }
878 else{
879 next_chr();
880 }
881 }
882 else{
883 decode_error("Unterminated comment");
884 }
885 }
886 next;
887 }
888 else{
889 $at--;
890 decode_error("malformed JSON string, neither array, object, number, string or atom");
891 }
892 }
893 else{
894 if ($relaxed and $ch eq '#') { # correctly?
895 pos($text) = $at;
896 $text =~ /\G([^\n]*(?:\r\n|\r|\n|$))/g;
897 $at = pos($text);
898 next_chr;
899 next;
900 }
901
902 last;
903 }
904 }
905 }
906
907
908 sub array {
05bd2440 909 my $a = $_[0] || []; # you can use this code to use another array ref object.
d5424315
DG
910
911 decode_error('json text or perl structure exceeds maximum nesting level (max_depth set too low?)')
912 if (++$depth > $max_depth);
913
914 next_chr();
915 white();
916
917 if(defined $ch and $ch eq ']'){
918 --$depth;
919 next_chr();
920 return $a;
921 }
922 else {
923 while(defined($ch)){
924 push @$a, value();
925
926 white();
927
928 if (!defined $ch) {
929 last;
930 }
931
932 if($ch eq ']'){
933 --$depth;
934 next_chr();
935 return $a;
936 }
937
938 if($ch ne ','){
939 last;
940 }
941
942 next_chr();
943 white();
944
945 if ($relaxed and $ch eq ']') {
946 --$depth;
947 next_chr();
948 return $a;
949 }
950
951 }
952 }
953
dec273dc 954 $at-- if defined $ch and $ch ne '';
d5424315
DG
955 decode_error(", or ] expected while parsing array");
956 }
957
958
959 sub object {
05bd2440 960 my $o = $_[0] || {}; # you can use this code to use another hash ref object.
d5424315
DG
961 my $k;
962
963 decode_error('json text or perl structure exceeds maximum nesting level (max_depth set too low?)')
964 if (++$depth > $max_depth);
965 next_chr();
966 white();
967
968 if(defined $ch and $ch eq '}'){
969 --$depth;
970 next_chr();
971 if ($F_HOOK) {
972 return _json_object_hook($o);
973 }
974 return $o;
975 }
976 else {
977 while (defined $ch) {
978 $k = ($allow_barekey and $ch ne '"' and $ch ne "'") ? bareKey() : string();
979 white();
980
981 if(!defined $ch or $ch ne ':'){
982 $at--;
983 decode_error("':' expected");
984 }
985
986 next_chr();
987 $o->{$k} = value();
988 white();
989
990 last if (!defined $ch);
991
992 if($ch eq '}'){
993 --$depth;
994 next_chr();
995 if ($F_HOOK) {
996 return _json_object_hook($o);
997 }
998 return $o;
999 }
1000
1001 if($ch ne ','){
1002 last;
1003 }
1004
1005 next_chr();
1006 white();
1007
1008 if ($relaxed and $ch eq '}') {
1009 --$depth;
1010 next_chr();
1011 if ($F_HOOK) {
1012 return _json_object_hook($o);
1013 }
1014 return $o;
1015 }
1016
1017 }
1018
1019 }
1020
dec273dc 1021 $at-- if defined $ch and $ch ne '';
d5424315
DG
1022 decode_error(", or } expected while parsing object/hash");
1023 }
1024
1025
1026 sub bareKey { # doesn't strictly follow Standard ECMA-262 3rd Edition
1027 my $key;
1028 while($ch =~ /[^\x00-\x23\x25-\x2F\x3A-\x40\x5B-\x5E\x60\x7B-\x7F]/){
1029 $key .= $ch;
1030 next_chr();
1031 }
1032 return $key;
1033 }
1034
1035
1036 sub word {
1037 my $word = substr($text,$at-1,4);
1038
1039 if($word eq 'true'){
1040 $at += 3;
1041 next_chr;
1042 return $JSON::PP::true;
1043 }
1044 elsif($word eq 'null'){
1045 $at += 3;
1046 next_chr;
1047 return undef;
1048 }
1049 elsif($word eq 'fals'){
1050 $at += 3;
1051 if(substr($text,$at,1) eq 'e'){
1052 $at++;
1053 next_chr;
1054 return $JSON::PP::false;
1055 }
1056 }
1057
1058 $at--; # for decode_error report
1059
1060 decode_error("'null' expected") if ($word =~ /^n/);
1061 decode_error("'true' expected") if ($word =~ /^t/);
1062 decode_error("'false' expected") if ($word =~ /^f/);
1063 decode_error("malformed JSON string, neither array, object, number, string or atom");
1064 }
1065
1066
1067 sub number {
1068 my $n = '';
1069 my $v;
a1e5c561 1070 my $is_dec;
dec273dc 1071 my $is_exp;
d5424315
DG
1072
1073 if($ch eq '-'){
1074 $n = '-';
1075 next_chr;
1076 if (!defined $ch or $ch !~ /\d/) {
1077 decode_error("malformed number (no digits after initial minus)");
1078 }
1079 }
1080
dec273dc
CBW
1081 # According to RFC4627, hex or oct digits are invalid.
1082 if($ch eq '0'){
1083 my $peek = substr($text,$at,1);
1084 if($peek =~ /^[0-9a-dfA-DF]/){ # e may be valid (exponential)
1085 decode_error("malformed number (leading zero must not be followed by another digit)");
1086 }
1087 $n .= $ch;
1088 next_chr;
1089 }
1090
d5424315
DG
1091 while(defined $ch and $ch =~ /\d/){
1092 $n .= $ch;
1093 next_chr;
1094 }
1095
1096 if(defined $ch and $ch eq '.'){
1097 $n .= '.';
a1e5c561 1098 $is_dec = 1;
d5424315
DG
1099
1100 next_chr;
1101 if (!defined $ch or $ch !~ /\d/) {
1102 decode_error("malformed number (no digits after decimal point)");
1103 }
1104 else {
1105 $n .= $ch;
1106 }
1107
1108 while(defined(next_chr) and $ch =~ /\d/){
1109 $n .= $ch;
1110 }
1111 }
1112
1113 if(defined $ch and ($ch eq 'e' or $ch eq 'E')){
1114 $n .= $ch;
dec273dc 1115 $is_exp = 1;
d5424315
DG
1116 next_chr;
1117
1118 if(defined($ch) and ($ch eq '+' or $ch eq '-')){
1119 $n .= $ch;
1120 next_chr;
1121 if (!defined $ch or $ch =~ /\D/) {
1122 decode_error("malformed number (no digits after exp sign)");
1123 }
1124 $n .= $ch;
1125 }
1126 elsif(defined($ch) and $ch =~ /\d/){
1127 $n .= $ch;
1128 }
1129 else {
1130 decode_error("malformed number (no digits after exp sign)");
1131 }
1132
1133 while(defined(next_chr) and $ch =~ /\d/){
1134 $n .= $ch;
1135 }
1136
1137 }
1138
1139 $v .= $n;
1140
dec273dc
CBW
1141 if ($is_dec or $is_exp) {
1142 if ($allow_bignum) {
1143 require Math::BigFloat;
1144 return Math::BigFloat->new($v);
d5424315 1145 }
dec273dc
CBW
1146 } else {
1147 if (length $v > $max_intsize) {
1148 if ($allow_bignum) { # from Adam Sussman
1149 require Math::BigInt;
1150 return Math::BigInt->new($v);
1151 }
1152 else {
1153 return "$v";
1154 }
d5424315
DG
1155 }
1156 }
d5424315 1157
a1e5c561 1158 return $is_dec ? $v/1.0 : 0+$v;
d5424315
DG
1159 }
1160
1161
1162 sub is_valid_utf8 {
1163
1164 $utf8_len = $_[0] =~ /[\x00-\x7F]/ ? 1
1165 : $_[0] =~ /[\xC2-\xDF]/ ? 2
1166 : $_[0] =~ /[\xE0-\xEF]/ ? 3
1167 : $_[0] =~ /[\xF0-\xF4]/ ? 4
1168 : 0
1169 ;
1170
1171 return unless $utf8_len;
1172
1173 my $is_valid_utf8 = substr($text, $at - 1, $utf8_len);
1174
1175 return ( $is_valid_utf8 =~ /^(?:
1176 [\x00-\x7F]
1177 |[\xC2-\xDF][\x80-\xBF]
1178 |[\xE0][\xA0-\xBF][\x80-\xBF]
1179 |[\xE1-\xEC][\x80-\xBF][\x80-\xBF]
1180 |[\xED][\x80-\x9F][\x80-\xBF]
1181 |[\xEE-\xEF][\x80-\xBF][\x80-\xBF]
1182 |[\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF]
1183 |[\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]
1184 |[\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF]
1185 )$/x ) ? $is_valid_utf8 : '';
1186 }
1187
1188
1189 sub decode_error {
1190 my $error = shift;
1191 my $no_rep = shift;
1192 my $str = defined $text ? substr($text, $at) : '';
1193 my $mess = '';
dec273dc
CBW
1194 my $type = 'U*';
1195
1196 if ( OLD_PERL ) {
1197 my $type = $] < 5.006 ? 'C*'
1198 : utf8::is_utf8( $str ) ? 'U*' # 5.6
1199 : 'C*'
1200 ;
1201 }
d5424315
DG
1202
1203 for my $c ( unpack( $type, $str ) ) { # emulate pv_uni_display() ?
1204 $mess .= $c == 0x07 ? '\a'
1205 : $c == 0x09 ? '\t'
1206 : $c == 0x0a ? '\n'
1207 : $c == 0x0d ? '\r'
1208 : $c == 0x0c ? '\f'
1209 : $c < 0x20 ? sprintf('\x{%x}', $c)
1210 : $c == 0x5c ? '\\\\'
1211 : $c < 0x80 ? chr($c)
1212 : sprintf('\x{%x}', $c)
1213 ;
1214 if ( length $mess >= 20 ) {
1215 $mess .= '...';
1216 last;
1217 }
1218 }
1219
1220 unless ( length $mess ) {
1221 $mess = '(end of string)';
1222 }
1223
1224 Carp::croak (
1225 $no_rep ? "$error" : "$error, at character offset $at (before \"$mess\")"
1226 );
1227
1228 }
1229
1230
1231 sub _json_object_hook {
1232 my $o = $_[0];
1233 my @ks = keys %{$o};
1234
1235 if ( $cb_sk_object and @ks == 1 and exists $cb_sk_object->{ $ks[0] } and ref $cb_sk_object->{ $ks[0] } ) {
1236 my @val = $cb_sk_object->{ $ks[0] }->( $o->{$ks[0]} );
1237 if (@val == 1) {
1238 return $val[0];
1239 }
1240 }
1241
1242 my @val = $cb_object->($o) if ($cb_object);
1243 if (@val == 0 or @val > 1) {
1244 return $o;
1245 }
1246 else {
1247 return $val[0];
1248 }
1249 }
1250
1251
1252 sub PP_decode_box {
1253 {
1254 text => $text,
1255 at => $at,
1256 ch => $ch,
1257 len => $len,
1258 depth => $depth,
1259 encoding => $encoding,
1260 is_valid_utf8 => $is_valid_utf8,
1261 };
1262 }
1263
1264} # PARSE
1265
1266
1267sub _decode_surrogates { # from perlunicode
1268 my $uni = 0x10000 + (hex($_[0]) - 0xD800) * 0x400 + (hex($_[1]) - 0xDC00);
1269 my $un = pack('U*', $uni);
1270 utf8::encode( $un );
1271 return $un;
1272}
1273
1274
1275sub _decode_unicode {
1276 my $un = pack('U', hex shift);
1277 utf8::encode( $un );
1278 return $un;
1279}
1280
1281#
1282# Setup for various Perl versions (the code from JSON::PP58)
1283#
1284
1285BEGIN {
1286
1287 unless ( defined &utf8::is_utf8 ) {
1288 require Encode;
1289 *utf8::is_utf8 = *Encode::is_utf8;
1290 }
1291
dec273dc 1292 if ( !OLD_PERL ) {
d5424315
DG
1293 *JSON::PP::JSON_PP_encode_ascii = \&_encode_ascii;
1294 *JSON::PP::JSON_PP_encode_latin1 = \&_encode_latin1;
1295 *JSON::PP::JSON_PP_decode_surrogates = \&_decode_surrogates;
1296 *JSON::PP::JSON_PP_decode_unicode = \&_decode_unicode;
d5424315 1297
dec273dc
CBW
1298 if ($] < 5.008003) { # join() in 5.8.0 - 5.8.2 is broken.
1299 package JSON::PP;
1300 require subs;
1301 subs->import('join');
1302 eval q|
1303 sub join {
1304 return '' if (@_ < 2);
1305 my $j = shift;
1306 my $str = shift;
1307 for (@_) { $str .= $j . $_; }
1308 return $str;
1309 }
1310 |;
1311 }
d5424315
DG
1312 }
1313
1314
1315 sub JSON::PP::incr_parse {
1316 local $Carp::CarpLevel = 1;
1317 ( $_[0]->{_incr_parser} ||= JSON::PP::IncrParser->new )->incr_parse( @_ );
1318 }
1319
1320
1321 sub JSON::PP::incr_skip {
1322 ( $_[0]->{_incr_parser} ||= JSON::PP::IncrParser->new )->incr_skip;
1323 }
1324
1325
1326 sub JSON::PP::incr_reset {
1327 ( $_[0]->{_incr_parser} ||= JSON::PP::IncrParser->new )->incr_reset;
1328 }
1329
1330 eval q{
1331 sub JSON::PP::incr_text : lvalue {
1332 $_[0]->{_incr_parser} ||= JSON::PP::IncrParser->new;
1333
dec273dc 1334 if ( $_[0]->{_incr_parser}->{incr_pos} ) {
a1e5c561 1335 Carp::croak("incr_text cannot be called when the incremental parser already started parsing");
d5424315
DG
1336 }
1337 $_[0]->{_incr_parser}->{incr_text};
1338 }
1339 } if ( $] >= 5.006 );
1340
1341} # Setup for various Perl versions (the code from JSON::PP58)
1342
1343
1344###############################
1345# Utilities
1346#
1347
1348BEGIN {
1349 eval 'require Scalar::Util';
1350 unless($@){
1351 *JSON::PP::blessed = \&Scalar::Util::blessed;
1352 *JSON::PP::reftype = \&Scalar::Util::reftype;
05bd2440 1353 *JSON::PP::refaddr = \&Scalar::Util::refaddr;
d5424315 1354 }
dec273dc 1355 else{ # This code is from Scalar::Util.
d5424315
DG
1356 # warn $@;
1357 eval 'sub UNIVERSAL::a_sub_not_likely_to_be_here { ref($_[0]) }';
1358 *JSON::PP::blessed = sub {
1359 local($@, $SIG{__DIE__}, $SIG{__WARN__});
1360 ref($_[0]) ? eval { $_[0]->a_sub_not_likely_to_be_here } : undef;
1361 };
dec273dc 1362 require B;
d5424315
DG
1363 my %tmap = qw(
1364 B::NULL SCALAR
1365 B::HV HASH
1366 B::AV ARRAY
1367 B::CV CODE
1368 B::IO IO
1369 B::GV GLOB
1370 B::REGEXP REGEXP
1371 );
1372 *JSON::PP::reftype = sub {
1373 my $r = shift;
1374
1375 return undef unless length(ref($r));
1376
1377 my $t = ref(B::svref_2object($r));
1378
1379 return
1380 exists $tmap{$t} ? $tmap{$t}
1381 : length(ref($$r)) ? 'REF'
1382 : 'SCALAR';
1383 };
05bd2440
CBW
1384 *JSON::PP::refaddr = sub {
1385 return undef unless length(ref($_[0]));
1386
1387 my $addr;
1388 if(defined(my $pkg = blessed($_[0]))) {
1389 $addr .= bless $_[0], 'Scalar::Util::Fake';
1390 bless $_[0], $pkg;
1391 }
1392 else {
1393 $addr .= $_[0]
1394 }
1395
1396 $addr =~ /0x(\w+)/;
1397 local $^W;
1398 #no warnings 'portable';
1399 hex($1);
1400 }
d5424315
DG
1401 }
1402}
1403
1404
a1e5c561 1405# shamelessly copied and modified from JSON::XS code.
d5424315
DG
1406
1407$JSON::PP::true = do { bless \(my $dummy = 1), "JSON::PP::Boolean" };
1408$JSON::PP::false = do { bless \(my $dummy = 0), "JSON::PP::Boolean" };
1409
9c399ae2 1410sub is_bool { blessed $_[0] and $_[0]->isa("JSON::PP::Boolean"); }
d5424315
DG
1411
1412sub true { $JSON::PP::true }
1413sub false { $JSON::PP::false }
1414sub null { undef; }
1415
1416###############################
1417
d5424315
DG
1418package JSON::PP::IncrParser;
1419
1420use strict;
1421
1422use constant INCR_M_WS => 0; # initial whitespace skipping
1423use constant INCR_M_STR => 1; # inside string
1424use constant INCR_M_BS => 2; # inside backslash
1425use constant INCR_M_JSON => 3; # outside anything, count nesting
1426use constant INCR_M_C0 => 4;
1427use constant INCR_M_C1 => 5;
1428
1429$JSON::PP::IncrParser::VERSION = '1.01';
1430
d5424315
DG
1431sub new {
1432 my ( $class ) = @_;
1433
1434 bless {
1435 incr_nest => 0,
1436 incr_text => undef,
dec273dc
CBW
1437 incr_pos => 0,
1438 incr_mode => 0,
d5424315
DG
1439 }, $class;
1440}
1441
1442
1443sub incr_parse {
1444 my ( $self, $coder, $text ) = @_;
1445
1446 $self->{incr_text} = '' unless ( defined $self->{incr_text} );
1447
1448 if ( defined $text ) {
1449 if ( utf8::is_utf8( $text ) and !utf8::is_utf8( $self->{incr_text} ) ) {
1450 utf8::upgrade( $self->{incr_text} ) ;
1451 utf8::decode( $self->{incr_text} ) ;
1452 }
1453 $self->{incr_text} .= $text;
1454 }
1455
d5424315 1456 if ( defined wantarray ) {
dec273dc
CBW
1457 my $max_size = $coder->get_max_size;
1458 my $p = $self->{incr_pos};
1459 my @ret;
1460 {
d5424315 1461 do {
dec273dc
CBW
1462 unless ( $self->{incr_nest} <= 0 and $self->{incr_mode} == INCR_M_JSON ) {
1463 $self->_incr_parse( $coder );
d5424315 1464
dec273dc
CBW
1465 if ( $max_size and $self->{incr_pos} > $max_size ) {
1466 Carp::croak("attempted decode of JSON text of $self->{incr_pos} bytes size, but max_size is set to $max_size");
1467 }
1468 unless ( $self->{incr_nest} <= 0 and $self->{incr_mode} == INCR_M_JSON ) {
1469 # as an optimisation, do not accumulate white space in the incr buffer
1470 if ( $self->{incr_mode} == INCR_M_WS and $self->{incr_pos} ) {
1471 $self->{incr_pos} = 0;
1472 $self->{incr_text} = '';
1473 }
1474 last;
1475 }
d5424315
DG
1476 }
1477
dec273dc
CBW
1478 my ($obj, $offset) = $coder->PP_decode_json( $self->{incr_text}, 0x00000001 );
1479 push @ret, $obj;
1480 use bytes;
1481 $self->{incr_text} = substr( $self->{incr_text}, $offset || 0 );
1482 $self->{incr_pos} = 0;
1483 $self->{incr_nest} = 0;
1484 $self->{incr_mode} = 0;
1485 last unless wantarray;
1486 } while ( wantarray );
1487 }
d5424315 1488
dec273dc 1489 if ( wantarray ) {
d5424315
DG
1490 return @ret;
1491 }
1492 else { # in scalar context
dec273dc 1493 return $ret[0] ? $ret[0] : undef;
d5424315 1494 }
d5424315 1495 }
d5424315
DG
1496}
1497
1498
1499sub _incr_parse {
dec273dc
CBW
1500 my ($self, $coder) = @_;
1501 my $text = $self->{incr_text};
d5424315 1502 my $len = length $text;
dec273dc 1503 my $p = $self->{incr_pos};
d5424315 1504
dec273dc 1505INCR_PARSE:
d5424315 1506 while ( $len > $p ) {
dec273dc
CBW
1507 my $s = substr( $text, $p, 1 );
1508 last INCR_PARSE unless defined $s;
1509 my $mode = $self->{incr_mode};
1510
1511 if ( $mode == INCR_M_WS ) {
1512 while ( $len > $p ) {
1513 $s = substr( $text, $p, 1 );
1514 last INCR_PARSE unless defined $s;
1515 if ( ord($s) > 0x20 ) {
1516 if ( $s eq '#' ) {
1517 $self->{incr_mode} = INCR_M_C0;
1518 redo INCR_PARSE;
1519 } else {
1520 $self->{incr_mode} = INCR_M_JSON;
1521 redo INCR_PARSE;
1522 }
1523 }
1524 $p++;
d5424315 1525 }
dec273dc
CBW
1526 } elsif ( $mode == INCR_M_BS ) {
1527 $p++;
1528 $self->{incr_mode} = INCR_M_STR;
1529 redo INCR_PARSE;
1530 } elsif ( $mode == INCR_M_C0 or $mode == INCR_M_C1 ) {
1531 while ( $len > $p ) {
1532 $s = substr( $text, $p, 1 );
1533 last INCR_PARSE unless defined $s;
1534 if ( $s eq "\n" ) {
1535 $self->{incr_mode} = $self->{incr_mode} == INCR_M_C0 ? INCR_M_WS : INCR_M_JSON;
d5424315
DG
1536 last;
1537 }
dec273dc 1538 $p++;
d5424315 1539 }
dec273dc
CBW
1540 next;
1541 } elsif ( $mode == INCR_M_STR ) {
1542 while ( $len > $p ) {
1543 $s = substr( $text, $p, 1 );
1544 last INCR_PARSE unless defined $s;
1545 if ( $s eq '"' ) {
1546 $p++;
1547 $self->{incr_mode} = INCR_M_JSON;
1548
1549 last INCR_PARSE unless $self->{incr_nest};
1550 redo INCR_PARSE;
d5424315 1551 }
dec273dc
CBW
1552 elsif ( $s eq '\\' ) {
1553 $p++;
1554 if ( !defined substr($text, $p, 1) ) {
1555 $self->{incr_mode} = INCR_M_BS;
1556 last INCR_PARSE;
1557 }
1558 }
1559 $p++;
d5424315 1560 }
dec273dc
CBW
1561 } elsif ( $mode == INCR_M_JSON ) {
1562 while ( $len > $p ) {
1563 $s = substr( $text, $p++, 1 );
1564 if ( $s eq "\x00" ) {
1565 $p--;
1566 last INCR_PARSE;
1567 } elsif ( $s eq "\x09" or $s eq "\x0a" or $s eq "\x0d" or $s eq "\x20" ) {
1568 if ( !$self->{incr_nest} ) {
1569 $p--; # do not eat the whitespace, let the next round do it
1570 last INCR_PARSE;
1571 }
1572 next;
1573 } elsif ( $s eq '"' ) {
1574 $self->{incr_mode} = INCR_M_STR;
1575 redo INCR_PARSE;
1576 } elsif ( $s eq '[' or $s eq '{' ) {
1577 if ( ++$self->{incr_nest} > $coder->get_max_depth ) {
1578 Carp::croak('json text or perl structure exceeds maximum nesting level (max_depth set too low?)');
1579 }
1580 next;
1581 } elsif ( $s eq ']' or $s eq '}' ) {
1582 if ( --$self->{incr_nest} <= 0 ) {
1583 last INCR_PARSE;
1584 }
1585 } elsif ( $s eq '#' ) {
1586 $self->{incr_mode} = INCR_M_C1;
1587 redo INCR_PARSE;
d5424315
DG
1588 }
1589 }
d5424315 1590 }
d5424315
DG
1591 }
1592
dec273dc
CBW
1593 $self->{incr_pos} = $p;
1594 $self->{incr_parsing} = $p ? 1 : 0; # for backward compatibility
d5424315
DG
1595}
1596
1597
1598sub incr_text {
dec273dc 1599 if ( $_[0]->{incr_pos} ) {
a1e5c561 1600 Carp::croak("incr_text cannot be called when the incremental parser already started parsing");
d5424315
DG
1601 }
1602 $_[0]->{incr_text};
1603}
1604
1605
1606sub incr_skip {
1607 my $self = shift;
dec273dc
CBW
1608 $self->{incr_text} = substr( $self->{incr_text}, $self->{incr_pos} );
1609 $self->{incr_pos} = 0;
1610 $self->{incr_mode} = 0;
1611 $self->{incr_nest} = 0;
d5424315
DG
1612}
1613
1614
1615sub incr_reset {
1616 my $self = shift;
1617 $self->{incr_text} = undef;
dec273dc 1618 $self->{incr_pos} = 0;
d5424315
DG
1619 $self->{incr_mode} = 0;
1620 $self->{incr_nest} = 0;
d5424315
DG
1621}
1622
1623###############################
1624
1625
16261;
1627__END__
1628=pod
1629
1630=head1 NAME
1631
1632JSON::PP - JSON::XS compatible pure-Perl module.
1633
1634=head1 SYNOPSIS
1635
1636 use JSON::PP;
1637
1638 # exported functions, they croak on error
1639 # and expect/generate UTF-8
1640
1641 $utf8_encoded_json_text = encode_json $perl_hash_or_arrayref;
1642 $perl_hash_or_arrayref = decode_json $utf8_encoded_json_text;
1643
1644 # OO-interface
1645
dec273dc 1646 $json = JSON::PP->new->ascii->pretty->allow_nonref;
d5424315 1647
dec273dc 1648 $pretty_printed_json_text = $json->encode( $perl_scalar );
d5424315
DG
1649 $perl_scalar = $json->decode( $json_text );
1650
d5424315
DG
1651 # Note that JSON version 2.0 and above will automatically use
1652 # JSON::XS or JSON::PP, so you should be able to just:
1653
1654 use JSON;
1655
1656
1657=head1 VERSION
1658
9bccc4c2 1659 2.97001
d5424315
DG
1660
1661=head1 DESCRIPTION
1662
dec273dc
CBW
1663JSON::PP is a pure perl JSON decoder/encoder (as of RFC4627, which
1664we know is obsolete but we still stick to; see below for an option
1665to support part of RFC7159), and (almost) compatible to much
1666faster L<JSON::XS> written by Marc Lehmann in C. JSON::PP works as
1667a fallback module when you use L<JSON> module without having
1668installed JSON::XS.
1669
1670Because of this fallback feature of JSON.pm, JSON::PP tries not to
1671be more JavaScript-friendly than JSON::XS (i.e. not to escape extra
1672characters such as U+2028 and U+2029 nor support RFC7159/ECMA-404),
1673in order for you not to lose such JavaScript-friendliness silently
1674when you use JSON.pm and install JSON::XS for speed or by accident.
1675If you need JavaScript-friendly RFC7159-compliant pure perl module,
1676try L<JSON::Tiny>, which is derived from L<Mojolicious> web
1677framework and is also smaller and faster than JSON::PP.
1678
1679JSON::PP has been in the Perl core since Perl 5.14, mainly for
1680CPAN toolchain modules to parse META.json.
d5424315
DG
1681
1682=head1 FUNCTIONAL INTERFACE
1683
dec273dc
CBW
1684This section is taken from JSON::XS almost verbatim. C<encode_json>
1685and C<decode_json> are exported by default.
d5424315
DG
1686
1687=head2 encode_json
1688
1689 $json_text = encode_json $perl_scalar
1690
dec273dc
CBW
1691Converts the given Perl data structure to a UTF-8 encoded, binary string
1692(that is, the string contains octets only). Croaks on error.
d5424315
DG
1693
1694This function call is functionally identical to:
1695
1696 $json_text = JSON::PP->new->utf8->encode($perl_scalar)
1697
dec273dc
CBW
1698Except being faster.
1699
d5424315
DG
1700=head2 decode_json
1701
1702 $perl_scalar = decode_json $json_text
1703
1704The opposite of C<encode_json>: expects an UTF-8 (binary) string and tries
1705to parse that as an UTF-8 encoded JSON text, returning the resulting
dec273dc 1706reference. Croaks on error.
d5424315
DG
1707
1708This function call is functionally identical to:
1709
1710 $perl_scalar = JSON::PP->new->utf8->decode($json_text)
1711
dec273dc
CBW
1712Except being faster.
1713
d5424315
DG
1714=head2 JSON::PP::is_bool
1715
1716 $is_boolean = JSON::PP::is_bool($scalar)
1717
1718Returns true if the passed scalar represents either JSON::PP::true or
1719JSON::PP::false, two constants that act like C<1> and C<0> respectively
1720and are also used to represent JSON C<true> and C<false> in Perl strings.
1721
d5424315
DG
1722See L<MAPPING>, below, for more information on how JSON values are mapped to
1723Perl.
1724
dec273dc 1725=head1 OBJECT-ORIENTED INTERFACE
d5424315 1726
dec273dc 1727This section is also taken from JSON::XS.
d5424315 1728
dec273dc
CBW
1729The object oriented interface lets you configure your own encoding or
1730decoding style, within the limits of supported formats.
d5424315
DG
1731
1732=head2 new
1733
7942a65c 1734 $json = JSON::PP->new
d5424315 1735
dec273dc
CBW
1736Creates a new JSON::PP object that can be used to de/encode JSON
1737strings. All boolean flags described below are by default I<disabled>.
d5424315 1738
dec273dc 1739The mutators for flags all return the JSON::PP object again and thus calls can
d5424315
DG
1740be chained:
1741
1742 my $json = JSON::PP->new->utf8->space_after->encode({a => [1,2]})
1743 => {"a": [1, 2]}
1744
1745=head2 ascii
1746
1747 $json = $json->ascii([$enable])
1748
1749 $enabled = $json->get_ascii
1750
dec273dc
CBW
1751If C<$enable> is true (or missing), then the C<encode> method will not
1752generate characters outside the code range C<0..127> (which is ASCII). Any
1753Unicode characters outside that range will be escaped using either a
1754single \uXXXX (BMP characters) or a double \uHHHH\uLLLLL escape sequence,
1755as per RFC4627. The resulting encoded JSON text can be treated as a native
1756Unicode string, an ascii-encoded, latin1-encoded or UTF-8 encoded string,
1757or any other superset of ASCII.
1758
1759If C<$enable> is false, then the C<encode> method will not escape Unicode
1760characters unless required by the JSON syntax or other flags. This results
1761in a faster and more compact format.
d5424315 1762
dec273dc 1763See also the section I<ENCODING/CODESET FLAG NOTES> later in this document.
d5424315 1764
dec273dc
CBW
1765The main use for this flag is to produce JSON texts that can be
1766transmitted over a 7-bit channel, as the encoded JSON texts will not
1767contain any 8 bit characters.
d5424315
DG
1768
1769 JSON::PP->new->ascii(1)->encode([chr 0x10401])
1770 => ["\ud801\udc01"]
1771
1772=head2 latin1
1773
1774 $json = $json->latin1([$enable])
1775
1776 $enabled = $json->get_latin1
1777
dec273dc
CBW
1778If C<$enable> is true (or missing), then the C<encode> method will encode
1779the resulting JSON text as latin1 (or iso-8859-1), escaping any characters
1780outside the code range C<0..255>. The resulting string can be treated as a
1781latin1-encoded JSON text or a native Unicode string. The C<decode> method
1782will not be affected in any way by this flag, as C<decode> by default
1783expects Unicode, which is a strict superset of latin1.
d5424315 1784
dec273dc
CBW
1785If C<$enable> is false, then the C<encode> method will not escape Unicode
1786characters unless required by the JSON syntax or other flags.
d5424315 1787
dec273dc
CBW
1788See also the section I<ENCODING/CODESET FLAG NOTES> later in this document.
1789
1790The main use for this flag is efficiently encoding binary data as JSON
1791text, as most octets will not be escaped, resulting in a smaller encoded
1792size. The disadvantage is that the resulting JSON text is encoded
1793in latin1 (and must correctly be treated as such when storing and
1794transferring), a rare encoding for JSON. It is therefore most useful when
1795you want to store data structures known to contain binary data efficiently
1796in files or databases, not when talking to other JSON encoders/decoders.
d5424315 1797
dec273dc
CBW
1798 JSON::PP->new->latin1->encode (["\x{89}\x{abc}"]
1799 => ["\x{89}\\u0abc"] # (perl syntax, U+abc escaped, U+89 not)
d5424315
DG
1800
1801=head2 utf8
1802
1803 $json = $json->utf8([$enable])
1804
1805 $enabled = $json->get_utf8
1806
dec273dc
CBW
1807If C<$enable> is true (or missing), then the C<encode> method will encode
1808the JSON result into UTF-8, as required by many protocols, while the
1809C<decode> method expects to be handled an UTF-8-encoded string. Please
1810note that UTF-8-encoded strings do not contain any characters outside the
1811range C<0..255>, they are thus useful for bytewise/binary I/O. In future
1812versions, enabling this option might enable autodetection of the UTF-16
1813and UTF-32 encoding families, as described in RFC4627.
d5424315 1814
dec273dc
CBW
1815If C<$enable> is false, then the C<encode> method will return the JSON
1816string as a (non-encoded) Unicode string, while C<decode> expects thus a
1817Unicode string. Any decoding or encoding (e.g. to UTF-8 or UTF-16) needs
1818to be done yourself, e.g. using the Encode module.
d5424315 1819
dec273dc 1820See also the section I<ENCODING/CODESET FLAG NOTES> later in this document.
d5424315
DG
1821
1822Example, output UTF-16BE-encoded JSON:
1823
1824 use Encode;
1825 $jsontext = encode "UTF-16BE", JSON::PP->new->encode ($object);
1826
1827Example, decode UTF-32LE-encoded JSON:
1828
1829 use Encode;
1830 $object = JSON::PP->new->decode (decode "UTF-32LE", $jsontext);
1831
d5424315
DG
1832=head2 pretty
1833
1834 $json = $json->pretty([$enable])
1835
1836This enables (or disables) all of the C<indent>, C<space_before> and
dec273dc
CBW
1837C<space_after> (and in the future possibly more) flags in one call to
1838generate the most readable (or most compact) form possible.
d5424315
DG
1839
1840=head2 indent
1841
1842 $json = $json->indent([$enable])
1843
1844 $enabled = $json->get_indent
1845
dec273dc
CBW
1846If C<$enable> is true (or missing), then the C<encode> method will use a multiline
1847format as output, putting every array member or object/hash key-value pair
1848into its own line, indenting them properly.
1849
1850If C<$enable> is false, no newlines or indenting will be produced, and the
1851resulting JSON text is guaranteed not to contain any C<newlines>.
1852
1853This setting has no effect when decoding JSON texts.
1854
d5424315
DG
1855The default indent space length is three.
1856You can use C<indent_length> to change the length.
1857
1858=head2 space_before
1859
1860 $json = $json->space_before([$enable])
1861
1862 $enabled = $json->get_space_before
1863
1864If C<$enable> is true (or missing), then the C<encode> method will add an extra
1865optional space before the C<:> separating keys from values in JSON objects.
1866
1867If C<$enable> is false, then the C<encode> method will not add any extra
1868space at those places.
1869
dec273dc
CBW
1870This setting has no effect when decoding JSON texts. You will also
1871most likely combine this setting with C<space_after>.
d5424315
DG
1872
1873Example, space_before enabled, space_after and indent disabled:
1874
1875 {"key" :"value"}
1876
1877=head2 space_after
1878
1879 $json = $json->space_after([$enable])
1880
1881 $enabled = $json->get_space_after
1882
1883If C<$enable> is true (or missing), then the C<encode> method will add an extra
1884optional space after the C<:> separating keys from values in JSON objects
1885and extra whitespace after the C<,> separating key-value pairs and array
1886members.
1887
1888If C<$enable> is false, then the C<encode> method will not add any extra
1889space at those places.
1890
1891This setting has no effect when decoding JSON texts.
1892
1893Example, space_before and indent disabled, space_after enabled:
1894
1895 {"key": "value"}
1896
1897=head2 relaxed
1898
1899 $json = $json->relaxed([$enable])
1900
1901 $enabled = $json->get_relaxed
1902
1903If C<$enable> is true (or missing), then C<decode> will accept some
1904extensions to normal JSON syntax (see below). C<encode> will not be
1905affected in anyway. I<Be aware that this option makes you accept invalid
1906JSON texts as if they were valid!>. I suggest only to use this option to
1907parse application-specific files written by humans (configuration files,
1908resource files etc.)
1909
1910If C<$enable> is false (the default), then C<decode> will only accept
1911valid JSON texts.
1912
1913Currently accepted extensions are:
1914
1915=over 4
1916
1917=item * list items can have an end-comma
1918
1919JSON I<separates> array elements and key-value pairs with commas. This
1920can be annoying if you write JSON texts manually and want to be able to
1921quickly append elements, so this extension accepts comma at the end of
1922such items not just between them:
1923
1924 [
1925 1,
1926 2, <- this comma not normally allowed
1927 ]
1928 {
1929 "k1": "v1",
1930 "k2": "v2", <- this comma not normally allowed
1931 }
1932
1933=item * shell-style '#'-comments
1934
1935Whenever JSON allows whitespace, shell-style comments are additionally
1936allowed. They are terminated by the first carriage-return or line-feed
1937character, after which more white-space and comments are allowed.
1938
1939 [
1940 1, # this comment not allowed in JSON
1941 # neither this one...
1942 ]
1943
dec273dc
CBW
1944=item * C-style multiple-line '/* */'-comments (JSON::PP only)
1945
1946Whenever JSON allows whitespace, C-style multiple-line comments are additionally
1947allowed. Everything between C</*> and C<*/> is a comment, after which
1948more white-space and comments are allowed.
1949
1950 [
1951 1, /* this comment not allowed in JSON */
1952 /* neither this one... */
1953 ]
1954
1955=item * C++-style one-line '//'-comments (JSON::PP only)
1956
1957Whenever JSON allows whitespace, C++-style one-line comments are additionally
1958allowed. They are terminated by the first carriage-return or line-feed
1959character, after which more white-space and comments are allowed.
1960
1961 [
1962 1, // this comment not allowed in JSON
1963 // neither this one...
1964 ]
1965
d5424315
DG
1966=back
1967
1968=head2 canonical
1969
1970 $json = $json->canonical([$enable])
1971
1972 $enabled = $json->get_canonical
1973
1974If C<$enable> is true (or missing), then the C<encode> method will output JSON objects
1975by sorting their keys. This is adding a comparatively high overhead.
1976
1977If C<$enable> is false, then the C<encode> method will output key-value
1978pairs in the order Perl stores them (which will likely change between runs
dec273dc
CBW
1979of the same script, and can change even within the same run from 5.18
1980onwards).
d5424315
DG
1981
1982This option is useful if you want the same data structure to be encoded as
1983the same JSON text (given the same overall settings). If it is disabled,
1984the same hash might be encoded differently even if contains the same data,
1985as key-value pairs have no inherent ordering in Perl.
1986
1987This setting has no effect when decoding JSON texts.
1988
dec273dc 1989This setting has currently no effect on tied hashes.
d5424315
DG
1990
1991=head2 allow_nonref
1992
1993 $json = $json->allow_nonref([$enable])
1994
1995 $enabled = $json->get_allow_nonref
1996
1997If C<$enable> is true (or missing), then the C<encode> method can convert a
1998non-reference into its corresponding string, number or null JSON value,
1999which is an extension to RFC4627. Likewise, C<decode> will accept those JSON
2000values instead of croaking.
2001
2002If C<$enable> is false, then the C<encode> method will croak if it isn't
2003passed an arrayref or hashref, as JSON texts must either be an object
2004or array. Likewise, C<decode> will croak if given something that is not a
2005JSON object or array.
2006
dec273dc
CBW
2007Example, encode a Perl scalar as JSON value with enabled C<allow_nonref>,
2008resulting in an invalid JSON text:
2009
d5424315
DG
2010 JSON::PP->new->allow_nonref->encode ("Hello, World!")
2011 => "Hello, World!"
2012
2013=head2 allow_unknown
2014
2015 $json = $json->allow_unknown ([$enable])
2016
2017 $enabled = $json->get_allow_unknown
2018
dec273dc 2019If C<$enable> is true (or missing), then C<encode> will I<not> throw an
d5424315 2020exception when it encounters values it cannot represent in JSON (for
dec273dc
CBW
2021example, filehandles) but instead will encode a JSON C<null> value. Note
2022that blessed objects are not included here and are handled separately by
2023c<allow_blessed>.
d5424315 2024
dec273dc 2025If C<$enable> is false (the default), then C<encode> will throw an
d5424315
DG
2026exception when it encounters anything it cannot encode as JSON.
2027
dec273dc
CBW
2028This option does not affect C<decode> in any way, and it is recommended to
2029leave it off unless you know your communications partner.
d5424315
DG
2030
2031=head2 allow_blessed
2032
2033 $json = $json->allow_blessed([$enable])
2034
2035 $enabled = $json->get_allow_blessed
2036
dec273dc
CBW
2037See L<OBJECT SERIALISATION> for details.
2038
d5424315 2039If C<$enable> is true (or missing), then the C<encode> method will not
dec273dc
CBW
2040barf when it encounters a blessed reference that it cannot convert
2041otherwise. Instead, a JSON C<null> value is encoded instead of the object.
d5424315
DG
2042
2043If C<$enable> is false (the default), then C<encode> will throw an
dec273dc
CBW
2044exception when it encounters a blessed object that it cannot convert
2045otherwise.
2046
2047This setting has no effect on C<decode>.
d5424315
DG
2048
2049=head2 convert_blessed
2050
2051 $json = $json->convert_blessed([$enable])
2052
2053 $enabled = $json->get_convert_blessed
2054
dec273dc
CBW
2055See L<OBJECT SERIALISATION> for details.
2056
d5424315
DG
2057If C<$enable> is true (or missing), then C<encode>, upon encountering a
2058blessed object, will check for the availability of the C<TO_JSON> method
dec273dc
CBW
2059on the object's class. If found, it will be called in scalar context and
2060the resulting scalar will be encoded instead of the object.
d5424315
DG
2061
2062The C<TO_JSON> method may safely call die if it wants. If C<TO_JSON>
2063returns other blessed objects, those will be handled in the same
2064way. C<TO_JSON> must take care of not causing an endless recursion cycle
2065(== crash) in this case. The name of C<TO_JSON> was chosen because other
2066methods called by the Perl core (== not by the user of the object) are
dec273dc 2067usually in upper case letters and to avoid collisions with any C<to_json>
d5424315
DG
2068function or method.
2069
dec273dc
CBW
2070If C<$enable> is false (the default), then C<encode> will not consider
2071this type of conversion.
d5424315 2072
dec273dc 2073This setting has no effect on C<decode>.
d5424315
DG
2074
2075=head2 filter_json_object
2076
2077 $json = $json->filter_json_object([$coderef])
2078
2079When C<$coderef> is specified, it will be called from C<decode> each
dec273dc
CBW
2080time it decodes a JSON object. The only argument is a reference to the
2081newly-created hash. If the code references returns a single scalar (which
2082need not be a reference), this value (i.e. a copy of that scalar to avoid
2083aliasing) is inserted into the deserialised data structure. If it returns
2084an empty list (NOTE: I<not> C<undef>, which is a valid scalar), the
2085original deserialised hash will be inserted. This setting can slow down
2086decoding considerably.
d5424315
DG
2087
2088When C<$coderef> is omitted or undefined, any existing callback will
2089be removed and C<decode> will not change the deserialised hash in any
2090way.
2091
2092Example, convert all JSON objects into the integer 5:
2093
2094 my $js = JSON::PP->new->filter_json_object (sub { 5 });
2095 # returns [5]
2096 $js->decode ('[{}]'); # the given subroutine takes a hash reference.
2097 # throw an exception because allow_nonref is not enabled
2098 # so a lone 5 is not allowed.
2099 $js->decode ('{"a":1, "b":2}');
2100
2101=head2 filter_json_single_key_object
2102
2103 $json = $json->filter_json_single_key_object($key [=> $coderef])
2104
2105Works remotely similar to C<filter_json_object>, but is only called for
2106JSON objects having a single key named C<$key>.
2107
2108This C<$coderef> is called before the one specified via
2109C<filter_json_object>, if any. It gets passed the single value in the JSON
2110object. If it returns a single value, it will be inserted into the data
2111structure. If it returns nothing (not even C<undef> but the empty list),
2112the callback from C<filter_json_object> will be called next, as if no
2113single-key callback were specified.
2114
2115If C<$coderef> is omitted or undefined, the corresponding callback will be
2116disabled. There can only ever be one callback for a given key.
2117
2118As this callback gets called less often then the C<filter_json_object>
2119one, decoding speed will not usually suffer as much. Therefore, single-key
2120objects make excellent targets to serialise Perl objects into, especially
2121as single-key JSON objects are as close to the type-tagged value concept
2122as JSON gets (it's basically an ID/VALUE tuple). Of course, JSON does not
2123support this in any way, so you need to make sure your data never looks
2124like a serialised Perl hash.
2125
2126Typical names for the single object key are C<__class_whatever__>, or
2127C<$__dollars_are_rarely_used__$> or C<}ugly_brace_placement>, or even
2128things like C<__class_md5sum(classname)__>, to reduce the risk of clashing
2129with real hashes.
2130
2131Example, decode JSON objects of the form C<< { "__widget__" => <id> } >>
2132into the corresponding C<< $WIDGET{<id>} >> object:
2133
2134 # return whatever is in $WIDGET{5}:
2135 JSON::PP
2136 ->new
2137 ->filter_json_single_key_object (__widget__ => sub {
2138 $WIDGET{ $_[0] }
2139 })
2140 ->decode ('{"__widget__": 5')
2141
2142 # this can be used with a TO_JSON method in some "widget" class
2143 # for serialisation to json:
2144 sub WidgetBase::TO_JSON {
2145 my ($self) = @_;
2146
2147 unless ($self->{id}) {
2148 $self->{id} = ..get..some..id..;
2149 $WIDGET{$self->{id}} = $self;
2150 }
2151
2152 { __widget__ => $self->{id} }
2153 }
2154
2155=head2 shrink
2156
2157 $json = $json->shrink([$enable])
2158
2159 $enabled = $json->get_shrink
2160
dec273dc
CBW
2161If C<$enable> is true (or missing), the string returned by C<encode> will
2162be shrunk (i.e. downgraded if possible).
d5424315 2163
dec273dc
CBW
2164The actual definition of what shrink does might change in future versions,
2165but it will always try to save space at the expense of time.
d5424315 2166
dec273dc 2167If C<$enable> is false, then JSON::PP does nothing.
d5424315
DG
2168
2169=head2 max_depth
2170
2171 $json = $json->max_depth([$maximum_nesting_depth])
2172
2173 $max_depth = $json->get_max_depth
2174
2175Sets the maximum nesting level (default C<512>) accepted while encoding
2176or decoding. If a higher nesting level is detected in JSON text or a Perl
2177data structure, then the encoder and decoder will stop and croak at that
2178point.
2179
2180Nesting level is defined by number of hash- or arrayrefs that the encoder
2181needs to traverse to reach a given point or the number of C<{> or C<[>
2182characters without their matching closing parenthesis crossed to reach a
2183given character in a string.
2184
dec273dc
CBW
2185Setting the maximum depth to one disallows any nesting, so that ensures
2186that the object is only a single hash/object or array.
2187
d5424315
DG
2188If no argument is given, the highest possible setting will be used, which
2189is rarely useful.
2190
a1e5c561 2191See L<JSON::XS/SECURITY CONSIDERATIONS> for more info on why this is useful.
d5424315 2192
d5424315
DG
2193=head2 max_size
2194
2195 $json = $json->max_size([$maximum_string_size])
2196
2197 $max_size = $json->get_max_size
2198
2199Set the maximum length a JSON text may have (in bytes) where decoding is
2200being attempted. The default is C<0>, meaning no limit. When C<decode>
2201is called on a string that is longer then this many bytes, it will not
2202attempt to decode the string but throw an exception. This setting has no
2203effect on C<encode> (yet).
2204
2205If no argument is given, the limit check will be deactivated (same as when
2206C<0> is specified).
2207
a1e5c561 2208See L<JSON::XS/SECURITY CONSIDERATIONS> for more info on why this is useful.
d5424315
DG
2209
2210=head2 encode
2211
2212 $json_text = $json->encode($perl_scalar)
2213
dec273dc
CBW
2214Converts the given Perl value or data structure to its JSON
2215representation. Croaks on error.
d5424315
DG
2216
2217=head2 decode
2218
2219 $perl_scalar = $json->decode($json_text)
2220
2221The opposite of C<encode>: expects a JSON text and tries to parse it,
2222returning the resulting simple scalar or reference. Croaks on error.
2223
d5424315
DG
2224=head2 decode_prefix
2225
2226 ($perl_scalar, $characters) = $json->decode_prefix($json_text)
2227
2228This works like the C<decode> method, but instead of raising an exception
2229when there is trailing garbage after the first JSON object, it will
2230silently stop parsing there and return the number of characters consumed
2231so far.
2232
dec273dc
CBW
2233This is useful if your JSON texts are not delimited by an outer protocol
2234and you need to know where the JSON text ends.
2235
2236 JSON::PP->new->decode_prefix ("[1] the tail")
2237 => ([1], 3)
2238
2239=head1 FLAGS FOR JSON::PP ONLY
2240
2241The following flags and properties are for JSON::PP only. If you use
2242any of these, you can't make your application run faster by replacing
2243JSON::PP with JSON::XS. If you need these and also speed boost,
2244try L<Cpanel::JSON::XS>, a fork of JSON::XS by Reini Urban, which
2245supports some of these.
2246
2247=head2 allow_singlequote
2248
2249 $json = $json->allow_singlequote([$enable])
2250 $enabled = $json->get_allow_singlequote
2251
2252If C<$enable> is true (or missing), then C<decode> will accept
2253invalid JSON texts that contain strings that begin and end with
2254single quotation marks. C<encode> will not be affected in anyway.
2255I<Be aware that this option makes you accept invalid JSON texts
2256as if they were valid!>. I suggest only to use this option to
2257parse application-specific files written by humans (configuration
2258files, resource files etc.)
2259
2260If C<$enable> is false (the default), then C<decode> will only accept
2261valid JSON texts.
2262
2263 $json->allow_singlequote->decode(qq|{"foo":'bar'}|);
2264 $json->allow_singlequote->decode(qq|{'foo':"bar"}|);
2265 $json->allow_singlequote->decode(qq|{'foo':'bar'}|);
2266
2267=head2 allow_barekey
2268
2269 $json = $json->allow_barekey([$enable])
2270 $enabled = $json->get_allow_barekey
2271
2272If C<$enable> is true (or missing), then C<decode> will accept
2273invalid JSON texts that contain JSON objects whose names don't
2274begin and end with quotation marks. C<encode> will not be affected
2275in anyway. I<Be aware that this option makes you accept invalid JSON
2276texts as if they were valid!>. I suggest only to use this option to
2277parse application-specific files written by humans (configuration
2278files, resource files etc.)
2279
2280If C<$enable> is false (the default), then C<decode> will only accept
2281valid JSON texts.
2282
2283 $json->allow_barekey->decode(qq|{foo:"bar"}|);
2284
2285=head2 allow_bignum
2286
2287 $json = $json->allow_bignum([$enable])
2288 $enabled = $json->get_allow_bignum
2289
2290If C<$enable> is true (or missing), then C<decode> will convert
2291big integers Perl cannot handle as integer into L<Math::BigInt>
2292objects and convert floating numbers into L<Math::BigFloat>
2293objects. C<encode> will convert C<Math::BigInt> and C<Math::BigFloat>
2294objects into JSON numbers.
2295
2296 $json->allow_nonref->allow_bignum;
2297 $bigfloat = $json->decode('2.000000000000000000000000001');
2298 print $json->encode($bigfloat);
2299 # => 2.000000000000000000000000001
2300
2301See also L<MAPPING>.
2302
2303=head2 loose
2304
2305 $json = $json->loose([$enable])
2306 $enabled = $json->get_loose
2307
2308If C<$enable> is true (or missing), then C<decode> will accept
2309invalid JSON texts that contain unescaped [\x00-\x1f\x22\x5c]
2310characters. C<encode> will not be affected in anyway.
2311I<Be aware that this option makes you accept invalid JSON texts
2312as if they were valid!>. I suggest only to use this option to
2313parse application-specific files written by humans (configuration
2314files, resource files etc.)
2315
2316If C<$enable> is false (the default), then C<decode> will only accept
2317valid JSON texts.
2318
2319 $json->loose->decode(qq|["abc
2320 def"]|);
2321
2322=head2 escape_slash
2323
2324 $json = $json->escape_slash([$enable])
2325 $enabled = $json->get_escape_slash
2326
2327If C<$enable> is true (or missing), then C<encode> will explicitly
2328escape I<slash> (solidus; C<U+002F>) characters to reduce the risk of
2329XSS (cross site scripting) that may be caused by C<< </script> >>
2330in a JSON text, with the cost of bloating the size of JSON texts.
2331
2332This option may be useful when you embed JSON in HTML, but embedding
2333arbitrary JSON in HTML (by some HTML template toolkit or by string
2334interpolation) is risky in general. You must escape necessary
2335characters in correct order, depending on the context.
2336
2337C<decode> will not be affected in anyway.
2338
2339=head2 indent_length
2340
2341 $json = $json->indent_length($number_of_spaces)
2342 $length = $json->get_indent_length
2343
2344This option is only useful when you also enable C<indent> or C<pretty>.
2345
2346JSON::XS indents with three spaces when you C<encode> (if requested
2347by C<indent> or C<pretty>), and the number cannot be changed.
2348JSON::PP allows you to change/get the number of indent spaces with these
2349mutator/accessor. The default number of spaces is three (the same as
2350JSON::XS), and the acceptable range is from C<0> (no indentation;
2351it'd be better to disable indentation by C<indent(0)>) to C<15>.
2352
2353=head2 sort_by
2354
2355 $json = $json->sort_by($code_ref)
2356 $json = $json->sort_by($subroutine_name)
2357
2358If you just want to sort keys (names) in JSON objects when you
2359C<encode>, enable C<canonical> option (see above) that allows you to
2360sort object keys alphabetically.
2361
2362If you do need to sort non-alphabetically for whatever reasons,
2363you can give a code reference (or a subroutine name) to C<sort_by>,
2364then the argument will be passed to Perl's C<sort> built-in function.
2365
2366As the sorting is done in the JSON::PP scope, you usually need to
2367prepend C<JSON::PP::> to the subroutine name, and the special variables
2368C<$a> and C<$b> used in the subrontine used by C<sort> function.
2369
2370Example:
2371
2372 my %ORDER = (id => 1, class => 2, name => 3);
2373 $json->sort_by(sub {
2374 ($ORDER{$JSON::PP::a} // 999) <=> ($ORDER{$JSON::PP::b} // 999)
2375 or $JSON::PP::a cmp $JSON::PP::b
2376 });
2377 print $json->encode([
2378 {name => 'CPAN', id => 1, href => 'http://cpan.org'}
2379 ]);
2380 # [{"id":1,"name":"CPAN","href":"http://cpan.org"}]
2381
2382Note that C<sort_by> affects all the plain hashes in the data structure.
2383If you need finer control, C<tie> necessary hashes with a module that
2384implements ordered hash (such as L<Hash::Ordered> and L<Tie::IxHash>).
2385C<canonical> and C<sort_by> don't affect the key order in C<tie>d
2386hashes.
2387
2388 use Hash::Ordered;
2389 tie my %hash, 'Hash::Ordered',
2390 (name => 'CPAN', id => 1, href => 'http://cpan.org');
2391 print $json->encode([\%hash]);
2392 # [{"name":"CPAN","id":1,"href":"http://cpan.org"}] # order is kept
d5424315
DG
2393
2394=head1 INCREMENTAL PARSING
2395
dec273dc 2396This section is also taken from JSON::XS.
d5424315 2397
dec273dc
CBW
2398In some cases, there is the need for incremental parsing of JSON
2399texts. While this module always has to keep both JSON text and resulting
2400Perl data structure in memory at one time, it does allow you to parse a
2401JSON stream incrementally. It does so by accumulating text until it has
2402a full JSON object, which it then can decode. This process is similar to
2403using C<decode_prefix> to see if a full JSON object is available, but
2404is much more efficient (and can be implemented with a minimum of method
2405calls).
d5424315 2406
dec273dc 2407JSON::PP will only attempt to parse the JSON text once it is sure it
d5424315
DG
2408has enough text to get a decisive result, using a very simple but
2409truly incremental parser. This means that it sometimes won't stop as
dec273dc
CBW
2410early as the full parser, for example, it doesn't detect mismatched
2411parentheses. The only thing it guarantees is that it starts decoding as
d5424315
DG
2412soon as a syntactically valid JSON text has been seen. This means you need
2413to set resource limits (e.g. C<max_size>) to ensure the parser will stop
2414parsing in the presence if syntax errors.
2415
2416The following methods implement this incremental parser.
2417
2418=head2 incr_parse
2419
2420 $json->incr_parse( [$string] ) # void context
2421
2422 $obj_or_undef = $json->incr_parse( [$string] ) # scalar context
2423
2424 @obj_or_empty = $json->incr_parse( [$string] ) # list context
2425
2426This is the central parsing function. It can both append new text and
2427extract objects from the stream accumulated so far (both of these
2428functions are optional).
2429
2430If C<$string> is given, then this string is appended to the already
2431existing JSON fragment stored in the C<$json> object.
2432
2433After that, if the function is called in void context, it will simply
2434return without doing anything further. This can be used to add more text
2435in as many chunks as you want.
2436
2437If the method is called in scalar context, then it will try to extract
2438exactly I<one> JSON object. If that is successful, it will return this
2439object, otherwise it will return C<undef>. If there is a parse error,
2440this method will croak just as C<decode> would do (one can then use
a1e5c561 2441C<incr_skip> to skip the erroneous part). This is the most common way of
d5424315
DG
2442using the method.
2443
2444And finally, in list context, it will try to extract as many objects
2445from the stream as it can find and return them, or the empty list
dec273dc
CBW
2446otherwise. For this to work, there must be no separators (other than
2447whitespace) between the JSON objects or arrays, instead they must be
2448concatenated back-to-back. If an error occurs, an exception will be
2449raised as in the scalar context case. Note that in this case, any
2450previously-parsed JSON texts will be lost.
d5424315 2451
dec273dc
CBW
2452Example: Parse some JSON arrays/objects in a given string and return
2453them.
d5424315 2454
dec273dc 2455 my @objs = JSON::PP->new->incr_parse ("[5][7][1,2]");
d5424315
DG
2456
2457=head2 incr_text
2458
2459 $lvalue_string = $json->incr_text
2460
2461This method returns the currently stored JSON fragment as an lvalue, that
2462is, you can manipulate it. This I<only> works when a preceding call to
2463C<incr_parse> in I<scalar context> successfully returned an object. Under
2464all other circumstances you must not call this function (I mean it.
2465although in simple tests it might actually work, it I<will> fail under
2466real world conditions). As a special exception, you can also call this
2467method before having parsed anything.
2468
dec273dc
CBW
2469That means you can only use this function to look at or manipulate text
2470before or after complete JSON objects, not while the parser is in the
2471middle of parsing a JSON object.
2472
d5424315
DG
2473This function is useful in two cases: a) finding the trailing text after a
2474JSON object or b) parsing multiple JSON objects separated by non-JSON text
2475(such as commas).
2476
d5424315
DG
2477=head2 incr_skip
2478
2479 $json->incr_skip
2480
dec273dc
CBW
2481This will reset the state of the incremental parser and will remove
2482the parsed text from the input buffer so far. This is useful after
2483C<incr_parse> died, in which case the input buffer and incremental parser
2484state is left unchanged, to skip the text parsed so far and to reset the
2485parse state.
2486
2487The difference to C<incr_reset> is that only text until the parse error
2488occurred is removed.
d5424315
DG
2489
2490=head2 incr_reset
2491
2492 $json->incr_reset
2493
2494This completely resets the incremental parser, that is, after this call,
2495it will be as if the parser had never parsed anything.
2496
a1e5c561 2497This is useful if you want to repeatedly parse JSON objects and want to
d5424315
DG
2498ignore any trailing data, which means you have to reset the parser after
2499each successful decode.
2500
d5424315
DG
2501=head1 MAPPING
2502
dec273dc
CBW
2503Most of this section is also taken from JSON::XS.
2504
2505This section describes how JSON::PP maps Perl values to JSON values and
2506vice versa. These mappings are designed to "do the right thing" in most
2507circumstances automatically, preserving round-tripping characteristics
2508(what you put in comes out as something equivalent).
d5424315 2509
dec273dc
CBW
2510For the more enlightened: note that in the following descriptions,
2511lowercase I<perl> refers to the Perl interpreter, while uppercase I<Perl>
2512refers to the abstract Perl language itself.
d5424315
DG
2513
2514=head2 JSON -> PERL
2515
2516=over 4
2517
2518=item object
2519
2520A JSON object becomes a reference to a hash in Perl. No ordering of object
dec273dc 2521keys is preserved (JSON does not preserve object key ordering itself).
d5424315
DG
2522
2523=item array
2524
2525A JSON array becomes a reference to an array in Perl.
2526
2527=item string
2528
2529A JSON string becomes a string scalar in Perl - Unicode codepoints in JSON
2530are represented by the same codepoints in the Perl string, so no manual
2531decoding is necessary.
2532
2533=item number
2534
2535A JSON number becomes either an integer, numeric (floating point) or
2536string scalar in perl, depending on its range and any fractional parts. On
2537the Perl level, there is no difference between those as Perl handles all
2538the conversion details, but an integer may take slightly less memory and
2539might represent more values exactly than floating point numbers.
2540
dec273dc 2541If the number consists of digits only, JSON::PP will try to represent
d5424315
DG
2542it as an integer value. If that fails, it will try to represent it as
2543a numeric (floating point) value if that is possible without loss of
2544precision. Otherwise it will preserve the number as a string value (in
2545which case you lose roundtripping ability, as the JSON number will be
a1e5c561 2546re-encoded to a JSON string).
d5424315
DG
2547
2548Numbers containing a fractional or exponential part will always be
2549represented as numeric (floating point) values, possibly at a loss of
2550precision (in which case you might lose perfect roundtripping ability, but
2551the JSON number will still be re-encoded as a JSON number).
2552
2553Note that precision is not accuracy - binary floating point values cannot
2554represent most decimal fractions exactly, and when converting from and to
dec273dc 2555floating point, JSON::PP only guarantees precision up to but not including
a1e5c561 2556the least significant bit.
d5424315 2557
dec273dc
CBW
2558When C<allow_bignum> is enabled, big integer values and any numeric
2559values will be converted into L<Math::BigInt> and L<Math::BigFloat>
2560objects respectively, without becoming string scalars or losing
2561precision.
d5424315
DG
2562
2563=item true, false
2564
2565These JSON atoms become C<JSON::PP::true> and C<JSON::PP::false>,
2566respectively. They are overloaded to act almost exactly like the numbers
a1e5c561 2567C<1> and C<0>. You can check whether a scalar is a JSON boolean by using
dec273dc 2568the C<JSON::PP::is_bool> function.
d5424315
DG
2569
2570=item null
2571
2572A JSON null atom becomes C<undef> in Perl.
2573
dec273dc
CBW
2574=item shell-style comments (C<< # I<text> >>)
2575
2576As a nonstandard extension to the JSON syntax that is enabled by the
2577C<relaxed> setting, shell-style comments are allowed. They can start
2578anywhere outside strings and go till the end of the line.
d5424315
DG
2579
2580=back
2581
2582
2583=head2 PERL -> JSON
2584
2585The mapping from Perl to JSON is slightly more difficult, as Perl is a
2586truly typeless language, so we can only guess which JSON type is meant by
2587a Perl value.
2588
2589=over 4
2590
2591=item hash references
2592
dec273dc
CBW
2593Perl hash references become JSON objects. As there is no inherent
2594ordering in hash keys (or JSON objects), they will usually be encoded
2595in a pseudo-random order. JSON::PP can optionally sort the hash keys
2596(determined by the I<canonical> flag and/or I<sort_by> property), so
2597the same data structure will serialise to the same JSON text (given
2598same settings and version of JSON::PP), but this incurs a runtime
2599overhead and is only rarely useful, e.g. when you want to compare some
2600JSON text against another for equality.
d5424315
DG
2601
2602=item array references
2603
2604Perl array references become JSON arrays.
2605
2606=item other references
2607
2608Other unblessed references are generally not allowed and will cause an
2609exception to be thrown, except for references to the integers C<0> and
2610C<1>, which get turned into C<false> and C<true> atoms in JSON. You can
dec273dc
CBW
2611also use C<JSON::PP::false> and C<JSON::PP::true> to improve
2612readability.
d5424315 2613
dec273dc 2614 to_json [\0, JSON::PP::true] # yields [false,true]
d5424315 2615
dec273dc 2616=item JSON::PP::true, JSON::PP::false
d5424315
DG
2617
2618These special values become JSON true and JSON false values,
2619respectively. You can also use C<\1> and C<\0> directly if you want.
2620
dec273dc 2621=item JSON::PP::null
d5424315 2622
dec273dc 2623This special value becomes JSON null.
d5424315 2624
dec273dc 2625=item blessed objects
d5424315 2626
dec273dc
CBW
2627Blessed objects are not directly representable in JSON, but C<JSON::PP>
2628allows various ways of handling objects. See L<OBJECT SERIALISATION>,
2629below, for details.
d5424315
DG
2630
2631=item simple scalars
2632
2633Simple Perl scalars (any scalar that is not a reference) are the most
dec273dc 2634difficult objects to encode: JSON::PP will encode undefined scalars as
d5424315
DG
2635JSON C<null> values, scalars that have last been used in a string context
2636before encoding as JSON strings, and anything else as number value:
2637
2638 # dump as number
2639 encode_json [2] # yields [2]
2640 encode_json [-3.0e17] # yields [-3e+17]
2641 my $value = 5; encode_json [$value] # yields [5]
2642
2643 # used as string, so dump as string
2644 print $value;
2645 encode_json [$value] # yields ["5"]
2646
2647 # undef becomes null
2648 encode_json [undef] # yields [null]
2649
2650You can force the type to be a string by stringifying it:
2651
2652 my $x = 3.1; # some variable containing a number
2653 "$x"; # stringified
2654 $x .= ""; # another, more awkward way to stringify
2655 print $x; # perl does it for you, too, quite often
dec273dc 2656 # (but for older perls)
d5424315
DG
2657
2658You can force the type to be a number by numifying it:
2659
2660 my $x = "3"; # some variable containing a string
2661 $x += 0; # numify it, ensuring it will be dumped as a number
a1e5c561 2662 $x *= 1; # same thing, the choice is yours.
d5424315 2663
a1e5c561 2664You cannot currently force the type in other, less obscure, ways.
d5424315
DG
2665
2666Note that numerical precision has the same meaning as under Perl (so
2667binary to decimal conversion follows the same rules as in Perl, which
2668can differ to other languages). Also, your perl interpreter might expose
2669extensions to the floating point numbers of your platform, such as
2670infinities or NaN's - these cannot be represented in JSON, and it is an
2671error to pass those in.
2672
dec273dc
CBW
2673JSON::PP (and JSON::XS) trusts what you pass to C<encode> method
2674(or C<encode_json> function) is a clean, validated data structure with
2675values that can be represented as valid JSON values only, because it's
2676not from an external data source (as opposed to JSON texts you pass to
2677C<decode> or C<decode_json>, which JSON::PP considers tainted and
2678doesn't trust). As JSON::PP doesn't know exactly what you and consumers
2679of your JSON texts want the unexpected values to be (you may want to
2680convert them into null, or to stringify them with or without
2681normalisation (string representation of infinities/NaN may vary
2682depending on platforms), or to croak without conversion), you're advised
2683to do what you and your consumers need before you encode, and also not
2684to numify values that may start with values that look like a number
2685(including infinities/NaN), without validating.
d5424315
DG
2686
2687=back
2688
dec273dc 2689=head2 OBJECT SERIALISATION
d5424315 2690
dec273dc 2691As for Perl objects, JSON::PP only supports a pure JSON representation (without the ability to deserialise the object automatically again).
d5424315 2692
dec273dc 2693=head3 SERIALISATION
d5424315 2694
dec273dc
CBW
2695What happens when C<JSON::PP> encounters a Perl object depends on the
2696C<allow_blessed>, C<convert_blessed> and C<allow_bignum> settings, which are
2697used in this order:
d5424315 2698
dec273dc 2699=over 4
d5424315 2700
dec273dc 2701=item 1. C<convert_blessed> is enabled and the object has a C<TO_JSON> method.
d5424315 2702
dec273dc
CBW
2703In this case, the C<TO_JSON> method of the object is invoked in scalar
2704context. It must return a single scalar that can be directly encoded into
2705JSON. This scalar replaces the object in the JSON text.
d5424315 2706
dec273dc
CBW
2707For example, the following C<TO_JSON> method will convert all L<URI>
2708objects to JSON strings when serialised. The fact that these values
2709originally were L<URI> objects is lost.
d5424315 2710
dec273dc
CBW
2711 sub URI::TO_JSON {
2712 my ($uri) = @_;
2713 $uri->as_string
2714 }
d5424315 2715
dec273dc 2716=item 2. C<allow_bignum> is enabled and the object is a C<Math::BigInt> or C<Math::BigFloat>.
d5424315 2717
dec273dc 2718The object will be serialised as a JSON number value.
d5424315 2719
dec273dc 2720=item 3. C<allow_blessed> is enabled.
d5424315 2721
dec273dc 2722The object will be serialised as a JSON null value.
d5424315 2723
dec273dc 2724=item 4. none of the above
d5424315 2725
dec273dc
CBW
2726If none of the settings are enabled or the respective methods are missing,
2727C<JSON::PP> throws an exception.
d5424315 2728
dec273dc 2729=back
d5424315 2730
dec273dc 2731=head1 ENCODING/CODESET FLAG NOTES
d5424315 2732
dec273dc 2733This section is taken from JSON::XS.
d5424315 2734
dec273dc
CBW
2735The interested reader might have seen a number of flags that signify
2736encodings or codesets - C<utf8>, C<latin1> and C<ascii>. There seems to be
2737some confusion on what these do, so here is a short comparison:
d5424315 2738
dec273dc
CBW
2739C<utf8> controls whether the JSON text created by C<encode> (and expected
2740by C<decode>) is UTF-8 encoded or not, while C<latin1> and C<ascii> only
2741control whether C<encode> escapes character values outside their respective
2742codeset range. Neither of these flags conflict with each other, although
2743some combinations make less sense than others.
d5424315 2744
dec273dc
CBW
2745Care has been taken to make all flags symmetrical with respect to
2746C<encode> and C<decode>, that is, texts encoded with any combination of
2747these flag values will be correctly decoded when the same flags are used
2748- in general, if you use different flag settings while encoding vs. when
2749decoding you likely have a bug somewhere.
d5424315 2750
dec273dc
CBW
2751Below comes a verbose discussion of these flags. Note that a "codeset" is
2752simply an abstract set of character-codepoint pairs, while an encoding
2753takes those codepoint numbers and I<encodes> them, in our case into
2754octets. Unicode is (among other things) a codeset, UTF-8 is an encoding,
2755and ISO-8859-1 (= latin 1) and ASCII are both codesets I<and> encodings at
2756the same time, which can be confusing.
d5424315 2757
dec273dc 2758=over 4
d5424315 2759
dec273dc
CBW
2760=item C<utf8> flag disabled
2761
2762When C<utf8> is disabled (the default), then C<encode>/C<decode> generate
2763and expect Unicode strings, that is, characters with high ordinal Unicode
2764values (> 255) will be encoded as such characters, and likewise such
2765characters are decoded as-is, no changes to them will be done, except
2766"(re-)interpreting" them as Unicode codepoints or Unicode characters,
2767respectively (to Perl, these are the same thing in strings unless you do
2768funny/weird/dumb stuff).
2769
2770This is useful when you want to do the encoding yourself (e.g. when you
2771want to have UTF-16 encoded JSON texts) or when some other layer does
2772the encoding for you (for example, when printing to a terminal using a
2773filehandle that transparently encodes to UTF-8 you certainly do NOT want
2774to UTF-8 encode your data first and have Perl encode it another time).
2775
2776=item C<utf8> flag enabled
2777
2778If the C<utf8>-flag is enabled, C<encode>/C<decode> will encode all
2779characters using the corresponding UTF-8 multi-byte sequence, and will
2780expect your input strings to be encoded as UTF-8, that is, no "character"
2781of the input string must have any value > 255, as UTF-8 does not allow
2782that.
2783
2784The C<utf8> flag therefore switches between two modes: disabled means you
2785will get a Unicode string in Perl, enabled means you get an UTF-8 encoded
2786octet/binary string in Perl.
2787
2788=item C<latin1> or C<ascii> flags enabled
2789
2790With C<latin1> (or C<ascii>) enabled, C<encode> will escape characters
2791with ordinal values > 255 (> 127 with C<ascii>) and encode the remaining
2792characters as specified by the C<utf8> flag.
2793
2794If C<utf8> is disabled, then the result is also correctly encoded in those
2795character sets (as both are proper subsets of Unicode, meaning that a
2796Unicode string with all character values < 256 is the same thing as a
2797ISO-8859-1 string, and a Unicode string with all character values < 128 is
2798the same thing as an ASCII string in Perl).
2799
2800If C<utf8> is enabled, you still get a correct UTF-8-encoded string,
2801regardless of these flags, just some more characters will be escaped using
2802C<\uXXXX> then before.
2803
2804Note that ISO-8859-1-I<encoded> strings are not compatible with UTF-8
2805encoding, while ASCII-encoded strings are. That is because the ISO-8859-1
2806encoding is NOT a subset of UTF-8 (despite the ISO-8859-1 I<codeset> being
2807a subset of Unicode), while ASCII is.
2808
2809Surprisingly, C<decode> will ignore these flags and so treat all input
2810values as governed by the C<utf8> flag. If it is disabled, this allows you
2811to decode ISO-8859-1- and ASCII-encoded strings, as both strict subsets of
2812Unicode. If it is enabled, you can correctly decode UTF-8 encoded strings.
2813
2814So neither C<latin1> nor C<ascii> are incompatible with the C<utf8> flag -
2815they only govern when the JSON output engine escapes a character or not.
2816
2817The main use for C<latin1> is to relatively efficiently store binary data
2818as JSON, at the expense of breaking compatibility with most JSON decoders.
2819
2820The main use for C<ascii> is to force the output to not contain characters
2821with values > 127, which means you can interpret the resulting string
2822as UTF-8, ISO-8859-1, ASCII, KOI8-R or most about any character set and
28238-bit-encoding, and still get the same data structure back. This is useful
2824when your channel for JSON transfer is not 8-bit clean or the encoding
2825might be mangled in between (e.g. in mail), and works because ASCII is a
2826proper subset of most 8-bit and multibyte encodings in use in the world.
d5424315
DG
2827
2828=back
2829
d5424315
DG
2830=head1 SEE ALSO
2831
dec273dc
CBW
2832The F<json_pp> command line utility for quick experiments.
2833
2834L<JSON::XS>, L<Cpanel::JSON::XS>, and L<JSON::Tiny> for faster alternatives.
2835L<JSON> and L<JSON::MaybeXS> for easy migration.
d5424315 2836
dec273dc 2837L<JSON::PP::Compat5005> and L<JSON::PP::Compat5006> for older perl users.
d5424315
DG
2838
2839RFC4627 (L<http://www.ietf.org/rfc/rfc4627.txt>)
2840
2841=head1 AUTHOR
2842
2843Makamaka Hannyaharamitu, E<lt>makamaka[at]cpan.orgE<gt>
2844
2845
2846=head1 COPYRIGHT AND LICENSE
2847
a1e5c561 2848Copyright 2007-2016 by Makamaka Hannyaharamitu
d5424315
DG
2849
2850This library is free software; you can redistribute it and/or modify
2851it under the same terms as Perl itself.
2852
2853=cut