Bump version of Data::Dumper

[perl5.git] / dist / Data-Dumper / Dumper.pm
diff --git a/dist/Data-Dumper/Dumper.pm b/dist/Data-Dumper/Dumper.pm

index 7c8a72c..22a1150 100644 (file)
--- a/dist/Data-Dumper/Dumper.pm
+++ b/dist/Data-Dumper/Dumper.pm
@@ -10,16 +10,17 @@
  package Data::Dumper;
  
  BEGIN {
-    $VERSION = '2.151'; # Don't forget to set version and release
+    $VERSION = '2.172'; # Don't forget to set version and release
  }               # date in POD below!
  
  #$| = 1;
  
  use 5.006_001;
  require Exporter;
-require overload;
  
-use Carp;
+use constant IS_PRE_516_PERL => $] < 5.016;
+
+use Carp ();
  
  BEGIN {
      @ISA = qw(Exporter);
@@ -37,8 +38,11 @@ BEGIN {
      or $Useperl = 1;
  }
  
+my $IS_ASCII  = ord 'A' ==  65;
+
  # module vars and their defaults
  $Indent     = 2         unless defined $Indent;
+$Trailingcomma = 0      unless defined $Trailingcomma;
  $Purity     = 0         unless defined $Purity;
  $Pad        = ""        unless defined $Pad;
  $Varname    = "VAR"     unless defined $Varname;
@@ -56,6 +60,7 @@ $Useperl    = 0         unless defined $Useperl;
  $Sortkeys   = 0         unless defined $Sortkeys;
  $Deparse    = 0         unless defined $Deparse;
  $Sparseseen = 0         unless defined $Sparseseen;
+$Maxrecurse = 1000      unless defined $Maxrecurse;
  
  #
  # expects an arrayref of values to be dumped.
@@ -66,13 +71,14 @@ $Sparseseen = 0         unless defined $Sparseseen;
  sub new {
    my($c, $v, $n) = @_;
  
-  croak "Usage:  PACKAGE->new(ARRAYREF, [ARRAYREF])"
+  Carp::croak("Usage:  PACKAGE->new(ARRAYREF, [ARRAYREF])")
      unless (defined($v) && (ref($v) eq 'ARRAY'));
    $n = [] unless (defined($n) && (ref($n) eq 'ARRAY'));
  
    my($s) = {
          level      => 0,           # current recursive depth
          indent     => $Indent,     # various styles of indenting
+        trailingcomma => $Trailingcomma, # whether to add comma after last elem
          pad        => $Pad,        # all lines prefixed by this string
          xpad       => "",          # padding-per-level
          apad       => "",          # added padding for hash keys n such
@@ -92,6 +98,7 @@ sub new {
          'bless'    => $Bless,    # keyword to use for "bless"
  #        expdepth   => $Expdepth,   # cutoff depth for explicit dumping
          maxdepth   => $Maxdepth,   # depth beyond which we give up
+       maxrecurse => $Maxrecurse, # depth beyond which we abort
          useperl    => $Useperl,    # use the pure Perl implementation
          sortkeys   => $Sortkeys,   # flag or filter for sorting hash keys
          deparse    => $Deparse,    # use B::Deparse for coderefs
@@ -164,11 +171,11 @@ sub Seen {
            $s->{seen}{$id} = [$k, $v];
          }
          else {
-          carp "Only refs supported, ignoring non-ref item \$$k";
+          Carp::carp("Only refs supported, ignoring non-ref item \$$k");
          }
        }
        else {
-        carp "Value of ref must be defined; ignoring undefined item \$$k";
+        Carp::carp("Value of ref must be defined; ignoring undefined item \$$k");
        }
      }
      return $s;
@@ -189,7 +196,7 @@ sub Values {
        return $s;
      }
      else {
-      croak "Argument to Values, if provided, must be array ref";
+      Carp::croak("Argument to Values, if provided, must be array ref");
      }
    }
    else {
@@ -208,7 +215,7 @@ sub Names {
        return $s;
      }
      else {
-      croak "Argument to Names, if provided, must be array ref";
+      Carp::croak("Argument to Names, if provided, must be array ref");
      }
    }
    else {
@@ -219,10 +226,11 @@ sub Names {
  sub DESTROY {}
  
  sub Dump {
-    return &Dumpxs
-    unless $Data::Dumper::Useperl || (ref($_[0]) && $_[0]->{useperl}) ||
-           $Data::Dumper::Deparse || (ref($_[0]) && $_[0]->{deparse});
-    return &Dumpperl;
+  return &Dumpxs
+    unless $Data::Dumper::Useperl || (ref($_[0]) && $_[0]->{useperl})
+            # Use pure perl version on earlier releases on EBCDIC platforms
+        || (! $IS_ASCII && $] lt 5.021_010);
+  return &Dumpperl;
  }
  
  #
@@ -350,6 +358,12 @@ sub _dump {
        return qq['$val'];
      }
  
+    # avoid recursing infinitely [perl #122111]
+    if ($s->{maxrecurse} > 0
+        and $s->{level} >= $s->{maxrecurse}) {
+        die "Recursion limit of $s->{maxrecurse} exceeded";
+    }
+
      # we have a blessed ref
      my ($blesspad);
      if ($realpack and !$no_bless) {
@@ -400,7 +414,9 @@ sub _dump {
          $out .= $pad . $ipad . '#' . $i
            if $s->{indent} >= 3;
          $out .= $pad . $ipad . $s->_dump($v, $sname);
-        $out .= "," if $i++ < $#$val;
+        $out .= ","
+            if $i++ < $#$val
+            || ($s->{trailingcomma} && $s->{indent} >= 1);
        }
        $out .= $pad . ($s->{xpad} x ($s->{level} - 1)) if $i;
        $out .= ($name =~ /^\@/) ? ')' : ']';
@@ -422,7 +438,7 @@ sub _dump {
          if (ref($s->{sortkeys}) eq 'CODE') {
            $keys = $s->{sortkeys}($val);
            unless (ref($keys) eq 'ARRAY') {
-            carp "Sortkeys subroutine did not return ARRAYREF";
+            Carp::carp("Sortkeys subroutine did not return ARRAYREF");
              $keys = [];
            }
          }
@@ -460,7 +476,7 @@ sub _dump {
            if $s->{indent} >= 2;
        }
        if (substr($out, -1) eq ',') {
-        chop $out;
+        chop $out if !$s->{trailingcomma} || !$s->{indent};
          $out .= $pad . ($s->{xpad} x ($s->{level} - 1));
        }
        $out .= ($name =~ /^\%/) ? ')' : '}';
@@ -470,16 +486,16 @@ sub _dump {
          require B::Deparse;
          my $sub =  'sub ' . (B::Deparse->new)->coderef2text($val);
          $pad    =  $s->{sep} . $s->{pad} . $s->{apad} . $s->{xpad} x ($s->{level} - 1);
-        $sub    =~ s/\n/$pad/gse;
+        $sub    =~ s/\n/$pad/gs;
          $out   .=  $sub;
        }
        else {
          $out .= 'sub { "DUMMY" }';
-        carp "Encountered CODE ref, using dummy placeholder" if $s->{purity};
+        Carp::carp("Encountered CODE ref, using dummy placeholder") if $s->{purity};
        }
      }
      else {
-      croak "Can't handle '$realtype' type";
+      Carp::croak("Can't handle '$realtype' type");
      }
  
      if ($realpack and !$no_bless) { # we have a blessed ref
@@ -512,11 +528,12 @@ sub _dump {
      $ref = \$val;
      if (ref($ref) eq 'GLOB') {  # glob
        my $name = substr($val, 1);
-      if ($name =~ /^[A-Za-z_][\w:]*$/ && $name ne 'main::') {
-        $name =~ s/^main::/::/;
+      $name =~ s/^main::(?!\z)/::/;
+      if ($name =~ /\A(?:[A-Z_a-z][0-9A-Z_a-z]*)?::(?:[0-9A-Z_a-z]+::)*[0-9A-Z_a-z]*\z/ && $name ne 'main::') {
          $sname = $name;
        }
        else {
+        local $s->{useqq} = IS_PRE_516_PERL && ($s->{useqq} || $name =~ /[^\x00-\x7f]/) ? 1 : $s->{useqq};
          $sname = $s->_dump(
            $name eq 'main::' || $] < 5.007 && $name eq "main::\0"
              ? ''
@@ -603,7 +620,7 @@ sub Reset {
  
  sub Indent {
    my($s, $v) = @_;
-  if (defined($v)) {
+  if (@_ >= 2) {
      if ($v == 0) {
        $s->{xpad} = "";
        $s->{sep} = "";
@@ -620,84 +637,94 @@ sub Indent {
    }
  }
  
+sub Trailingcomma {
+  my($s, $v) = @_;
+  @_ >= 2 ? (($s->{trailingcomma} = $v), return $s) : $s->{trailingcomma};
+}
+
  sub Pair {
      my($s, $v) = @_;
-    defined($v) ? (($s->{pair} = $v), return $s) : $s->{pair};
+    @_ >= 2 ? (($s->{pair} = $v), return $s) : $s->{pair};
  }
  
  sub Pad {
    my($s, $v) = @_;
-  defined($v) ? (($s->{pad} = $v), return $s) : $s->{pad};
+  @_ >= 2 ? (($s->{pad} = $v), return $s) : $s->{pad};
  }
  
  sub Varname {
    my($s, $v) = @_;
-  defined($v) ? (($s->{varname} = $v), return $s) : $s->{varname};
+  @_ >= 2 ? (($s->{varname} = $v), return $s) : $s->{varname};
  }
  
  sub Purity {
    my($s, $v) = @_;
-  defined($v) ? (($s->{purity} = $v), return $s) : $s->{purity};
+  @_ >= 2 ? (($s->{purity} = $v), return $s) : $s->{purity};
  }
  
  sub Useqq {
    my($s, $v) = @_;
-  defined($v) ? (($s->{useqq} = $v), return $s) : $s->{useqq};
+  @_ >= 2 ? (($s->{useqq} = $v), return $s) : $s->{useqq};
  }
  
  sub Terse {
    my($s, $v) = @_;
-  defined($v) ? (($s->{terse} = $v), return $s) : $s->{terse};
+  @_ >= 2 ? (($s->{terse} = $v), return $s) : $s->{terse};
  }
  
  sub Freezer {
    my($s, $v) = @_;
-  defined($v) ? (($s->{freezer} = $v), return $s) : $s->{freezer};
+  @_ >= 2 ? (($s->{freezer} = $v), return $s) : $s->{freezer};
  }
  
  sub Toaster {
    my($s, $v) = @_;
-  defined($v) ? (($s->{toaster} = $v), return $s) : $s->{toaster};
+  @_ >= 2 ? (($s->{toaster} = $v), return $s) : $s->{toaster};
  }
  
  sub Deepcopy {
    my($s, $v) = @_;
-  defined($v) ? (($s->{deepcopy} = $v), return $s) : $s->{deepcopy};
+  @_ >= 2 ? (($s->{deepcopy} = $v), return $s) : $s->{deepcopy};
  }
  
  sub Quotekeys {
    my($s, $v) = @_;
-  defined($v) ? (($s->{quotekeys} = $v), return $s) : $s->{quotekeys};
+  @_ >= 2 ? (($s->{quotekeys} = $v), return $s) : $s->{quotekeys};
  }
  
  sub Bless {
    my($s, $v) = @_;
-  defined($v) ? (($s->{'bless'} = $v), return $s) : $s->{'bless'};
+  @_ >= 2 ? (($s->{'bless'} = $v), return $s) : $s->{'bless'};
  }
  
  sub Maxdepth {
    my($s, $v) = @_;
-  defined($v) ? (($s->{'maxdepth'} = $v), return $s) : $s->{'maxdepth'};
+  @_ >= 2 ? (($s->{'maxdepth'} = $v), return $s) : $s->{'maxdepth'};
+}
+
+sub Maxrecurse {
+  my($s, $v) = @_;
+  @_ >= 2 ? (($s->{'maxrecurse'} = $v), return $s) : $s->{'maxrecurse'};
  }
  
  sub Useperl {
    my($s, $v) = @_;
-  defined($v) ? (($s->{'useperl'} = $v), return $s) : $s->{'useperl'};
+  @_ >= 2 ? (($s->{'useperl'} = $v), return $s) : $s->{'useperl'};
  }
  
  sub Sortkeys {
    my($s, $v) = @_;
-  defined($v) ? (($s->{'sortkeys'} = $v), return $s) : $s->{'sortkeys'};
+  @_ >= 2 ? (($s->{'sortkeys'} = $v), return $s) : $s->{'sortkeys'};
  }
  
  sub Deparse {
    my($s, $v) = @_;
-  defined($v) ? (($s->{'deparse'} = $v), return $s) : $s->{'deparse'};
+  @_ >= 2 ? (($s->{'deparse'} = $v), return $s) : $s->{'deparse'};
  }
  
  sub Sparseseen {
    my($s, $v) = @_;
-  defined($v) ? (($s->{'noseen'} = $v), return $s) : $s->{'noseen'};
+  @_ >= 2 ? (($s->{'noseen'} = $v), return $s) : $s->{'noseen'};
  }
  
  # used by qquote below
@@ -711,41 +738,71 @@ my %esc = (
      "\e" => "\\e",
  );
  
+my $low_controls = ($IS_ASCII)
+
+                   # This includes \177, because traditionally it has been
+                   # output as octal, even though it isn't really a "low"
+                   # control
+                   ? qr/[\0-\x1f\177]/
+
+                     # EBCDIC low controls.
+                   : qr/[\0-\x3f]/;
+
  # put a string value in double quotes
  sub qquote {
    local($_) = shift;
    s/([\\\"\@\$])/\\$1/g;
+
+  # This efficiently changes the high ordinal characters to \x{} if the utf8
+  # flag is on.  On ASCII platforms, the high ordinals are all the
+  # non-ASCII's.  On EBCDIC platforms, we don't include in these the non-ASCII
+  # controls whose ordinals are less than SPACE, excluded below by the range
+  # \0-\x3f.  On ASCII platforms this range just compiles as part of :ascii:.
+  # On EBCDIC platforms, there is just one outlier high ordinal control, and
+  # it gets output as \x{}.
    my $bytes; { use bytes; $bytes = length }
-  s/([[:^ascii:]])/'\x{'.sprintf("%x",ord($1)).'}'/ge if $bytes > length;
-  return qq("$_") unless
-    /[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~]/;  # fast exit
+  s/([^[:ascii:]\0-\x3f])/sprintf("\\x{%x}",ord($1))/ge
+    if $bytes > length
  
-  my $high = shift || "";
+       # The above doesn't get the EBCDIC outlier high ordinal control when
+       # the string is UTF-8 but there are no UTF-8 variant characters in it.
+       # We want that to come out as \x{} anyway.  We need is_utf8() to do
+       # this.
+       || (! $IS_ASCII && $] ge 5.008_001 && utf8::is_utf8($_));
+
+  return qq("$_") unless /[[:^print:]]/;  # fast exit if only printables
+
+  # Here, there is at least one non-printable to output.  First, translate the
+  # escapes.
    s/([\a\b\t\n\f\r\e])/$esc{$1}/g;
  
-  if (ord('^')==94)  { # ascii
-    # no need for 3 digits in escape for these
-    s/([\0-\037])(?!\d)/'\\'.sprintf('%o',ord($1))/eg;
-    s/([\0-\037\177])/'\\'.sprintf('%03o',ord($1))/eg;
+  # no need for 3 digits in escape for octals not followed by a digit.
+  s/($low_controls)(?!\d)/'\\'.sprintf('%o',ord($1))/eg;
+
+  # But otherwise use 3 digits
+  s/($low_controls)/'\\'.sprintf('%03o',ord($1))/eg;
+
      # all but last branch below not supported --BEHAVIOR SUBJECT TO CHANGE--
-    if ($high eq "iso8859") {
-      s/([\200-\240])/'\\'.sprintf('%o',ord($1))/eg;
+  my $high = shift || "";
+    if ($high eq "iso8859") {   # Doesn't escape the Latin1 printables
+      if ($IS_ASCII) {
+        s/([\200-\240])/'\\'.sprintf('%o',ord($1))/eg;
+      }
+      elsif ($] ge 5.007_003) {
+        my $high_control = utf8::unicode_to_native(0x9F);
+        s/$high_control/sprintf('\\%o',ord($1))/eg;
+      }
      } elsif ($high eq "utf8") {
+#     Some discussion of what to do here is in
+#       https://rt.perl.org/Ticket/Display.html?id=113088
  #     use utf8;
  #     $str =~ s/([^\040-\176])/sprintf "\\x{%04x}", ord($1)/ge;
      } elsif ($high eq "8bit") {
          # leave it as it is
      } else {
-      s/([\200-\377])/'\\'.sprintf('%03o',ord($1))/eg;
-      s/([^\040-\176])/sprintf "\\x{%04x}", ord($1)/ge;
+      s/([[:^ascii:]])/'\\'.sprintf('%03o',ord($1))/eg;
+      #s/([^\040-\176])/sprintf "\\x{%04x}", ord($1)/ge;
      }
-  }
-  else { # ebcdic
-      s{([^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~])(?!\d)}
-       {my $v = ord($1); '\\'.sprintf(($v <= 037 ? '%o' : '%03o'), $v)}eg;
-      s{([^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~])}
-       {'\\'.sprintf('%03o',ord($1))}eg;
-  }
  
    return qq("$_");
  }
@@ -984,6 +1041,15 @@ consumes twice the number of lines).  Style 2 is the default.
  
  =item *
  
+$Data::Dumper::Trailingcomma  I<or>  I<$OBJ>->Trailingcomma(I<[NEWVAL]>)
+
+Controls whether a comma is added after the last element of an array or
+hash. Even when true, no comma is added between the last element of an array
+or hash and a closing bracket when they appear on the same line. The default
+is false.
+
+=item *
+
  $Data::Dumper::Purity  I<or>  I<$OBJ>->Purity(I<[NEWVAL]>)
  
  Controls the degree to which the output can be C<eval>ed to recreate the
@@ -1012,9 +1078,7 @@ $Data::Dumper::Useqq  I<or>  I<$OBJ>->Useqq(I<[NEWVAL]>)
  When set, enables the use of double quotes for representing string values.
  Whitespace other than space will be represented as C<[\n\t\r]>, "unsafe"
  characters will be backslashed, and unprintable characters will be output as
-quoted octal integers.  Since setting this variable imposes a performance
-penalty, the default is 0.  C<Dump()> will run slower if this flag is set,
-since the fast XSUB implementation doesn't support it yet.
+quoted octal integers.  The default is 0.
  
  =item *
  
@@ -1105,6 +1169,16 @@ no maximum depth.
  
  =item *
  
+$Data::Dumper::Maxrecurse  I<or>  $I<OBJ>->Maxrecurse(I<[NEWVAL]>)
+
+Can be set to a positive integer that specifies the depth beyond which
+recursion into a structure will throw an exception.  This is intended
+as a security measure to prevent perl running out of stack space when
+dumping an excessively deep structure.  Can be set to 0 to remove the
+limit.  Default is 1000.
+
+=item *
+
  $Data::Dumper::Useperl  I<or>  $I<OBJ>->Useperl(I<[NEWVAL]>)
  
  Can be set to a boolean value which controls whether the pure Perl
@@ -1138,9 +1212,10 @@ $Data::Dumper::Deparse  I<or>  $I<OBJ>->Deparse(I<[NEWVAL]>)
  
  Can be set to a boolean value to control whether code references are
  turned into perl source code. If set to a true value, C<B::Deparse>
-will be used to get the source of the code reference. Using this option
-will force using the Perl implementation of the dumper, since the fast
-XSUB implementation doesn't support it.
+will be used to get the source of the code reference. In older versions,
+using this option imposed a significant performance penalty when dumping
+parts of a data structure other than code references, but that is no
+longer the case.
  
  Caution : use this option only if you know that your coderefs will be
  properly reconstructed by C<B::Deparse>.
@@ -1361,15 +1436,9 @@ the C<Deparse> flag), an anonymous subroutine that
  contains the string '"DUMMY"' will be inserted in its place, and a warning
  will be printed if C<Purity> is set.  You can C<eval> the result, but bear
  in mind that the anonymous sub that gets created is just a placeholder.
-Someday, perl will have a switch to cache-on-demand the string
-representation of a compiled piece of code, I hope.  If you have prior
-knowledge of all the code refs that your data structures are likely
-to have, you can use the C<Seen> method to pre-seed the internal reference
-table and make the dumped output point to them, instead.  See L</EXAMPLES>
-above.
-
-The C<Useqq> and C<Deparse> flags makes Dump() run slower, since the
-XSUB implementation does not support them.
+Even using the C<Deparse> flag will in some cases produce results that
+behave differently after being passed to C<eval>; see the documentation
+for L<B::Deparse>.
  
  SCALAR objects have the weirdest looking C<bless> workaround.
  
@@ -1392,13 +1461,13 @@ be to use the C<Sortkeys> filter of Data::Dumper.
  
  Gurusamy Sarathy        gsar@activestate.com
  
-Copyright (c) 1996-2014 Gurusamy Sarathy. All rights reserved.
+Copyright (c) 1996-2017 Gurusamy Sarathy. All rights reserved.
  This program is free software; you can redistribute it and/or
  modify it under the same terms as Perl itself.
  
  =head1 VERSION
  
-Version 2.151  (March 7 2014)
+Version 2.172
  
  =head1 SEE ALSO