Make Data::Dumper work when dynamic extensions are not available

[perl5.git] / ext / Data / Dumper / Dumper.pm
diff --git a/ext/Data/Dumper/Dumper.pm b/ext/Data/Dumper/Dumper.pm

index 5897e1f..3f2d8c9 100644 (file)
--- a/ext/Data/Dumper/Dumper.pm
+++ b/ext/Data/Dumper/Dumper.pm
@@ -9,37 +9,50 @@
  
  package Data::Dumper;
  
-$VERSION = '2.103';
+$VERSION = '2.121_04';
  
  #$| = 1;
  
-require 5.6.1;
+use 5.006_001;
  require Exporter;
-use XSLoader ();
  require overload;
  
  use Carp;
  
-@ISA = qw(Exporter);
-@EXPORT = qw(Dumper);
-@EXPORT_OK = qw(DumperX);
-
-XSLoader::load 'Data::Dumper';
+BEGIN {
+    @ISA = qw(Exporter);
+    @EXPORT = qw(Dumper);
+    @EXPORT_OK = qw(DumperX);
+
+    # if run under miniperl, or otherwise lacking dynamic loading,
+    # attempt to load the XS code via XSLoader, and toggle the pure
+    # perl flag on if that load fails.
+    eval {
+       require XSLoader;
+       XSLoader::load( 'Data::Dumper' );
+       1;
+    };
+    $Useperl = 1 if $@;
+}
  
  # module vars and their defaults
-$Indent = 2 unless defined $Indent;
-$Purity = 0 unless defined $Purity;
-$Pad = "" unless defined $Pad;
-$Varname = "VAR" unless defined $Varname;
-$Useqq = 0 unless defined $Useqq;
-$Terse = 0 unless defined $Terse;
-$Freezer = "" unless defined $Freezer;
-$Toaster = "" unless defined $Toaster;
-$Deepcopy = 0 unless defined $Deepcopy;
-$Quotekeys = 1 unless defined $Quotekeys;
-$Bless = "bless" unless defined $Bless;
-#$Expdepth = 0 unless defined $Expdepth;
-$Maxdepth = 0 unless defined $Maxdepth;
+$Indent     = 2         unless defined $Indent;
+$Purity     = 0         unless defined $Purity;
+$Pad        = ""        unless defined $Pad;
+$Varname    = "VAR"     unless defined $Varname;
+$Useqq      = 0         unless defined $Useqq;
+$Terse      = 0         unless defined $Terse;
+$Freezer    = ""        unless defined $Freezer;
+$Toaster    = ""        unless defined $Toaster;
+$Deepcopy   = 0         unless defined $Deepcopy;
+$Quotekeys  = 1         unless defined $Quotekeys;
+$Bless      = "bless"   unless defined $Bless;
+#$Expdepth   = 0         unless defined $Expdepth;
+$Maxdepth   = 0         unless defined $Maxdepth;
+$Pair       = ' => '    unless defined $Pair;
+$Useperl    = 0         unless defined $Useperl;
+$Sortkeys   = 0         unless defined $Sortkeys;
+$Deparse    = 0         unless defined $Deparse;
  
  #
  # expects an arrayref of values to be dumped.
@@ -61,6 +74,7 @@ sub new {
              xpad       => "",          # padding-per-level
              apad       => "",          # added padding for hash keys n such
              sep        => "",          # list separator
+            pair       => $Pair,       # hash key/value separator: defaults to ' => '
              seen       => {},          # local (nested) refs (id => [name, val])
              todump     => $v,          # values to dump []
              names      => $n,          # optional names for values []
@@ -75,6 +89,9 @@ sub new {
               'bless'   => $Bless,      # keyword to use for "bless"
  #           expdepth   => $Expdepth,   # cutoff depth for explicit dumping
              maxdepth   => $Maxdepth,   # depth beyond which we give up
+            useperl    => $Useperl,    # use the pure Perl implementation
+            sortkeys   => $Sortkeys,   # flag or filter for sorting hash keys
+            deparse    => $Deparse,    # use B::Deparse for coderefs
            };
  
    if ($Indent > 0) {
@@ -148,7 +165,9 @@ sub DESTROY {}
  
  sub Dump {
      return &Dumpxs
-       unless $Data::Dumper::Useqq || (ref($_[0]) && $_[0]->{useqq});
+       unless $Data::Dumper::Useperl || (ref($_[0]) && $_[0]->{useperl}) ||
+              $Data::Dumper::Useqq   || (ref($_[0]) && $_[0]->{useqq}) ||
+              $Data::Dumper::Deparse || (ref($_[0]) && $_[0]->{deparse});
      return &Dumpperl;
  }
  
@@ -208,6 +227,8 @@ sub Dumpperl {
  #
  # twist, toil and turn;
  # and recurse, of course.
+# sometimes sordidly;
+# and curse if no recourse.
  #
  sub _dump {
    my($s, $val, $name) = @_;
@@ -219,9 +240,13 @@ sub _dump {
  
    if ($type) {
  
-    # prep it, if it looks like an object
-    if (my $freezer = $s->{freezer}) {
-      $val->$freezer() if UNIVERSAL::can($val, $freezer);
+    # Call the freezer method if it's specified and the object has the
+    # method.  Trap errors and warn() instead of die()ing, like the XS
+    # implementation.
+    my $freezer = $s->{freezer};
+    if ($freezer and UNIVERSAL::can($val, $freezer)) {
+      eval { $val->$freezer() };
+      warn "WARNING(Freezer method call failed): $@" if $@;
      }
  
      ($realpack, $realtype, $id) =
@@ -322,20 +347,37 @@ sub _dump {
        $out .= ($name =~ /^\@/) ? ')' : ']';
      }
      elsif ($realtype eq 'HASH') {
-      my($k, $v, $pad, $lpad, $mname);
+      my($k, $v, $pad, $lpad, $mname, $pair);
        $out .= ($name =~ /^\%/) ? '(' : '{';
        $pad = $s->{sep} . $s->{pad} . $s->{apad};
        $lpad = $s->{apad};
+      $pair = $s->{pair};
        ($name =~ /^\%(.*)$/) ? ($mname = "\$" . $1) :
         # omit -> if $foo->[0]->{bar}, but not ${$foo->[0]}->{bar}
         ($name =~ /^\\?[\%\@\*\$][^{].*[]}]$/) ? ($mname = $name) :
           ($mname = $name . '->');
        $mname .= '->' if $mname =~ /^\*.+\{[A-Z]+\}$/;
-      while (($k, $v) = each %$val) {
+      my ($sortkeys, $keys, $key) = ("$s->{sortkeys}");
+      if ($sortkeys) {
+       if (ref($s->{sortkeys}) eq 'CODE') {
+         $keys = $s->{sortkeys}($val);
+         unless (ref($keys) eq 'ARRAY') {
+           carp "Sortkeys subroutine did not return ARRAYREF";
+           $keys = [];
+         }
+       }
+       else {
+         $keys = [ sort keys %$val ];
+       }
+      }
+      while (($k, $v) = ! $sortkeys ? (each %$val) :
+            @$keys ? ($key = shift(@$keys), $val->{$key}) :
+            () ) 
+      {
         my $nk = $s->_dump($k, "");
         $nk = $1 if !$s->{quotekeys} and $nk =~ /^[\"\']([A-Za-z_]\w*)[\"\']$/;
         $sname = $mname . '{' . $nk . '}';
-       $out .= $pad . $ipad . $nk . " => ";
+       $out .= $pad . $ipad . $nk . $pair;
  
         # temporarily alter apad
         $s->{apad} .= (" " x (length($nk) + 4)) if $s->{indent} >= 2;
@@ -349,8 +391,16 @@ sub _dump {
        $out .= ($name =~ /^\%/) ? ')' : '}';
      }
      elsif ($realtype eq 'CODE') {
-      $out .= 'sub { "DUMMY" }';
-      carp "Encountered CODE ref, using dummy placeholder" if $s->{purity};
+      if ($s->{deparse}) {
+       require B::Deparse;
+       my $sub =  'sub ' . (B::Deparse->new)->coderef2text($val);
+       $pad    =  $s->{sep} . $s->{pad} . $s->{apad} . $s->{xpad} x ($s->{level} - 1);
+       $sub    =~ s/\n/$pad/gse;
+       $out   .=  $sub;
+      } else {
+        $out .= 'sub { "DUMMY" }';
+        carp "Encountered CODE ref, using dummy placeholder" if $s->{purity};
+      }
      }
      else {
        croak "Can\'t handle $realtype type.";
@@ -412,11 +462,12 @@ sub _dump {
      elsif (!defined($val)) {
        $out .= "undef";
      }
-    elsif ($val =~ /^(?:0|-?[1-9]\d{0,8})$/) { # safe decimal number
+    elsif ($val =~ /^(?:0|-?[1-9]\d{0,8})\z/) { # safe decimal number
        $out .= $val;
      }
      else {                              # string
-      if ($s->{useqq}) {
+      if ($s->{useqq} or $val =~ tr/\0-\377//c) {
+        # Fall back to qq if there's unicode
         $out .= qquote($val, $s->{useqq});
        }
        else {
@@ -482,6 +533,11 @@ sub Indent {
    }
  }
  
+sub Pair {
+    my($s, $v) = @_;
+    defined($v) ? (($s->{pair} = $v), return $s) : $s->{pair};
+}
+
  sub Pad {
    my($s, $v) = @_;
    defined($v) ? (($s->{pad} = $v), return $s) : $s->{pad};
@@ -537,6 +593,20 @@ sub Maxdepth {
    defined($v) ? (($s->{'maxdepth'} = $v), return $s) : $s->{'maxdepth'};
  }
  
+sub Useperl {
+  my($s, $v) = @_;
+  defined($v) ? (($s->{'useperl'} = $v), return $s) : $s->{'useperl'};
+}
+
+sub Sortkeys {
+  my($s, $v) = @_;
+  defined($v) ? (($s->{'sortkeys'} = $v), return $s) : $s->{'sortkeys'};
+}
+
+sub Deparse {
+  my($s, $v) = @_;
+  defined($v) ? (($s->{'deparse'} = $v), return $s) : $s->{'deparse'};
+}
  
  # used by qquote below
  my %esc = (  
@@ -575,6 +645,7 @@ sub qquote {
          # leave it as it is
      } else {
        s/([\200-\377])/'\\'.sprintf('%03o',ord($1))/eg;
+      s/([^\040-\176])/sprintf "\\x{%04x}", ord($1)/ge;
      }
    }
    else { # ebcdic
@@ -587,6 +658,10 @@ sub qquote {
    return qq("$_");
  }
  
+# helper sub to sort hash keys in Perl < 5.8.0 where we don't have
+# access to sortsv() from XS
+sub _sortkeys { [ sort keys %{$_[0]} ] }
+
  1;
  __END__
  
@@ -606,7 +681,7 @@ Data::Dumper - stringified perl data structures, suitable for both printing and
  
      # configuration variables
      {
-      local $Data::Dump::Purity = 1;
+      local $Data::Dumper::Purity = 1;
        eval Data::Dumper->Dump([$foo, $bar], [qw(foo *ary)]);
      }
  
@@ -641,7 +716,8 @@ The default output of self-referential structures can be C<eval>ed, but the
  nested references to C<$VAR>I<n> will be undefined, since a recursive
  structure cannot be constructed using one Perl statement.  You should set the
  C<Purity> flag to 1 to get additional statements that will correctly fill in
-these references.
+these references.  Moreover, if C<eval>ed when strictures are in effect,
+you need to ensure that any variables it accesses are previously declared.
  
  In the extended usage form, the references to be dumped can be given
  user-specified names.  If a name begins with a C<*>, the output will 
@@ -699,7 +775,7 @@ references are not dumped; instead, their names are inserted wherever they
  are encountered subsequently.  This is useful especially for properly
  dumping subroutine references.
  
-Expects a anonymous hash of name => value pairs.  Same rules apply for names
+Expects an anonymous hash of name => value pairs.  Same rules apply for names
  as in C<new>.  If no argument is supplied, will return the "seen" list of
  name => value pairs, in a list context.  Otherwise, returns the object
  itself.
@@ -753,7 +829,9 @@ so that they can be chained together nicely.
  
  =over 4
  
-=item $Data::Dumper::Indent  I<or>  I<$OBJ>->Indent(I<[NEWVAL]>)
+=item *
+
+$Data::Dumper::Indent  I<or>  I<$OBJ>->Indent(I<[NEWVAL]>)
  
  Controls the style of indentation.  It can be set to 0, 1, 2 or 3.  Style 0
  spews output without any newlines, indentation, or spaces between list
@@ -766,24 +844,32 @@ up).  Style 3 is like style 2, but also annotates the elements of arrays
  with their index (but the comment is on its own line, so array output
  consumes twice the number of lines).  Style 2 is the default.
  
-=item $Data::Dumper::Purity  I<or>  I<$OBJ>->Purity(I<[NEWVAL]>)
+=item *
+
+$Data::Dumper::Purity  I<or>  I<$OBJ>->Purity(I<[NEWVAL]>)
  
  Controls the degree to which the output can be C<eval>ed to recreate the
  supplied reference structures.  Setting it to 1 will output additional perl
  statements that will correctly recreate nested references.  The default is
  0.
  
-=item $Data::Dumper::Pad  I<or>  I<$OBJ>->Pad(I<[NEWVAL]>)
+=item *
+
+$Data::Dumper::Pad  I<or>  I<$OBJ>->Pad(I<[NEWVAL]>)
  
  Specifies the string that will be prefixed to every line of the output.
  Empty string by default.
  
-=item $Data::Dumper::Varname  I<or>  I<$OBJ>->Varname(I<[NEWVAL]>)
+=item *
+
+$Data::Dumper::Varname  I<or>  I<$OBJ>->Varname(I<[NEWVAL]>)
  
  Contains the prefix to use for tagging variable names in the output. The
  default is "VAR".
  
-=item $Data::Dumper::Useqq  I<or>  I<$OBJ>->Useqq(I<[NEWVAL]>)
+=item *
+
+$Data::Dumper::Useqq  I<or>  I<$OBJ>->Useqq(I<[NEWVAL]>)
  
  When set, enables the use of double quotes for representing string values.
  Whitespace other than space will be represented as C<[\n\t\r]>, "unsafe"
@@ -792,14 +878,18 @@ quoted octal integers.  Since setting this variable imposes a performance
  penalty, the default is 0.  C<Dump()> will run slower if this flag is set,
  since the fast XSUB implementation doesn't support it yet.
  
-=item $Data::Dumper::Terse  I<or>  I<$OBJ>->Terse(I<[NEWVAL]>)
+=item *
+
+$Data::Dumper::Terse  I<or>  I<$OBJ>->Terse(I<[NEWVAL]>)
  
  When set, Data::Dumper will emit single, non-self-referential values as
  atoms/terms rather than statements.  This means that the C<$VAR>I<n> names
  will be avoided where possible, but be advised that such output may not
  always be parseable by C<eval>.
  
-=item $Data::Dumper::Freezer  I<or>  $I<OBJ>->Freezer(I<[NEWVAL]>)
+=item *
+
+$Data::Dumper::Freezer  I<or>  $I<OBJ>->Freezer(I<[NEWVAL]>)
  
  Can be set to a method name, or to an empty string to disable the feature.
  Data::Dumper will invoke that method via the object before attempting to
@@ -810,37 +900,64 @@ method can be called via the object, and that the object ends up containing
  only perl data types after the method has been called.  Defaults to an empty
  string.
  
-=item $Data::Dumper::Toaster  I<or>  $I<OBJ>->Toaster(I<[NEWVAL]>)
+If an object does not support the method specified (determined using
+UNIVERSAL::can()) then the call will be skipped.  If the method dies, a
+warning will be generated.
+
+=item *
+
+$Data::Dumper::Toaster  I<or>  $I<OBJ>->Toaster(I<[NEWVAL]>)
  
  Can be set to a method name, or to an empty string to disable the feature.
  Data::Dumper will emit a method call for any objects that are to be dumped
-using the syntax C<bless(DATA, CLASS)->METHOD()>.  Note that this means that
+using the syntax C<bless(DATA, CLASS)-E<gt>METHOD()>.  Note that this means that
  the method specified will have to perform any modifications required on the
  object (like creating new state within it, and/or reblessing it in a
  different package) and then return it.  The client is responsible for making
  sure the method can be called via the object, and that it returns a valid
  object.  Defaults to an empty string.
  
-=item $Data::Dumper::Deepcopy  I<or>  $I<OBJ>->Deepcopy(I<[NEWVAL]>)
+=item *
+
+$Data::Dumper::Deepcopy  I<or>  $I<OBJ>->Deepcopy(I<[NEWVAL]>)
  
  Can be set to a boolean value to enable deep copies of structures.
  Cross-referencing will then only be done when absolutely essential
  (i.e., to break reference cycles).  Default is 0.
  
-=item $Data::Dumper::Quotekeys  I<or>  $I<OBJ>->Quotekeys(I<[NEWVAL]>)
+=item *
+
+$Data::Dumper::Quotekeys  I<or>  $I<OBJ>->Quotekeys(I<[NEWVAL]>)
  
  Can be set to a boolean value to control whether hash keys are quoted.
  A false value will avoid quoting hash keys when it looks like a simple
  string.  Default is 1, which will always enclose hash keys in quotes.
  
-=item $Data::Dumper::Bless  I<or>  $I<OBJ>->Bless(I<[NEWVAL]>)
+=item *
+
+$Data::Dumper::Bless  I<or>  $I<OBJ>->Bless(I<[NEWVAL]>)
  
  Can be set to a string that specifies an alternative to the C<bless>
  builtin operator used to create objects.  A function with the specified
  name should exist, and should accept the same arguments as the builtin.
  Default is C<bless>.
  
-=item $Data::Dumper::Maxdepth  I<or>  $I<OBJ>->Maxdepth(I<[NEWVAL]>)
+=item *
+
+$Data::Dumper::Pair  I<or>  $I<OBJ>->Pair(I<[NEWVAL]>)
+
+Can be set to a string that specifies the separator between hash keys
+and values. To dump nested hash, array and scalar values to JavaScript,
+use: C<$Data::Dumper::Pair = ' : ';>. Implementing C<bless> in JavaScript
+is left as an exercise for the reader.
+The string is inserted verbatim between each hash key and its value,
+so include any surrounding whitespace you want in the output.
+
+Default is: C< =E<gt> >.
+
+=item *
+
+$Data::Dumper::Maxdepth  I<or>  $I<OBJ>->Maxdepth(I<[NEWVAL]>)
  
  Can be set to a positive integer that specifies the depth beyond which
  which we don't venture into a structure.  Has no effect when
@@ -848,6 +965,48 @@ C<Data::Dumper::Purity> is set.  (Useful in debugger when we often don't
  want to see more than enough).  Default is 0, which means there is 
  no maximum depth. 
  
+=item *
+
+$Data::Dumper::Useperl  I<or>  $I<OBJ>->Useperl(I<[NEWVAL]>)
+
+Can be set to a boolean value which controls whether the pure Perl
+implementation of C<Data::Dumper> is used. The C<Data::Dumper> module is
+a dual implementation, with almost all functionality written in both
+pure Perl and also in XS ('C'). Since the XS version is much faster, it
+will always be used if possible. This option lets you override the
+default behavior, usually for testing purposes only. Default is 0, which
+means the XS implementation will be used if possible.
+
+=item *
+
+$Data::Dumper::Sortkeys  I<or>  $I<OBJ>->Sortkeys(I<[NEWVAL]>)
+
+Can be set to a boolean value to control whether hash keys are dumped in
+sorted order. A true value will cause the keys of all hashes to be
+dumped in Perl's default sort order. Can also be set to a subroutine
+reference which will be called for each hash that is dumped. In this
+case C<Data::Dumper> will call the subroutine once for each hash,
+passing it the reference of the hash. The purpose of the subroutine is
+to return a reference to an array of the keys that will be dumped, in
+the order that they should be dumped. Using this feature, you can
+control both the order of the keys, and which keys are actually used. In
+other words, this subroutine acts as a filter by which you can exclude
+certain keys from being dumped. Default is 0, which means that hash keys
+are not sorted.
+
+=item *
+
+$Data::Dumper::Deparse  I<or>  $I<OBJ>->Deparse(I<[NEWVAL]>)
+
+Can be set to a boolean value to control whether code references are
+turned into perl source code. If set to a true value, C<B::Deparse>
+will be used to get the source of the code reference. Using this option
+will force using the Perl implementation of the dumper, since the fast
+XSUB implementation doesn't support it.
+
+Caution: use this option only if you know that your coderefs will be
+properly reconstructed by C<B::Deparse>.
+
  =back
  
  =head2 Exports
@@ -903,6 +1062,9 @@ distribution for more examples.)
      $Data::Dumper::Useqq = 1;          # print strings in double quotes
      print Dumper($boo);
  
+    $Data::Dumper::Pair = " : ";       # specify hash key/value separator
+    print Dumper($boo);
+
  
      ########
      # recursive structures
@@ -1003,16 +1165,41 @@ distribution for more examples.)
      print $d->Dump;
  
  
+    ########
+    # sorting and filtering hash keys
+    ########
+
+    $Data::Dumper::Sortkeys = \&my_filter;
+    my $foo = { map { (ord, "$_$_$_") } 'I'..'Q' };
+    my $bar = { %$foo };
+    my $baz = { reverse %$foo };
+    print Dumper [ $foo, $bar, $baz ];
+
+    sub my_filter {
+        my ($hash) = @_;
+        # return an array ref containing the hash keys to dump
+        # in the order that you want them to be dumped
+        return [
+          # Sort the keys of %$foo in reverse numeric order
+            $hash eq $foo ? (sort {$b <=> $a} keys %$hash) :
+          # Only dump the odd number keys of %$bar
+            $hash eq $bar ? (grep {$_ % 2} keys %$hash) :
+          # Sort keys in default order for all other hashes
+            (sort keys %$hash)
+        ];
+    }
+
  =head1 BUGS
  
  Due to limitations of Perl subroutine call semantics, you cannot pass an
  array or hash.  Prepend it with a C<\> to pass its reference instead.  This
-will be remedied in time, with the arrival of prototypes in later versions
-of Perl.  For now, you need to use the extended usage form, and prepend the
+will be remedied in time, now that Perl has subroutine prototypes.
+For now, you need to use the extended usage form, and prepend the
  name with a C<*> to output it as a hash or array.
  
  C<Data::Dumper> cheats with CODE references.  If a code reference is
-encountered in the structure being processed, an anonymous subroutine that
+encountered in the structure being processed (and if you haven't set
+the C<Deparse> flag), an anonymous subroutine that
  contains the string '"DUMMY"' will be inserted in its place, and a warning
  will be printed if C<Purity> is set.  You can C<eval> the result, but bear
  in mind that the anonymous sub that gets created is just a placeholder.
@@ -1023,11 +1210,25 @@ to have, you can use the C<Seen> method to pre-seed the internal reference
  table and make the dumped output point to them, instead.  See L<EXAMPLES>
  above.
  
-The C<Useqq> flag makes Dump() run slower, since the XSUB implementation
-does not support it.
+The C<Useqq> and C<Deparse> flags make Dump() run slower, since the
+XSUB implementation does not support them.
  
  SCALAR objects have the weirdest looking C<bless> workaround.
  
+The pure Perl version of C<Data::Dumper> escapes UTF-8 strings correctly
+only in Perl 5.8.0 and later.
+
+=head2 NOTE
+
+Starting from Perl 5.8.1 different runs of Perl will have different
+ordering of hash keys.  The change was done for greater security,
+see L<perlsec/"Algorithmic Complexity Attacks">.  This means that
+different runs of Perl will have different Data::Dumper outputs if
+the data contains hashes.  If you need to have identical Data::Dumper
+outputs from different runs of Perl, use the environment variable
+PERL_HASH_SEED, see L<perlrun/PERL_HASH_SEED>.  Using this restores
+the old (platform-specific) ordering: an even prettier solution might
+be to use the C<Sortkeys> filter of Data::Dumper.
  
  =head1 AUTHOR
  
@@ -1037,10 +1238,9 @@ Copyright (c) 1996-98 Gurusamy Sarathy. All rights reserved.
  This program is free software; you can redistribute it and/or
  modify it under the same terms as Perl itself.
  
-
  =head1 VERSION
  
-Version 2.11   (unreleased)
+Version 2.121  (Aug 24 2003)
  
  =head1 SEE ALSO