This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
lib/dumpvar.pl: Generalize for non-ASCII platforms
authorKarl Williamson <khw@cpan.org>
Sun, 2 Nov 2014 03:10:48 +0000 (21:10 -0600)
committerKarl Williamson <khw@cpan.org>
Fri, 6 Mar 2015 04:48:24 +0000 (21:48 -0700)
lib/dumpvar.pl

index 91153ea..b2f3798 100644 (file)
@@ -14,6 +14,8 @@ package dumpvar;
 
 $winsize = 80 unless defined $winsize;
 
+sub ASCII { return ord('A') == 65; }
+
 
 # Defaults
 
@@ -25,6 +27,9 @@ $subdump = 1;
 $dumpReused = 0 unless defined $dumpReused;
 $bareStringify = 1 unless defined $bareStringify;
 
+my $APC = chr utf8::unicode_to_native(0x9F);
+my $backslash_c_question = (ASCII) ? '\177' : $APC;
+
 sub main::dumpValue {
   local %address;
   local $^W=0;
@@ -41,12 +46,8 @@ sub unctrl {
        local($v) ; 
 
        return \$_ if ref \$_ eq "GLOB";
-        if (ord('A') == 193) { # EBCDIC.
-           # EBCDIC has no concept of "\cA" or "A" being related
-           # to each other by a linear/boolean mapping.
-       } else {
-           s/([\001-\037\177])/'^'.pack('c',ord($1)^64)/eg;
-       }
+        s/([\000-\037])/ '^' . chr(utf8::unicode_to_native(ord($1)^64))/eg;
+        s/ $backslash_c_question /^?/xg;
        return $_;
     }
 }
@@ -54,7 +55,7 @@ sub unctrl {
 sub uniescape {
     join("",
         map { $_ > 255 ? sprintf("\\x{%04X}", $_) : chr($_) }
-            unpack("U*", $_[0]));
+            unpack("W*", $_[0]));
 }
 
 sub stringify {
@@ -79,39 +80,27 @@ sub _stringify {
            and %overload:: and defined &{'overload::StrVal'};
        
        if ($tick eq 'auto') {
-           if (ord('A') == 193) {
-               if (/[\000-\011]/ or /[\013-\024\31-\037\177]/) {
-                   $tick = '"';
-               } else {
-                   $tick = "'";
-               }
-            }  else {
-               if (/[\000-\011\013-\037\177]/) {
-                   $tick = '"';
-               } else {
-                   $tick = "'";
-               }
-           }
+            if (/[^[:^cntrl:]\n]/u) {   # All controls but \n get '"'
+                $tick = '"';
+            } else {
+                $tick = "'";
+            }
        }
        if ($tick eq "'") {
          s/([\'\\])/\\$1/g;
        } elsif ($unctrl eq 'unctrl') {
          s/([\"\\])/\\$1/g ;
-         s/([\000-\037\177])/'^'.pack('c',ord($1)^64)/eg;
+          $_ = &unctrl($_);
          # uniescape?
-         s/([\200-\377])/'\\0x'.sprintf('%2X',ord($1))/eg 
+         s/([[:^ascii:]])/'\\0x'.sprintf('%2X',ord($1))/eg
            if $quoteHighBit;
        } elsif ($unctrl eq 'quote') {
          s/([\"\\\$\@])/\\$1/g if $tick eq '"';
-         s/\033/\\e/g;
-         if (ord('A') == 193) { # EBCDIC.
-             s/([\000-\037\177])/'\\c'.chr(193)/eg; # Unfinished.
-         } else {
-             s/([\000-\037\177])/'\\c'._escaped_ord($1)/eg;
-         }
+         s/\e/\\e/g;
+          s/([\000-\037$backslash_c_question])/'\\c'._escaped_ord($1)/eg;
        }
        $_ = uniescape($_);
-       s/([\200-\377])/'\\'.sprintf('%3o',ord($1))/eg if $quoteHighBit;
+       s/([[:^ascii:]])/'\\'.sprintf('%3o',ord($1))/eg if $quoteHighBit;
        return ($noticks || /^\d+(\.\d*)?\Z/) 
          ? $_ 
          : $tick . $_ . $tick;
@@ -121,8 +110,13 @@ sub _stringify {
 # Ensure a resulting \ is escaped to be \\
 sub _escaped_ord {
     my $chr = shift;
-    $chr = chr(ord($chr)^64);
-    $chr =~ s{\\}{\\\\}g;
+    if ($chr eq $backslash_c_question) {
+        $chr = '?';
+    }
+    else {
+        $chr = chr(utf8::unicode_to_native(ord($chr)^64));
+        $chr =~ s{\\}{\\\\}g;
+    }
     return $chr;
 }