fix problems from Carp's partial EBCDIC support

author Zefram <zefram@fysh.org>

Thu, 20 Jul 2017 03:00:30 +0000 (04:00 +0100)

committer Zefram <zefram@fysh.org>

Thu, 20 Jul 2017 03:00:30 +0000 (04:00 +0100)
author Zefram <zefram@fysh.org>
Thu, 20 Jul 2017 03:00:30 +0000 (04:00 +0100)
committer Zefram <zefram@fysh.org>
Thu, 20 Jul 2017 03:00:30 +0000 (04:00 +0100)
diff --git a/dist/Carp/Changes b/dist/Carp/Changes

index dca6a52..0498eeb 100644 (file)
--- a/dist/Carp/Changes
+++ b/dist/Carp/Changes
@@ -1,3 +1,17 @@
+version 1.43
+
+  * fix problems introduced by the partial EBCDIC support from version
+    1.35
+
+version 1.42
+
+  * add some doc clue about what cluck does
+
+  * avoid floating point overflow in test
+
+version 1.41
+
+  * add missing "<FH> chunk #" phrase to messages
  
  version 1.40; 2016-03-10
    * Get arg_string.t to compile in perl v5.6
diff --git a/dist/Carp/lib/Carp.pm b/dist/Carp/lib/Carp.pm

index 05052b9..6127b26 100644 (file)
--- a/dist/Carp/lib/Carp.pm
+++ b/dist/Carp/lib/Carp.pm
@@ -87,7 +87,36 @@ BEGIN {
      }
  }
  
-our $VERSION = '1.42';
+# is_safe_printable_codepoint() indicates whether a character, specified
+# by integer codepoint, is OK to output literally in a trace.  Generally
+# this is if it is a printable character in the ancestral character set
+# (ASCII or EBCDIC).  This is used on some Perls in situations where a
+# regexp can't be used.
+BEGIN {
+    *is_safe_printable_codepoint =
+       "$]" >= 5.007_003 ?
+           eval(q(sub ($) {
+               my $u = utf8::native_to_unicode($_[0]);
+               $u >= 0x20 && $u <= 0x7e;
+           }))
+       : ord("A") == 65 ?
+           sub ($) { $_[0] >= 0x20 && $_[0] <= 0x7e }
+       :
+           sub ($) {
+               # Early EBCDIC
+               # 3 EBCDIC code pages supported then;  all controls but one
+               # are the code points below SPACE.  The other one is 0x5F on
+               # POSIX-BC; FF on the other two.
+               # FIXME: there are plenty of unprintable codepoints other
+               # than those that this code and the comment above identifies
+               # as "controls".
+               $_[0] >= ord(" ") && $_[0] <= 0xff &&
+                   $_[0] != (ord ("^") == 106 ? 0x5f : 0xff);
+           }
+       ;
+}
+
+our $VERSION = '1.43';
  $VERSION =~ tr/_//d;
  
  our $MaxEvalLen = 0;
@@ -300,32 +329,15 @@ sub format_arg {
                 next;
             }
             my $o = ord($c);
-
-            # This code is repeated in Regexp::CARP_TRACE()
-            if ($] ge 5.007_003) {
-                substr $arg, $i, 1, sprintf("\\x{%x}", $o)
-                 if utf8::native_to_unicode($o) < utf8::native_to_unicode(0x20)
-                  || utf8::native_to_unicode($o) > utf8::native_to_unicode(0x7e);
-            } elsif (ord("A") == 65) {
-                substr $arg, $i, 1, sprintf("\\x{%x}", $o)
-                    if $o < 0x20 || $o > 0x7e;
-            } else { # Early EBCDIC
-
-                # 3 EBCDIC code pages supported then;  all controls but one
-                # are the code points below SPACE.  The other one is 0x5F on
-                # POSIX-BC; FF on the other two.
-                substr $arg, $i, 1, sprintf("\\x{%x}", $o)
-                    if $o < ord(" ") || ((ord ("^") == 106)
-                                          ? $o == 0x5f
-                                          : $o == 0xff);
-            }
+           substr $arg, $i, 1, sprintf("\\x{%x}", $o)
+               unless is_safe_printable_codepoint($o);
         }
      } else {
         $arg =~ s/([\"\\\$\@])/\\$1/g;
          # This is all the ASCII printables spelled-out.  It is portable to all
          # Perl versions and platforms (such as EBCDIC).  There are other more
          # compact ways to do this, but may not work everywhere every version.
-        $arg =~ s/([^ !"\$\%#'()*+,\-.\/0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\\\]^_`abcdefghijklmnopqrstuvwxyz\{|}~])/sprintf("\\x{%x}",ord($1))/eg;
+        $arg =~ s/([^ !"#\$\%\&'()*+,\-.\/0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\\\]^_`abcdefghijklmnopqrstuvwxyz\{|}~])/sprintf("\\x{%x}",ord($1))/eg;
      }
      downgrade($arg, 1);
      return "\"".$arg."\"".$suffix;
@@ -338,25 +350,12 @@ sub Regexp::CARP_TRACE {
         for(my $i = length($arg); $i--; ) {
             my $o = ord(substr($arg, $i, 1));
             my $x = substr($arg, 0, 0);   # work around bug on Perl 5.8.{1,2}
-
-            # This code is repeated in format_arg()
-            if ($] ge 5.007_003) {
-                substr $arg, $i, 1, sprintf("\\x{%x}", $o)
-                 if utf8::native_to_unicode($o) < utf8::native_to_unicode(0x20)
-                  || utf8::native_to_unicode($o) > utf8::native_to_unicode(0x7e);
-            } elsif (ord("A") == 65) {
-                substr $arg, $i, 1, sprintf("\\x{%x}", $o)
-                    if $o < 0x20 || $o > 0x7e;
-            } else { # Early EBCDIC
-                substr $arg, $i, 1, sprintf("\\x{%x}", $o)
-                    if $o < ord(" ") || ((ord ("^") == 106)
-                                          ? $o == 0x5f
-                                          : $o == 0xff);
-            }
+           substr $arg, $i, 1, sprintf("\\x{%x}", $o)
+               unless is_safe_printable_codepoint($o);
         }
      } else {
          # See comment in format_arg() about this same regex.
-        $arg =~ s/([^ !"\$\%#'()*+,\-.\/0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\\\]^_`abcdefghijklmnopqrstuvwxyz\{|}~])/sprintf("\\x{%x}",ord($1))/eg;
+        $arg =~ s/([^ !"#\$\%\&'()*+,\-.\/0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\\\]^_`abcdefghijklmnopqrstuvwxyz\{|}~])/sprintf("\\x{%x}",ord($1))/eg;
      }
      downgrade($arg, 1);
      my $suffix = "";
diff --git a/dist/Carp/lib/Carp/Heavy.pm b/dist/Carp/lib/Carp/Heavy.pm

index f9c584a..4b8cbe1 100644 (file)
--- a/dist/Carp/lib/Carp/Heavy.pm
+++ b/dist/Carp/lib/Carp/Heavy.pm
@@ -2,7 +2,7 @@ package Carp::Heavy;
  
  use Carp ();
  
-our $VERSION = '1.42';
+our $VERSION = '1.43';
  $VERSION =~ tr/_//d;
  
  # Carp::Heavy was merged into Carp in version 1.12.  Any mismatched versions
diff --git a/dist/Carp/t/arg_regexp.t b/dist/Carp/t/arg_regexp.t

index 1575b29..83e8f03 100644 (file)
--- a/dist/Carp/t/arg_regexp.t
+++ b/dist/Carp/t/arg_regexp.t
@@ -1,6 +1,8 @@
  use warnings;
  use strict;
  
+# confirm that regexp-typed stack args are displayed correctly by longmess()
+
  use Test::More tests => 42;
  
  use Carp ();
@@ -16,12 +18,14 @@ my $e9 = sprintf "%02x", (($] ge 5.007_003)
                            : ((ord("A") == 193)
                               ? 0x51
                               : 0xE9));
-my $chr_e9 = chr eval "0x$e9";
+my $xe9 = "\\x$e9";
+my $chr_e9 = eval "\"$xe9\"";
  my $nl_as_hex = sprintf "%x", ord("\n");
  
  # On Perl 5.6 we accept some incorrect quoting of Unicode characters,
  # because upgradedness of regexps isn't preserved by stringification,
  # so it's impossible to implement the correct behaviour.
+# FIXME: the permissive patterns don't account for EBCDIC
  my $xe9_rx = "$]" < 5.008 ? qr/\\x\{c3\}\\x\{a9\}|\\x\{e9\}/ : qr/\\x\{$e9\}/;
  my $x666_rx = "$]" < 5.008 ? qr/\\x\{d9\}\\x\{a6\}|\\x\{666\}/ : qr/\\x\{666\}/;
  my $x2603_rx = "$]" < 5.008 ? qr/\\x\{e2\}\\x\{98\}\\x\{83\}|\\x\{2603\}/ : qr/\\x\{2603\}/;
@@ -41,16 +45,10 @@ like lm(qr/\x{666}b/), qr/main::lm\(qr\(\\x\{666\}b\)u?\)/;
  like lm(rx("\x{666}b")), qr/main::lm\(qr\(${x666_rx}b\)u?\)/;
  like lm(qr/a\x{666}/), qr/main::lm\(qr\(a\\x\{666\}\)u?\)/;
  like lm(rx("a\x{666}")), qr/main::lm\(qr\(a${x666_rx}\)u?\)/;
-like lm(qr/L${chr_e9}on/), qr/main::lm\(qr\(L\\x\{?${e9}\}?on\)u?\)/;
+like lm(qr/L${xe9}on/), qr/main::lm\(qr\(L\\x${e9}on\)u?\)/;
  like lm(rx("L${chr_e9}on")), qr/main::lm\(qr\(L${xe9_rx}on\)u?\)/;
-
-
-SKIP: {
-    skip "wide-character-related bug in pre-5.18 perls", 2 if $] lt 5.017_001;
-
-    like lm(qr/L${chr_e9}on \x{2603} !/), qr/main::lm\(qr\(L\\x\{?${e9}\}?on \\x\{2603\} !\)u?\)/;
-    like lm(rx("L${chr_e9}on \x{2603} !")), qr/main::lm\(qr\(L${xe9_rx}on ${x2603_rx} !\)u?\)/;
-}
+like lm(qr/L${xe9}on \x{2603} !/), qr/main::lm\(qr\(L\\x${e9}on \\x\{2603\} !\)u?\)/;
+like lm(rx("L${chr_e9}on \x{2603} !")), qr/main::lm\(qr\(L${xe9_rx}on ${x2603_rx} !\)u?\)/;
  
  $Carp::MaxArgLen = 5;
  foreach my $arg ("foo bar baz", "foo bar ba", "foo bar b", "foo bar ", "foo bar", "foo ba") {
@@ -60,16 +58,10 @@ foreach my $arg ("foo b", "foo ", "foo", "fo", "f", "") {
      like lm(rx($arg)), qr/main::lm\(qr\(\Q$arg\E\)u?\)/;
  }
  like lm(qr/foo.bar$/sm), qr/main::lm\(qr\(fo\)\.\.\.u?ms\)/;
-
-SKIP: {
-    skip "wide-character-related bug in pre-5.18 perls", 4 if $] lt 5.017_001;
-
-    like lm(qr/L${chr_e9}on \x{2603} !/), qr/main::lm\(qr\(L\\\)\.\.\.u?\)/;
-    like lm(rx("L${chr_e9}on \x{2603} !")), qr/main::lm\(qr\(L\\\)\.\.\.u?\)/;
-    like lm(qr/L${chr_e9}on\x{2603}/), qr/main::lm\(qr\(L\\\)\.\.\.u?\)/;
-    like lm(rx("L${chr_e9}on\x{2603}")), qr/main::lm\(qr\(L\\\)\.\.\.u?\)/;
-}
-
+like lm(qr/L${xe9}on \x{2603} !/), qr/main::lm\(qr\(L\\\)\.\.\.u?\)/;
+like lm(rx("L${chr_e9}on \x{2603} !")), qr/main::lm\(qr\(L\\\)\.\.\.u?\)/;
+like lm(qr/L${xe9}on\x{2603}/), qr/main::lm\(qr\(L\\\)\.\.\.u?\)/;
+like lm(rx("L${chr_e9}on\x{2603}")), qr/main::lm\(qr\(L\\\)\.\.\.u?\)/;
  like lm(qr/foo\x{2603}/), qr/main::lm\(qr\(fo\)\.\.\.u?\)/;
  like lm(rx("foo\x{2603}")), qr/main::lm\(qr\(fo\)\.\.\.u?\)/;
  
@@ -77,12 +69,7 @@ $Carp::MaxArgLen = 0;
  foreach my $arg ("wibble:" x 20, "foo bar baz") {
      like lm(rx($arg)), qr/main::lm\(qr\(\Q$arg\E\)u?\)/;
  }
-
-SKIP: {
-    skip "wide-character-related bug in pre-5.18 perls", 2 if $] lt 5.017_001;
-
-    like lm(qr/L${chr_e9}on\x{2603}/), qr/main::lm\(qr\(L\\x\{?${e9}\}?on\\x\{2603\}\)u?\)/;
-    like lm(rx("L${chr_e9}on\x{2603}")), qr/main::lm\(qr\(L${xe9_rx}on${x2603_rx}\)u?\)/;
-}
+like lm(qr/L${xe9}on\x{2603}/), qr/main::lm\(qr\(L\\x${e9}on\\x\{2603\}\)u?\)/;
+like lm(rx("L${chr_e9}on\x{2603}")), qr/main::lm\(qr\(L${xe9_rx}on${x2603_rx}\)u?\)/;
  
  1;
diff --git a/dist/Carp/t/arg_string.t b/dist/Carp/t/arg_string.t

index dc70f43..544a4fe 100644 (file)
--- a/dist/Carp/t/arg_string.t
+++ b/dist/Carp/t/arg_string.t
@@ -1,9 +1,9 @@
  use warnings;
  use strict;
  
-# confirm that stack args are displayed correctly by longmess()
+# confirm that string-typed stack args are displayed correctly by longmess()
  
-use Test::More tests => 32;
+use Test::More tests => 33;
  
  use Carp ();
  
@@ -17,7 +17,8 @@ my $e9 = sprintf "%02x", (($] ge 5.007_003)
                            : ((ord("A") == 193)
                               ? 0x51
                               : 0xE9));
-my $chr_e9 = chr eval "0x$e9";
+my $xe9 = "\\x$e9";
+my $chr_e9 = eval "\"$xe9\"";
  my $nl_as_hex = sprintf "%x", ord("\n");
  
  like lm(3), qr/main::lm\(3\)/;
@@ -33,9 +34,9 @@ like lm(-3.5e30),
                \) /x;
  like lm(""), qr/main::lm\(""\)/;
  like lm("foo"), qr/main::lm\("foo"\)/;
+like lm("a&b"), qr/main::lm\("a&b"\)/;
  like lm("a\$b\@c\\d\"e"), qr/main::lm\("a\\\$b\\\@c\\\\d\\\"e"\)/;
  like lm("a\nb"), qr/main::lm\("a\\x\{$nl_as_hex\}b"\)/;
-
  like lm("a\x{666}b"), qr/main::lm\("a\\x\{666\}b"\)/;
  like lm("\x{666}b"), qr/main::lm\("\\x\{666\}b"\)/;
  like lm("a\x{666}"), qr/main::lm\("a\\x\{666\}"\)/;
author	Zefram <zefram@fysh.org>
	Thu, 20 Jul 2017 03:00:30 +0000 (04:00 +0100)
committer	Zefram <zefram@fysh.org>
	Thu, 20 Jul 2017 03:00:30 +0000 (04:00 +0100)
dist/Carp/Changes		patch \| blob \| blame \| history
dist/Carp/lib/Carp.pm		patch \| blob \| blame \| history
dist/Carp/lib/Carp/Heavy.pm		patch \| blob \| blame \| history
dist/Carp/t/arg_regexp.t		patch \| blob \| blame \| history
dist/Carp/t/arg_string.t		patch \| blob \| blame \| history