Commit
975fe8546427b5f6259103912b13925be148becd introduced partial EBCDIC
support to Carp, but simultaneously introduced some bugs into the module
and the tests. Multiple issues are addressed in this commit:
* The main check for whether a character needs a non-literal
representation when dumping a string or regexp argument, which used
to be a regexp character range [ -~], was expanded to an explicit
character set not using range syntax, but in the expansion the "&"
was omitted. This caused unwanted \x representation of any "&" in an
argument in a stack trace. Add the "&" back in and fix the sorting
of the character set.
* The substitute version of this check for Perls on which Carp can't
safely apply a regexp to an upgraded string, but new enough to have
utf8::native_to_unicode(), was applying that function to some fixed
codepoint values that were already Unicode codepoints. Remove those
calls, and compare the fixed codepoints directly to codepoints correctly
converted through that function.
* That version of the check, by referring to utf8::native_to_unicode()
directly in source that is always compiled, caused the utf8:: stash to
be vivified on Perl 5.6, causing havoc (and failed tests). Hide that
version of the check behind a (compile-time) string eval.
* Another version of the printability check, for EBCDIC on Perl 5.6,
treated as printable any codepoint above 0xff. Change that to correctly
treat all such codepoints as not safely printable.
* Some tests in t/arg_regexp.t which were originally about non-ASCII
characters specified in a regexp by using \x regexp syntax got changed
to use the non-ASCII characters literally at the regexp syntax level
(by interpolating them from a constructed string). Restore these to
using \x syntax, with the appropriate variability of the hex digits.
* Add a couple of "fixme" comments about parts of the EBCDIC support
that are incomplete.
* Some tests involving non-ASCII characters were later made to skip on
any Perl prior to 5.17.1. In practice they work fine on earlier Perls,
and they're fairly important. Suspect that the problem that led to
the skipping being added was dependent on the tests having been broken
as described above, so remove the skipping logic.
* Incidentally, correct a comment about the purpose of t/arg_string.t
and add a similar one to t/arg_regexp.t.
* Incidentally, add Changes entries for versions 1.41 and 1.42, which
were omitted when those changes were made.
+version 1.43
+
+ * fix problems introduced by the partial EBCDIC support from version
+ 1.35
+
+version 1.42
+
+ * add some doc clue about what cluck does
+
+ * avoid floating point overflow in test
+
+version 1.41
+
+ * add missing "<FH> chunk #" phrase to messages
version 1.40; 2016-03-10
* Get arg_string.t to compile in perl v5.6
}
}
-our $VERSION = '1.42';
+# is_safe_printable_codepoint() indicates whether a character, specified
+# by integer codepoint, is OK to output literally in a trace. Generally
+# this is if it is a printable character in the ancestral character set
+# (ASCII or EBCDIC). This is used on some Perls in situations where a
+# regexp can't be used.
+BEGIN {
+ *is_safe_printable_codepoint =
+ "$]" >= 5.007_003 ?
+ eval(q(sub ($) {
+ my $u = utf8::native_to_unicode($_[0]);
+ $u >= 0x20 && $u <= 0x7e;
+ }))
+ : ord("A") == 65 ?
+ sub ($) { $_[0] >= 0x20 && $_[0] <= 0x7e }
+ :
+ sub ($) {
+ # Early EBCDIC
+ # 3 EBCDIC code pages supported then; all controls but one
+ # are the code points below SPACE. The other one is 0x5F on
+ # POSIX-BC; FF on the other two.
+ # FIXME: there are plenty of unprintable codepoints other
+ # than those that this code and the comment above identifies
+ # as "controls".
+ $_[0] >= ord(" ") && $_[0] <= 0xff &&
+ $_[0] != (ord ("^") == 106 ? 0x5f : 0xff);
+ }
+ ;
+}
+
+our $VERSION = '1.43';
$VERSION =~ tr/_//d;
our $MaxEvalLen = 0;
next;
}
my $o = ord($c);
-
- # This code is repeated in Regexp::CARP_TRACE()
- if ($] ge 5.007_003) {
- substr $arg, $i, 1, sprintf("\\x{%x}", $o)
- if utf8::native_to_unicode($o) < utf8::native_to_unicode(0x20)
- || utf8::native_to_unicode($o) > utf8::native_to_unicode(0x7e);
- } elsif (ord("A") == 65) {
- substr $arg, $i, 1, sprintf("\\x{%x}", $o)
- if $o < 0x20 || $o > 0x7e;
- } else { # Early EBCDIC
-
- # 3 EBCDIC code pages supported then; all controls but one
- # are the code points below SPACE. The other one is 0x5F on
- # POSIX-BC; FF on the other two.
- substr $arg, $i, 1, sprintf("\\x{%x}", $o)
- if $o < ord(" ") || ((ord ("^") == 106)
- ? $o == 0x5f
- : $o == 0xff);
- }
+ substr $arg, $i, 1, sprintf("\\x{%x}", $o)
+ unless is_safe_printable_codepoint($o);
}
} else {
$arg =~ s/([\"\\\$\@])/\\$1/g;
# This is all the ASCII printables spelled-out. It is portable to all
# Perl versions and platforms (such as EBCDIC). There are other more
# compact ways to do this, but may not work everywhere every version.
- $arg =~ s/([^ !"\$\%#'()*+,\-.\/0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\\\]^_`abcdefghijklmnopqrstuvwxyz\{|}~])/sprintf("\\x{%x}",ord($1))/eg;
+ $arg =~ s/([^ !"#\$\%\&'()*+,\-.\/0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\\\]^_`abcdefghijklmnopqrstuvwxyz\{|}~])/sprintf("\\x{%x}",ord($1))/eg;
}
downgrade($arg, 1);
return "\"".$arg."\"".$suffix;
for(my $i = length($arg); $i--; ) {
my $o = ord(substr($arg, $i, 1));
my $x = substr($arg, 0, 0); # work around bug on Perl 5.8.{1,2}
-
- # This code is repeated in format_arg()
- if ($] ge 5.007_003) {
- substr $arg, $i, 1, sprintf("\\x{%x}", $o)
- if utf8::native_to_unicode($o) < utf8::native_to_unicode(0x20)
- || utf8::native_to_unicode($o) > utf8::native_to_unicode(0x7e);
- } elsif (ord("A") == 65) {
- substr $arg, $i, 1, sprintf("\\x{%x}", $o)
- if $o < 0x20 || $o > 0x7e;
- } else { # Early EBCDIC
- substr $arg, $i, 1, sprintf("\\x{%x}", $o)
- if $o < ord(" ") || ((ord ("^") == 106)
- ? $o == 0x5f
- : $o == 0xff);
- }
+ substr $arg, $i, 1, sprintf("\\x{%x}", $o)
+ unless is_safe_printable_codepoint($o);
}
} else {
# See comment in format_arg() about this same regex.
- $arg =~ s/([^ !"\$\%#'()*+,\-.\/0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\\\]^_`abcdefghijklmnopqrstuvwxyz\{|}~])/sprintf("\\x{%x}",ord($1))/eg;
+ $arg =~ s/([^ !"#\$\%\&'()*+,\-.\/0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\\\]^_`abcdefghijklmnopqrstuvwxyz\{|}~])/sprintf("\\x{%x}",ord($1))/eg;
}
downgrade($arg, 1);
my $suffix = "";
use Carp ();
-our $VERSION = '1.42';
+our $VERSION = '1.43';
$VERSION =~ tr/_//d;
# Carp::Heavy was merged into Carp in version 1.12. Any mismatched versions
use warnings;
use strict;
+# confirm that regexp-typed stack args are displayed correctly by longmess()
+
use Test::More tests => 42;
use Carp ();
: ((ord("A") == 193)
? 0x51
: 0xE9));
-my $chr_e9 = chr eval "0x$e9";
+my $xe9 = "\\x$e9";
+my $chr_e9 = eval "\"$xe9\"";
my $nl_as_hex = sprintf "%x", ord("\n");
# On Perl 5.6 we accept some incorrect quoting of Unicode characters,
# because upgradedness of regexps isn't preserved by stringification,
# so it's impossible to implement the correct behaviour.
+# FIXME: the permissive patterns don't account for EBCDIC
my $xe9_rx = "$]" < 5.008 ? qr/\\x\{c3\}\\x\{a9\}|\\x\{e9\}/ : qr/\\x\{$e9\}/;
my $x666_rx = "$]" < 5.008 ? qr/\\x\{d9\}\\x\{a6\}|\\x\{666\}/ : qr/\\x\{666\}/;
my $x2603_rx = "$]" < 5.008 ? qr/\\x\{e2\}\\x\{98\}\\x\{83\}|\\x\{2603\}/ : qr/\\x\{2603\}/;
like lm(rx("\x{666}b")), qr/main::lm\(qr\(${x666_rx}b\)u?\)/;
like lm(qr/a\x{666}/), qr/main::lm\(qr\(a\\x\{666\}\)u?\)/;
like lm(rx("a\x{666}")), qr/main::lm\(qr\(a${x666_rx}\)u?\)/;
-like lm(qr/L${chr_e9}on/), qr/main::lm\(qr\(L\\x\{?${e9}\}?on\)u?\)/;
+like lm(qr/L${xe9}on/), qr/main::lm\(qr\(L\\x${e9}on\)u?\)/;
like lm(rx("L${chr_e9}on")), qr/main::lm\(qr\(L${xe9_rx}on\)u?\)/;
-
-
-SKIP: {
- skip "wide-character-related bug in pre-5.18 perls", 2 if $] lt 5.017_001;
-
- like lm(qr/L${chr_e9}on \x{2603} !/), qr/main::lm\(qr\(L\\x\{?${e9}\}?on \\x\{2603\} !\)u?\)/;
- like lm(rx("L${chr_e9}on \x{2603} !")), qr/main::lm\(qr\(L${xe9_rx}on ${x2603_rx} !\)u?\)/;
-}
+like lm(qr/L${xe9}on \x{2603} !/), qr/main::lm\(qr\(L\\x${e9}on \\x\{2603\} !\)u?\)/;
+like lm(rx("L${chr_e9}on \x{2603} !")), qr/main::lm\(qr\(L${xe9_rx}on ${x2603_rx} !\)u?\)/;
$Carp::MaxArgLen = 5;
foreach my $arg ("foo bar baz", "foo bar ba", "foo bar b", "foo bar ", "foo bar", "foo ba") {
like lm(rx($arg)), qr/main::lm\(qr\(\Q$arg\E\)u?\)/;
}
like lm(qr/foo.bar$/sm), qr/main::lm\(qr\(fo\)\.\.\.u?ms\)/;
-
-SKIP: {
- skip "wide-character-related bug in pre-5.18 perls", 4 if $] lt 5.017_001;
-
- like lm(qr/L${chr_e9}on \x{2603} !/), qr/main::lm\(qr\(L\\\)\.\.\.u?\)/;
- like lm(rx("L${chr_e9}on \x{2603} !")), qr/main::lm\(qr\(L\\\)\.\.\.u?\)/;
- like lm(qr/L${chr_e9}on\x{2603}/), qr/main::lm\(qr\(L\\\)\.\.\.u?\)/;
- like lm(rx("L${chr_e9}on\x{2603}")), qr/main::lm\(qr\(L\\\)\.\.\.u?\)/;
-}
-
+like lm(qr/L${xe9}on \x{2603} !/), qr/main::lm\(qr\(L\\\)\.\.\.u?\)/;
+like lm(rx("L${chr_e9}on \x{2603} !")), qr/main::lm\(qr\(L\\\)\.\.\.u?\)/;
+like lm(qr/L${xe9}on\x{2603}/), qr/main::lm\(qr\(L\\\)\.\.\.u?\)/;
+like lm(rx("L${chr_e9}on\x{2603}")), qr/main::lm\(qr\(L\\\)\.\.\.u?\)/;
like lm(qr/foo\x{2603}/), qr/main::lm\(qr\(fo\)\.\.\.u?\)/;
like lm(rx("foo\x{2603}")), qr/main::lm\(qr\(fo\)\.\.\.u?\)/;
foreach my $arg ("wibble:" x 20, "foo bar baz") {
like lm(rx($arg)), qr/main::lm\(qr\(\Q$arg\E\)u?\)/;
}
-
-SKIP: {
- skip "wide-character-related bug in pre-5.18 perls", 2 if $] lt 5.017_001;
-
- like lm(qr/L${chr_e9}on\x{2603}/), qr/main::lm\(qr\(L\\x\{?${e9}\}?on\\x\{2603\}\)u?\)/;
- like lm(rx("L${chr_e9}on\x{2603}")), qr/main::lm\(qr\(L${xe9_rx}on${x2603_rx}\)u?\)/;
-}
+like lm(qr/L${xe9}on\x{2603}/), qr/main::lm\(qr\(L\\x${e9}on\\x\{2603\}\)u?\)/;
+like lm(rx("L${chr_e9}on\x{2603}")), qr/main::lm\(qr\(L${xe9_rx}on${x2603_rx}\)u?\)/;
1;
use warnings;
use strict;
-# confirm that stack args are displayed correctly by longmess()
+# confirm that string-typed stack args are displayed correctly by longmess()
-use Test::More tests => 32;
+use Test::More tests => 33;
use Carp ();
: ((ord("A") == 193)
? 0x51
: 0xE9));
-my $chr_e9 = chr eval "0x$e9";
+my $xe9 = "\\x$e9";
+my $chr_e9 = eval "\"$xe9\"";
my $nl_as_hex = sprintf "%x", ord("\n");
like lm(3), qr/main::lm\(3\)/;
\) /x;
like lm(""), qr/main::lm\(""\)/;
like lm("foo"), qr/main::lm\("foo"\)/;
+like lm("a&b"), qr/main::lm\("a&b"\)/;
like lm("a\$b\@c\\d\"e"), qr/main::lm\("a\\\$b\\\@c\\\\d\\\"e"\)/;
like lm("a\nb"), qr/main::lm\("a\\x\{$nl_as_hex\}b"\)/;
-
like lm("a\x{666}b"), qr/main::lm\("a\\x\{666\}b"\)/;
like lm("\x{666}b"), qr/main::lm\("\\x\{666\}b"\)/;
like lm("a\x{666}"), qr/main::lm\("a\\x\{666\}"\)/;