From 4b6af431de82c30fc8df0d5d024cf77f6512e8b9 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 14 Mar 2015 21:50:27 -0600 Subject: [PATCH] Create single fcn for dup'd /lib code Several /lib .pm's have the same code which is complicated enough to warrant being placed in a shared function. This commit creates a .pm to be used by these .pm's. This implements the perhaps archaic 'Meta' notation wherein characters above 0x7f are displayed as M- plus the ASCII-range character derived by looking at only the lower 7 bits of the upper range one. There are problems with this, in that a literal control character can be in the string, whereas it is trying to get rid of control characters. But I left it to work as-is, just centralizing the code. On EBCDIC platforms this notation makes no sense because the bit patterns are all mixed up about having the upper bit set. So this commit fixes things on these platforms, so these are changed to \x{...}. No literal control characters are emitted. Another potential problem is that characters above 0xFF are passed through, unchanged. But again, I let the existing behavior stand. --- MANIFEST | 2 ++ Porting/Maintainers.pl | 1 + lib/DB.pm | 6 +++--- lib/meta_notation.pm | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++ lib/meta_notation.t | 40 +++++++++++++++++++++++++++++++++++++ lib/perl5db.pl | 11 +++++----- lib/sigtrap.pm | 6 +++--- 7 files changed, 108 insertions(+), 12 deletions(-) create mode 100644 lib/meta_notation.pm create mode 100644 lib/meta_notation.t diff --git a/MANIFEST b/MANIFEST index f874063..9fb277b 100644 --- a/MANIFEST +++ b/MANIFEST @@ -4182,6 +4182,8 @@ lib/less.pm For "use less" lib/less.t See if less support works lib/locale.pm For "use locale" lib/locale.t See if locale support works +lib/meta_notation.pm Helper for certain /lib .pm's +lib/meta_notation.t See if meta_notation.t works lib/Net/hostent.pm By-name interface to Perl's builtin gethost* lib/Net/hostent.t See if Net::hostent works lib/Net/netent.pm By-name interface to Perl's builtin getnet* diff --git a/Porting/Maintainers.pl b/Porting/Maintainers.pl index b0e3ea4..a21e843 100755 --- a/Porting/Maintainers.pl +++ b/Porting/Maintainers.pl @@ -1368,6 +1368,7 @@ use File::Glob qw(:case); lib/FindBin.{pm,t} lib/Getopt/Std.{pm,t} lib/Internals.t + lib/meta_notation.{pm,t} lib/Net/hostent.{pm,t} lib/Net/netent.{pm,t} lib/Net/protoent.{pm,t} diff --git a/lib/DB.pm b/lib/DB.pm index fd0ff92..404c57c 100644 --- a/lib/DB.pm +++ b/lib/DB.pm @@ -41,7 +41,7 @@ BEGIN { $DB::subname = ''; # currently executing sub (fully qualified name) $DB::lineno = ''; # current line number - $DB::VERSION = $DB::VERSION = '1.07'; + $DB::VERSION = $DB::VERSION = '1.08'; # initialize private globals to avoid warnings @@ -244,8 +244,8 @@ sub backtrace { for (@a) { s/'/\\'/g; s/([^\0]*)/'$1'/ unless /^-?[\d.]+$/; - s/([\200-\377])/sprintf("M-%c",ord($1)&0177)/eg; - s/([\0-\37\177])/sprintf("^%c",ord($1)^64)/eg; + require 'meta_notation.pm'; + $_ = _meta_notation($_) if /[[:^print:]]/a; } $w = $w ? '@ = ' : '$ = '; $a = $h ? '(' . join(', ', @a) . ')' : ''; diff --git a/lib/meta_notation.pm b/lib/meta_notation.pm new file mode 100644 index 0000000..2f85cd3 --- /dev/null +++ b/lib/meta_notation.pm @@ -0,0 +1,54 @@ +use strict; +use warnings; + +# A tiny private library routine which is a helper to several Perl core +# modules, to allow a paradigm to be implemented in a single place. The name, +# contents, or even the existence of this file may be changed at any time and +# are NOT to be used by anthing outside the Perl core. + +sub _meta_notation ($) { + + # Returns a copy of the input string with the nonprintable characters + # below 0x100 changed into printables. Any ASCII printables or above 0xFF + # are unchanged. (XXX Probably above-Latin1 characters should be + # converted to \X{...}) + # + # \0 .. \x1F (which are "\c@" .. "\c_") are changed into ^@, ^A, ^B, ... + # ^Z, ^[, ^\, ^], ^^, ^_ + # \c? is changed into ^?. + # + # The above accounts for all the ASCII-range nonprintables. + # + # On ASCII platforms, the upper-Latin1-range characters are converted to + # Meta notation, so that \xC1 becomes 'M-A', \xE2 becomes 'M-b', etc. + # This is how it always has worked, so is continued that way for backwards + # compatibility. XXX Wrong, but the way it has always worked is that \x80 + # .. \x9F are converted to M- followed by a literal control char. This + # probably has escaped attention due to the limited domains this code has + # been applied to. ext/SDBM_File/dbu.c does this right. + # + # On EBCDIC platforms, the upper-Latin1-range characters are converted + # into '\x{...}' Meta notation doesn't make sense on EBCDIC platforms + # because the ASCII-range printables are a mixture of upper bit set or + # not. [A-Za-Z0-9] all have the upper bit set. The underscore likely + # doesn't; and other punctuation may or may not. There's no simple + # pattern. + + my $string = shift; + + $string =~ s/([\0-\037])/ + sprintf("^%c",utf8::unicode_to_native(ord($1)^64))/xeg; + $string =~ s/\c?/^?/g; + if (ord("A") == 65) { + $string =~ s/([\200-\377])/sprintf("M-%c",ord($1)&0177)/eg; + } + else { + no warnings 'experimental::regex_sets'; + # Leave alone things above \xff + $string =~ s/( (?[ [\x00-\xFF] & [:^print:]])) / + sprintf("\\x{%X}", ord($1))/xaeg; + } + + return $string; +} +1 diff --git a/lib/meta_notation.t b/lib/meta_notation.t new file mode 100644 index 0000000..d89d50c --- /dev/null +++ b/lib/meta_notation.t @@ -0,0 +1,40 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; + require './test.pl'; +} + +use strict; +use warnings; + +eval "require 'meta_notation.pm'"; +if ($@) { + fail("Could not find 'meta_notation.pm'"); +} +else { + + is(_meta_notation("\007\010\011\c?Z\x{103}"), "^G^H^I^?Z\x{103}"); + + if ($::IS_ASCII || $::IS_ASCII) { + is(_meta_notation("\x{c1}\x{e2}"), 'M-AM-b'); + is(_meta_notation("\x{df}"), 'M-_'); + } + else { # EBCDIC platform + # In the first iteration we are looking for a non-ASCII control; in + # the second, a regular non-ASCII character. SPACE marks the end of + # most controls. We test each to see that they are properly converted + # to \x{...} + foreach my $start (0x20, ord " ") { + for (my $i = $start; $i < 256; $i++) { + my $char = chr $i; + next if $char =~ /[[:ascii:]]/; + is(_meta_notation($char), sprintf("\\x{%X}", $i)); + last; + } + } + } +} + +done_testing(); diff --git a/lib/perl5db.pl b/lib/perl5db.pl index e0989db..47b9f4a 100644 --- a/lib/perl5db.pl +++ b/lib/perl5db.pl @@ -528,7 +528,7 @@ BEGIN { # Debugger for Perl 5.00x; perl5db.pl patch level: use vars qw($VERSION $header); -$VERSION = '1.47'; +$VERSION = '1.48'; $header = "perl5db.pl version $VERSION"; @@ -6540,11 +6540,10 @@ sub _dump_trace_calc_saved_single_arg s/(.*)/'$1'/s unless /^(?: -?[\d.]+ | \*[\w:]* )$/x; - # Turn high-bit characters into meta-whatever. - s/([\200-\377])/sprintf("M-%c",ord($1)&0177)/eg; - - # Turn control characters into ^-whatever. - s/([\0-\37\177])/sprintf("^%c",ord($1)^64)/eg; + # Turn high-bit characters into meta-whatever, and controls into like + # '^D'. + require 'meta_notation.pm'; + $_ = _meta_notation($_) if /[[:^print:]]/a; return $_; } diff --git a/lib/sigtrap.pm b/lib/sigtrap.pm index df728c8..7d80146 100644 --- a/lib/sigtrap.pm +++ b/lib/sigtrap.pm @@ -8,7 +8,7 @@ sigtrap - Perl pragma to enable simple signal handling use Carp; -$VERSION = 1.07; +$VERSION = 1.08; $Verbose ||= 0; sub import { @@ -99,8 +99,8 @@ sub handler_traceback { s/([\'\\])/\\$1/g; s/([^\0]*)/'$1'/ unless /^(?: -?[\d.]+ | \*[\w:]* )$/x; - s/([\200-\377])/sprintf("M-%c",ord($1)&0177)/eg; - s/([\0-\37\177])/sprintf("^%c",ord($1)^64)/eg; + require 'meta_notation.pm'; + $_ = _meta_notation($_) if /[[:^print:]]/a; push(@a, $_); } $w = $w ? '@ = ' : '$ = '; -- 1.8.3.1