[perl5.git] / lib / meta_notation.pm

use strict;
use warnings;

# A tiny private library routine which is a helper to several Perl core
# modules, to allow a paradigm to be implemented in a single place.  The name,
# contents, or even the existence of this file may be changed at any time and
# are NOT to be used by anything outside the Perl core.

sub _meta_notation ($) {

    # Returns a copy of the input string with the nonprintable characters
    # below 0x100 changed into printables.  Any ASCII printables or above 0xFF
    # are unchanged.  (XXX Probably above-Latin1 characters should be
    # converted to \x{...})
    #
    # Takes a single scalar argument (the prototype is retained so that
    # existing call sites continue to parse the same way) and returns the
    # converted string; the caller's variable is not modified.
    #
    # \0 .. \x1F (which are "\c@" .. "\c_") are changed into ^@, ^A, ^B, ...
    # ^Z, ^[, ^\, ^], ^^, ^_
    # \c? is changed into ^?.
    #
    # The above accounts for all the ASCII-range nonprintables.
    #
    # On ASCII platforms, the upper-Latin1-range characters are converted to
    # Meta notation: 'M-' followed by the representation of the character
    # that results from clearing the high bit.  So \xC1 becomes 'M-A', \xE2
    # becomes 'M-b', etc.  When clearing the high bit yields a control
    # character, that control is itself written in caret notation, so that
    # \x80 .. \x9F become M-^@, M-^A, ... M-^_ and \xFF becomes M-^?.
    # (Earlier versions emitted 'M-' followed by the literal control
    # character for these, which left nonprintables in the output.)
    #
    # On EBCDIC platforms, the upper-Latin1-range characters are converted
    # into '\x{...}'  Meta notation doesn't make sense on EBCDIC platforms
    # because the ASCII-range printables are a mixture of upper bit set or
    # not.  [A-Za-z0-9] all have the upper bit set.  The underscore likely
    # doesn't; and other punctuation may or may not.  There's no simple
    # pattern.

    my $string = shift;

    # C0 controls: flipping bit 6 maps 0x00..0x1F onto '@'..'_'
    $string =~ s/([\0-\037])/
               sprintf("^%c",utf8::unicode_to_native(ord($1)^64))/xeg;
    $string =~ s/\c?/^?/g;
    if (ord("A") == 65) {

        # \x80 .. \x9F: with the high bit cleared these are C0 controls, so
        # caret-encode the remainder instead of emitting it raw
        $string =~ s/([\200-\237])/sprintf("M-^%c",(ord($1)&0177)^64)/eg;

        # \xFF: with the high bit cleared this is DEL
        $string =~ s/\xFF/M-^?/g;

        # \xA0 .. \xFE: with the high bit cleared these are ASCII printables
        $string =~ s/([\240-\376])/sprintf("M-%c",ord($1)&0177)/eg;
    }
    else {
        no warnings 'experimental::regex_sets';
        # Leave alone things above \xff
        $string =~ s/( (?[ [\x00-\xFF] & [:^print:]])) /
                  sprintf("\\x{%X}", ord($1))/xaeg;
    }

    return $string;
}
1
Commit	Line	Data
4b6af431 KW	1	use strict;
	2	use warnings;
	3
	4	# A tiny private library routine which is a helper to several Perl core
	5	# modules, to allow a paradigm to be implemented in a single place. The name,
	6	# contents, or even the existence of this file may be changed at any time and
	7	# are NOT to be used by anthing outside the Perl core.
	8
	9	sub _meta_notation ($) {
	10
	11	# Returns a copy of the input string with the nonprintable characters
	12	# below 0x100 changed into printables. Any ASCII printables or above 0xFF
	13	# are unchanged. (XXX Probably above-Latin1 characters should be
	14	# converted to \X{...})
	15	#
	16	# \0 .. \x1F (which are "\c@" .. "\c_") are changed into ^@, ^A, ^B, ...
	17	# ^Z, ^[, ^\, ^], ^^, ^_
	18	# \c? is changed into ^?.
	19	#
	20	# The above accounts for all the ASCII-range nonprintables.
	21	#
	22	# On ASCII platforms, the upper-Latin1-range characters are converted to
	23	# Meta notation, so that \xC1 becomes 'M-A', \xE2 becomes 'M-b', etc.
	24	# This is how it always has worked, so is continued that way for backwards
	25	# compatibility. XXX Wrong, but the way it has always worked is that \x80
	26	# .. \x9F are converted to M- followed by a literal control char. This
	27	# probably has escaped attention due to the limited domains this code has
	28	# been applied to. ext/SDBM_File/dbu.c does this right.
	29	#
	30	# On EBCDIC platforms, the upper-Latin1-range characters are converted
	31	# into '\x{...}' Meta notation doesn't make sense on EBCDIC platforms
	32	# because the ASCII-range printables are a mixture of upper bit set or
	33	# not. [A-Za-Z0-9] all have the upper bit set. The underscore likely
	34	# doesn't; and other punctuation may or may not. There's no simple
	35	# pattern.
	36
	37	my $string = shift;
	38
	39	$string =~ s/([\0-\037])/
	40	sprintf("^%c",utf8::unicode_to_native(ord($1)^64))/xeg;
	41	$string =~ s/\c?/^?/g;
	42	if (ord("A") == 65) {
	43	$string =~ s/([\200-\377])/sprintf("M-%c",ord($1)&0177)/eg;
	44	}
	45	else {
	46	no warnings 'experimental::regex_sets';
	47	# Leave alone things above \xff
	48	$string =~ s/( (?[ [\x00-\xFF] & [:^print:]])) /
	49	sprintf("\\x{%X}", ord($1))/xaeg;
	50	}
	51
	52	return $string;
	53	}
	54	1