This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcomp.h: Fold 2 ANYOF flags into a single one
[perl5.git] / lib / meta_notation.pm
CommitLineData
4b6af431
KW
1use strict;
2use warnings;
3
# A tiny private library routine which is a helper to several Perl core
# modules, to allow a paradigm to be implemented in a single place.  The name,
# contents, or even the existence of this file may be changed at any time and
# are NOT to be used by anything outside the Perl core.
8
sub _meta_notation ($) {

    # Returns a copy of the input string with the nonprintable characters
    # below 0x100 changed into printables.  Any ASCII printables or above 0xFF
    # are unchanged.  (XXX Probably above-Latin1 characters should be
    # converted to \X{...})
    #
    #    \0 .. \x1F (which are "\c@" .. "\c_") are changed into ^@, ^A, ^B, ...
    #                                               ^Z, ^[, ^\, ^], ^^, ^_
    #    \c?  is changed into ^?.
    #
    # The above accounts for all the ASCII-range nonprintables.
    #
    # On ASCII platforms, the upper-Latin1-range characters are converted to
    # Meta notation, so that \xC1 becomes 'M-A', \xE2 becomes 'M-b', etc.
    # When stripping the high bit leaves a control character (\x80 .. \x9F
    # and \xFF), that control is itself written in caret notation, so \x81
    # becomes 'M-^A' and \xFF becomes 'M-^?'.  This code formerly emitted
    # 'M-' followed by the literal control character for those code points,
    # which defeated the purpose of returning only printables;
    # ext/SDBM_File/dbu.c already used the 'M-^X' form.
    #
    # On EBCDIC platforms, the upper-Latin1-range characters are converted
    # into '\x{...}'  Meta notation doesn't make sense on EBCDIC platforms
    # because the ASCII-range printables are a mixture of upper bit set or
    # not.  [A-Za-z0-9] all have the upper bit set.  The underscore likely
    # doesn't; and other punctuation may or may not.  There's no simple
    # pattern.
    #
    # Takes a single string argument; the caller's variable is not modified.

    my $string = shift;

    # C0 controls: XOR with 64 maps the control onto its caret letter; the
    # result is translated to the native character set for EBCDIC's benefit.
    $string =~ s/([\0-\037])/
               sprintf("^%c",utf8::unicode_to_native(ord($1)^64))/xeg;

    # DEL
    $string =~ s/\c?/^?/g;

    if (ord("A") == 65) {

        # ASCII platform.  Strip the high bit; if what remains is a control
        # (below space, or DEL) show it as ^X so nothing unprintable leaks
        # into the result.  Only 0x80 .. 0xFF match, so characters above
        # 0xFF are left alone.
        $string =~ s{([\200-\377])}{
                       my $low = ord($1) & 0177;
                       ($low < 040 || $low == 0177)
                           ? sprintf("M-^%c", $low ^ 64)
                           : sprintf("M-%c",  $low)
                   }eg;
    }
    else {
        no warnings 'experimental::regex_sets';
        # Leave alone things above \xff
        $string =~ s/( (?[ [\x00-\xFF] & [:^print:]])) /
                  sprintf("\\x{%X}", ord($1))/xaeg;
    }

    return $string;
}
541