This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Updated escaping code. utf8 regex debug output improvements
[perl5.git] / ext / re / re.pm
CommitLineData
b3eb6a9b
GS
1package re;
2
380e0b81 3our $VERSION = 0.06_01;
56953603 4
b3eb6a9b
GS
5=head1 NAME
6
7re - Perl pragma to alter regular expression behaviour
8
9=head1 SYNOPSIS
10
e4d48cc9
GS
11 use re 'taint';
12 ($x) = ($^X =~ /^(.*)$/s); # $x is tainted here
b3eb6a9b 13
2cd61cdb 14 $pat = '(?{ $foo = 1 })';
e4d48cc9 15 use re 'eval';
2cd61cdb 16 /foo${pat}bar/; # won't fail (when not under -T switch)
e4d48cc9
GS
17
18 {
19 no re 'taint'; # the default
20 ($x) = ($^X =~ /^(.*)$/s); # $x is not tainted here
21
22 no re 'eval'; # the default
2cd61cdb 23 /foo${pat}bar/; # disallowed (with or without -T switch)
e4d48cc9 24 }
b3eb6a9b 25
0a92e3a8
GS
26 use re 'debug'; # NOT lexically scoped (as others are)
27 /^(.*)$/s; # output debugging info during
28 # compile and run time
2cd61cdb 29
02ea72ae
IZ
30 use re 'debugcolor'; # same as 'debug', but with colored output
31 ...
32
a3621e74
YO
33 use re qw(Debug All); # Finer tuned debugging options.
34 use re qw(Debug More); # Similarly not lexically scoped.
35 no re qw(Debug ALL); # Turn of all re dugging and unload the module.
36
3ffabb8c
GS
37(We use $^X in these examples because it's tainted by default.)
38
b3eb6a9b
GS
39=head1 DESCRIPTION
40
41When C<use re 'taint'> is in effect, and a tainted string is the target
42of a regex, the regex memories (or values returned by the m// operator
e4d48cc9
GS
43in list context) are tainted. This feature is useful when regex operations
44on tainted data aren't meant to extract safe substrings, but to perform
45other transformations.
b3eb6a9b 46
e4d48cc9 47When C<use re 'eval'> is in effect, a regex is allowed to contain
2cd61cdb 48C<(?{ ... })> zero-width assertions even if regular expression contains
ffbc6a93 49variable interpolation. That is normally disallowed, since it is a
2cd61cdb
IZ
50potential security risk. Note that this pragma is ignored when the regular
51expression is obtained from tainted data, i.e. evaluation is always
3c4b39be 52disallowed with tainted regular expressions. See L<perlre/(?{ code })>.
2cd61cdb 53
ffbc6a93 54For the purpose of this pragma, interpolation of precompiled regular
0a92e3a8
GS
55expressions (i.e., the result of C<qr//>) is I<not> considered variable
56interpolation. Thus:
2cd61cdb
IZ
57
58 /foo${pat}bar/
59
ffbc6a93 60I<is> allowed if $pat is a precompiled regular expression, even
2cd61cdb
IZ
61if $pat contains C<(?{ ... })> assertions.
62
ffbc6a93 63When C<use re 'debug'> is in effect, perl emits debugging messages when
2cd61cdb
IZ
64compiling and using regular expressions. The output is the same as that
65obtained by running a C<-DDEBUGGING>-enabled perl interpreter with the
66B<-Dr> switch. It may be quite voluminous depending on the complexity
02ea72ae
IZ
67of the match. Using C<debugcolor> instead of C<debug> enables a
68form of output that can be used to get a colorful display on terminals
69that understand termcap color sequences. Set C<$ENV{PERL_RE_TC}> to a
70comma-separated list of C<termcap> properties to use for highlighting
ffbc6a93 71strings on/off, pre-point part on/off.
2cd61cdb
IZ
72See L<perldebug/"Debugging regular expressions"> for additional info.
73
a3621e74
YO
74Similarly C<use re 'Debug'> produces debugging output, the difference
75being that it allows the fine tuning of what debugging output will be
be8e71aa
YO
76emitted. Options are divided into three groups, those related to
77compilation, those related to execution and those related to special
78purposes. The options are as follows:
79
80=over 4
81
82=item Compile related options
83
84=over 4
85
86=item COMPILE
87
88Turns on all compile related debug options.
89
90=item PARSE
91
92Turns on debug output related to the process of parsing the pattern.
93
94=item OPTIMISE
95
96Enables output related to the optimisation phase of compilation.
97
98=item TRIE_COMPILE
99
100Detailed info about trie compilation.
101
102=item DUMP
103
104Dump the final program out after it is compiled and optimised.
105
106=item OFFSETS
107
108Dump offset information. This can be used to see how regops correlate
109to the pattern. Output format is
110
111 NODENUM:POSITION[LENGTH]
112
113Where 1 is the position of the first char in the string. Note that position
114can be 0, or larger than the actual length of the pattern, likewise length
115can be zero.
116
117=back
118
119=item Execute related options
120
121=over 4
122
123=item EXECUTE
124
125Turns on all execute related debug options.
126
127=item MATCH
128
129Turns on debugging of the main matching loop.
130
131=item TRIE_EXECUTE
132
133Extra debugging of how tries execute.
134
135=item INTUIT
136
137Enable debugging of start point optimisations.
138
139=back
140
141=item Extra debugging options
142
143=over 4
144
145=item EXTRA
146
147Turns on all "extra" debugging options.
148
149=item TRIE_MORE
150
151Enable enhanced TRIE debugging. Enhances both TRIE_EXECUTE
152and TRIE_COMPILE.
153
154=item OFFSETS_DEBUG
155
156Enable debugging of offsets information. This emits copious
157amounts of trace information and doesnt mesh well with other
158debug options.
159
160Almost definately only useful to people hacking
161on the offsets part of the debug engine.
162
163=back
164
165=item Other useful flags
166
167These are useful shortcuts to save on the typing.
168
169=over 4
170
171=item ALL
172
173Enable all compile and execute options at once.
174
175=item All
176
177Enable DUMP and all execute options. Equivelent to:
178
179 use re 'debug';
180
181=item MORE
182
183=item More
184
185Enable TRIE_MORE and all execute compile and execute options.
186
187=back 4
188
189=back 4
a3621e74
YO
190
191The directive C<use re 'debug'> and its equivalents are I<not> lexically
192scoped, as the other directives are. They have both compile-time and run-time
193effects.
b3eb6a9b
GS
194
195See L<perlmodlib/Pragmatic Modules>.
196
197=cut
198
918c0b2d
CB
199# N.B. File::Basename contains a literal for 'taint' as a fallback. If
200# taint is changed here, File::Basename must be updated as well.
b3eb6a9b 201my %bitmask = (
9cfe5470
RGS
202taint => 0x00100000, # HINT_RE_TAINT
203eval => 0x00200000, # HINT_RE_EVAL
b3eb6a9b
GS
204);
205
02ea72ae
IZ
206sub setcolor {
207 eval { # Ignore errors
208 require Term::Cap;
209
210 my $terminal = Tgetent Term::Cap ({OSPEED => 9600}); # Avoid warning.
8d300b32 211 my $props = $ENV{PERL_RE_TC} || 'md,me,so,se,us,ue';
02ea72ae 212 my @props = split /,/, $props;
c712d376 213 my $colors = join "\t", map {$terminal->Tputs($_,1)} @props;
02ea72ae 214
c712d376
HM
215 $colors =~ s/\0//g;
216 $ENV{PERL_RE_COLORS} = $colors;
02ea72ae 217 };
02ea72ae
IZ
218}
219
a3621e74 220my %flags = (
be8e71aa
YO
221 COMPILE => 0x0000FF,
222 PARSE => 0x000001,
223 OPTIMISE => 0x000002,
224 TRIE_COMPILE => 0x000004,
225 DUMP => 0x000008,
226 OFFSETS => 0x000010,
227
228 EXECUTE => 0x00FF00,
229 INTUIT => 0x000100,
230 MATCH => 0x000200,
231 TRIE_EXECUTE => 0x000400,
232
233 EXTRA => 0xFF0000,
234 TRIE_MORE => 0x010000,
235 OFFSETS_DEBUG => 0x020000,
ab3bbdeb 236 STATE => 0x040000,
a3621e74 237);
be8e71aa
YO
238$flags{ALL} = $flags{COMPILE} | $flags{EXECUTE};
239$flags{All} = $flags{all} = $flags{DUMP} | $flags{EXECUTE};
240$flags{More} = $flags{MORE} = $flags{ALL} | $flags{TRIE_MORE};
a3621e74
YO
241
242my $installed = 0;
243
380e0b81
NC
244sub _load_unload {
245 my $on = shift;
246 require XSLoader;
247 XSLoader::load('re');
1839d50f 248 install($on);
380e0b81
NC
249}
250
b3eb6a9b 251sub bits {
56953603 252 my $on = shift;
b3eb6a9b 253 my $bits = 0;
2570cdf1 254 unless (@_) {
b3eb6a9b
GS
255 require Carp;
256 Carp::carp("Useless use of \"re\" pragma");
257 }
a3621e74
YO
258 foreach my $idx (0..$#_){
259 my $s=$_[$idx];
260 if ($s eq 'Debug' or $s eq 'Debugcolor') {
261 setcolor() if $s eq 'Debugcolor';
262 ${^RE_DEBUG_FLAGS} = 0 unless defined ${^RE_DEBUG_FLAGS};
a3621e74
YO
263 for my $idx ($idx+1..$#_) {
264 if ($flags{$_[$idx]}) {
265 if ($on) {
266 ${^RE_DEBUG_FLAGS} |= $flags{$_[$idx]};
267 } else {
268 ${^RE_DEBUG_FLAGS} &= ~ $flags{$_[$idx]};
269 }
270 } else {
271 require Carp;
272 Carp::carp("Unknown \"re\" Debug flag '$_[$idx]', possible flags: ",
273 join(", ",sort { $flags{$a} <=> $flags{$b} } keys %flags ) );
274 }
275 }
380e0b81 276 _load_unload($on ? 1 : ${^RE_DEBUG_FLAGS});
a3621e74
YO
277 last;
278 } elsif ($s eq 'debug' or $s eq 'debugcolor') {
279 setcolor() if $s eq 'debugcolor';
380e0b81 280 _load_unload($on);
a3621e74
YO
281 } elsif (exists $bitmask{$s}) {
282 $bits |= $bitmask{$s};
283 } else {
284 require Carp;
285 Carp::carp("Unknown \"re\" subpragma '$s' (known ones are: ",
286 join(', ', map {qq('$_')} 'debug', 'debugcolor', sort keys %bitmask),
287 ")");
288 }
56953603 289 }
b3eb6a9b
GS
290 $bits;
291}
292
293sub import {
294 shift;
2570cdf1 295 $^H |= bits(1, @_);
b3eb6a9b
GS
296}
297
298sub unimport {
299 shift;
2570cdf1 300 $^H &= ~ bits(0, @_);
b3eb6a9b
GS
301}
302
3031;