This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
enc2xs and C++: add extern "C" to data
[perl5.git] / ext / re / re.pm
CommitLineData
b3eb6a9b
GS
1package re;
2
a5ca303d 3our $VERSION = 0.06_02;
56953603 4
b3eb6a9b
GS
5=head1 NAME
6
7re - Perl pragma to alter regular expression behaviour
8
9=head1 SYNOPSIS
10
e4d48cc9
GS
11 use re 'taint';
12 ($x) = ($^X =~ /^(.*)$/s); # $x is tainted here
b3eb6a9b 13
2cd61cdb 14 $pat = '(?{ $foo = 1 })';
e4d48cc9 15 use re 'eval';
2cd61cdb 16 /foo${pat}bar/; # won't fail (when not under -T switch)
e4d48cc9
GS
17
18 {
19 no re 'taint'; # the default
20 ($x) = ($^X =~ /^(.*)$/s); # $x is not tainted here
21
22 no re 'eval'; # the default
2cd61cdb 23 /foo${pat}bar/; # disallowed (with or without -T switch)
e4d48cc9 24 }
b3eb6a9b 25
0a92e3a8
GS
26 use re 'debug'; # NOT lexically scoped (as others are)
27 /^(.*)$/s; # output debugging info during
28 # compile and run time
2cd61cdb 29
02ea72ae
IZ
30 use re 'debugcolor'; # same as 'debug', but with colored output
31 ...
32
a3621e74
YO
33 use re qw(Debug All); # Finer tuned debugging options.
34 use re qw(Debug More); # Similarly not lexically scoped.
35 no re qw(Debug ALL); # Turn of all re dugging and unload the module.
36
3ffabb8c
GS
37(We use $^X in these examples because it's tainted by default.)
38
b3eb6a9b
GS
39=head1 DESCRIPTION
40
41When C<use re 'taint'> is in effect, and a tainted string is the target
42of a regex, the regex memories (or values returned by the m// operator
e4d48cc9
GS
43in list context) are tainted. This feature is useful when regex operations
44on tainted data aren't meant to extract safe substrings, but to perform
45other transformations.
b3eb6a9b 46
e4d48cc9 47When C<use re 'eval'> is in effect, a regex is allowed to contain
2cd61cdb 48C<(?{ ... })> zero-width assertions even if regular expression contains
ffbc6a93 49variable interpolation. That is normally disallowed, since it is a
2cd61cdb
IZ
50potential security risk. Note that this pragma is ignored when the regular
51expression is obtained from tainted data, i.e. evaluation is always
3c4b39be 52disallowed with tainted regular expressions. See L<perlre/(?{ code })>.
2cd61cdb 53
ffbc6a93 54For the purpose of this pragma, interpolation of precompiled regular
0a92e3a8
GS
55expressions (i.e., the result of C<qr//>) is I<not> considered variable
56interpolation. Thus:
2cd61cdb
IZ
57
58 /foo${pat}bar/
59
ffbc6a93 60I<is> allowed if $pat is a precompiled regular expression, even
2cd61cdb
IZ
61if $pat contains C<(?{ ... })> assertions.
62
ffbc6a93 63When C<use re 'debug'> is in effect, perl emits debugging messages when
2cd61cdb
IZ
64compiling and using regular expressions. The output is the same as that
65obtained by running a C<-DDEBUGGING>-enabled perl interpreter with the
66B<-Dr> switch. It may be quite voluminous depending on the complexity
02ea72ae
IZ
67of the match. Using C<debugcolor> instead of C<debug> enables a
68form of output that can be used to get a colorful display on terminals
69that understand termcap color sequences. Set C<$ENV{PERL_RE_TC}> to a
70comma-separated list of C<termcap> properties to use for highlighting
ffbc6a93 71strings on/off, pre-point part on/off.
2cd61cdb
IZ
72See L<perldebug/"Debugging regular expressions"> for additional info.
73
a3621e74
YO
74Similarly C<use re 'Debug'> produces debugging output, the difference
75being that it allows the fine tuning of what debugging output will be
be8e71aa
YO
76emitted. Options are divided into three groups, those related to
77compilation, those related to execution and those related to special
78purposes. The options are as follows:
79
80=over 4
81
82=item Compile related options
83
84=over 4
85
86=item COMPILE
87
88Turns on all compile related debug options.
89
90=item PARSE
91
92Turns on debug output related to the process of parsing the pattern.
93
94=item OPTIMISE
95
96Enables output related to the optimisation phase of compilation.
97
98=item TRIE_COMPILE
99
100Detailed info about trie compilation.
101
102=item DUMP
103
104Dump the final program out after it is compiled and optimised.
105
106=item OFFSETS
107
108Dump offset information. This can be used to see how regops correlate
109to the pattern. Output format is
110
111 NODENUM:POSITION[LENGTH]
112
113Where 1 is the position of the first char in the string. Note that position
114can be 0, or larger than the actual length of the pattern, likewise length
115can be zero.
116
117=back
118
119=item Execute related options
120
121=over 4
122
123=item EXECUTE
124
125Turns on all execute related debug options.
126
127=item MATCH
128
129Turns on debugging of the main matching loop.
130
131=item TRIE_EXECUTE
132
133Extra debugging of how tries execute.
134
135=item INTUIT
136
137Enable debugging of start point optimisations.
138
139=back
140
141=item Extra debugging options
142
143=over 4
144
145=item EXTRA
146
147Turns on all "extra" debugging options.
148
149=item TRIE_MORE
150
151Enable enhanced TRIE debugging. Enhances both TRIE_EXECUTE
152and TRIE_COMPILE.
153
154=item OFFSETS_DEBUG
155
156Enable debugging of offsets information. This emits copious
157amounts of trace information and doesnt mesh well with other
158debug options.
159
160Almost definately only useful to people hacking
161on the offsets part of the debug engine.
162
163=back
164
165=item Other useful flags
166
167These are useful shortcuts to save on the typing.
168
169=over 4
170
171=item ALL
172
173Enable all compile and execute options at once.
174
175=item All
176
177Enable DUMP and all execute options. Equivelent to:
178
179 use re 'debug';
180
181=item MORE
182
183=item More
184
185Enable TRIE_MORE and all execute compile and execute options.
186
187=back 4
188
189=back 4
a3621e74
YO
190
191The directive C<use re 'debug'> and its equivalents are I<not> lexically
192scoped, as the other directives are. They have both compile-time and run-time
193effects.
b3eb6a9b
GS
194
195See L<perlmodlib/Pragmatic Modules>.
196
197=cut
198
918c0b2d
CB
199# N.B. File::Basename contains a literal for 'taint' as a fallback. If
200# taint is changed here, File::Basename must be updated as well.
b3eb6a9b 201my %bitmask = (
9cfe5470
RGS
202taint => 0x00100000, # HINT_RE_TAINT
203eval => 0x00200000, # HINT_RE_EVAL
b3eb6a9b
GS
204);
205
02ea72ae
IZ
206sub setcolor {
207 eval { # Ignore errors
208 require Term::Cap;
209
210 my $terminal = Tgetent Term::Cap ({OSPEED => 9600}); # Avoid warning.
8d300b32 211 my $props = $ENV{PERL_RE_TC} || 'md,me,so,se,us,ue';
02ea72ae 212 my @props = split /,/, $props;
c712d376 213 my $colors = join "\t", map {$terminal->Tputs($_,1)} @props;
02ea72ae 214
c712d376
HM
215 $colors =~ s/\0//g;
216 $ENV{PERL_RE_COLORS} = $colors;
02ea72ae 217 };
f9f4320a
YO
218 if ($@) {
219 $ENV{PERL_RE_COLORS}||=qq'\t\t> <\t> <\t\t'
220 }
221
02ea72ae
IZ
222}
223
a3621e74 224my %flags = (
be8e71aa
YO
225 COMPILE => 0x0000FF,
226 PARSE => 0x000001,
227 OPTIMISE => 0x000002,
a5ca303d 228 TRIEC => 0x000004,
be8e71aa 229 DUMP => 0x000008,
be8e71aa
YO
230
231 EXECUTE => 0x00FF00,
232 INTUIT => 0x000100,
233 MATCH => 0x000200,
a5ca303d 234 TRIEE => 0x000400,
be8e71aa
YO
235
236 EXTRA => 0xFF0000,
a5ca303d
YO
237 TRIEM => 0x010000,
238 OFFSETS => 0x020000,
239 OFFSETSDBG => 0x040000,
240 STATE => 0x080000,
241 OPTIMISEM => 0x100000,
a3621e74 242);
786e8c11 243$flags{ALL} = -1;
be8e71aa 244$flags{All} = $flags{all} = $flags{DUMP} | $flags{EXECUTE};
a5ca303d 245$flags{More} = $flags{MORE} = $flags{All} | $flags{TRIEC} | $flags{TRIEM} | $flags{STATE};
ddc5bc0f 246$flags{State} = $flags{DUMP} | $flags{EXECUTE} | $flags{STATE};
a5ca303d 247$flags{TRIE} = $flags{DUMP} | $flags{EXECUTE} | $flags{TRIEC};
a3621e74 248
f9f4320a 249my $installed =eval {
380e0b81
NC
250 require XSLoader;
251 XSLoader::load('re');
f9f4320a
YO
252 install();
253};
254
255sub _load_unload {
256 my ($on)= @_;
257 if ($on) {
258 die "'re' not installed!?" unless $installed;
259 #warn "installed: $installed\n";
260 install(); # allow for changes in colors
261 $^H{regcomp}= $installed;
262 } else {
263 delete $^H{regcomp};
264 }
380e0b81
NC
265}
266
b3eb6a9b 267sub bits {
56953603 268 my $on = shift;
b3eb6a9b 269 my $bits = 0;
2570cdf1 270 unless (@_) {
f9f4320a 271 return;
b3eb6a9b 272 }
a3621e74
YO
273 foreach my $idx (0..$#_){
274 my $s=$_[$idx];
275 if ($s eq 'Debug' or $s eq 'Debugcolor') {
f9f4320a 276 setcolor() if $s =~/color/i;
a3621e74 277 ${^RE_DEBUG_FLAGS} = 0 unless defined ${^RE_DEBUG_FLAGS};
a3621e74
YO
278 for my $idx ($idx+1..$#_) {
279 if ($flags{$_[$idx]}) {
280 if ($on) {
281 ${^RE_DEBUG_FLAGS} |= $flags{$_[$idx]};
282 } else {
283 ${^RE_DEBUG_FLAGS} &= ~ $flags{$_[$idx]};
284 }
285 } else {
286 require Carp;
287 Carp::carp("Unknown \"re\" Debug flag '$_[$idx]', possible flags: ",
288 join(", ",sort { $flags{$a} <=> $flags{$b} } keys %flags ) );
289 }
290 }
380e0b81 291 _load_unload($on ? 1 : ${^RE_DEBUG_FLAGS});
a3621e74
YO
292 last;
293 } elsif ($s eq 'debug' or $s eq 'debugcolor') {
f9f4320a 294 setcolor() if $s =~/color/i;
380e0b81 295 _load_unload($on);
a3621e74
YO
296 } elsif (exists $bitmask{$s}) {
297 $bits |= $bitmask{$s};
298 } else {
299 require Carp;
300 Carp::carp("Unknown \"re\" subpragma '$s' (known ones are: ",
301 join(', ', map {qq('$_')} 'debug', 'debugcolor', sort keys %bitmask),
302 ")");
303 }
56953603 304 }
b3eb6a9b
GS
305 $bits;
306}
307
308sub import {
309 shift;
2570cdf1 310 $^H |= bits(1, @_);
b3eb6a9b
GS
311}
312
313sub unimport {
314 shift;
2570cdf1 315 $^H &= ~ bits(0, @_);
b3eb6a9b
GS
316}
317
3181;