Commit | Line | Data |
---|---|---|
b3eb6a9b GS |
1 | package re; |
2 | ||
99cc5cc6 | 3 | # pragma for controlling the regexp engine |
de8c5301 YO |
4 | use strict; |
5 | use warnings; | |
6 | ||
our $VERSION = "0.32";
our @ISA = qw(Exporter);

# Utility functions a caller may request by name, for example
# "use re qw(is_regexp regexp_pattern)".
our @EXPORT_OK = qw(
    regmust
    is_regexp regexp_pattern
    regname regnames regnames_count
);

# The same names as hash keys, so that bits() can recognise an exportable
# function name with a single lookup.
our %EXPORT_OK;
@EXPORT_OK{@EXPORT_OK} = (1) x @EXPORT_OK;

# Subpragmas that are nothing more than a bit in $^H.  bits() ORs the
# matching values together and import()/unimport() set or clear them.
my %bitmask = (
    taint => 0x00100000,    # HINT_RE_TAINT
    eval  => 0x00200000,    # HINT_RE_EVAL
);

# $^H bit that bits() keeps set for as long as $^H{reflags} or
# $^H{reflags_charset} holds something for the current scope.
my $flags_hint  = 0x02000000;    # HINT_RE_FLAGS
my $PMMOD_SHIFT = 0;

# Values understood by "use re '/flags'" and "use re 'strict'".
my %reflags = (

    # One bit each, counted from $PMMOD_SHIFT; bits() accumulates these in
    # $^H{reflags}.
    (map { $_->[0] => 1 << ($PMMOD_SHIFT + $_->[1]) }
        [ 'm',      0 ],
        [ 's',      1 ],
        [ 'i',      2 ],
        [ 'x',      3 ],
        [ 'n',      5 ],
        [ 'p',      6 ],
        [ 'strict', 10 ]),

    # special cases: the character set modifiers are plain numbers, not
    # bits; bits() stores the chosen one in $^H{reflags_charset}.
    d  => 0,
    l  => 1,
    u  => 2,
    a  => 3,
    aa => 4,
);
36 | ||
de8c5301 YO |
# setcolor()
#
# Fill $ENV{PERL_RE_COLORS} with the terminal control strings used for
# coloured regexp debugging output ('debugcolor' / 'Debugcolor').
#
# The termcap capability names are taken from the comma-separated list in
# $ENV{PERL_RE_TC}, defaulting to 'md,me,so,se,us,ue'.  Each one is looked
# up through Term::Cap, the results are joined with tabs, NUL bytes are
# removed and the string is stored in $ENV{PERL_RE_COLORS}.
#
# Any failure (Term::Cap cannot be loaded, no usable terminal description,
# ...) is deliberately not fatal: in that case a plain-text marker string
# is used instead, unless $ENV{PERL_RE_COLORS} already holds a true value.
#
# Takes no arguments and returns nothing useful.
sub setcolor {

    # The block ends in an explicit true value so that success is judged by
    # what eval returns rather than by $@, which code run while the eval
    # unwinds can reset.
    my $looked_up = eval {
        require Term::Cap;

        # Ordinary method call instead of the indirect-object form
        # "Tgetent Term::Cap (...)".  Passing OSPEED avoids a warning.
        my $terminal = Term::Cap->Tgetent({OSPEED => 9600});
        my $props    = $ENV{PERL_RE_TC} || 'md,me,so,se,us,ue';
        my @props    = split /,/, $props;
        my $colors   = join "\t", map {$terminal->Tputs($_,1)} @props;

        $colors =~ s/\0//g;
        $ENV{PERL_RE_COLORS} = $colors;
        1;
    };

    if (!$looked_up) {
        $ENV{PERL_RE_COLORS} ||= qq'\t\t> <\t> <\t\t';
    }

    return;
}
54 | ||
# Names accepted after "use re 'Debug'" and the ${^RE_DEBUG_FLAGS} bits
# that each one sets (or, under "no re", clears).
my %flags = (

    # Compile related; COMPILE covers the whole group.
    COMPILE    => 0x0000FF,
    PARSE      => 0x000001,
    OPTIMISE   => 0x000002,
    TRIEC      => 0x000004,
    DUMP       => 0x000008,
    FLAGS      => 0x000010,
    TEST       => 0x000020,

    # Execute related; EXECUTE covers the whole group.
    EXECUTE    => 0x00FF00,
    INTUIT     => 0x000100,
    MATCH      => 0x000200,
    TRIEE      => 0x000400,

    # Extra output; EXTRA covers the whole group.  STACK deliberately
    # carries the STATE bit as well.
    EXTRA      => 0xFF0000,
    TRIEM      => 0x010000,
    OFFSETS    => 0x020000,
    OFFSETSDBG => 0x040000,
    STATE      => 0x080000,
    OPTIMISEM  => 0x100000,
    STACK      => 0x280000,
    BUFFERS    => 0x400000,
    GPOS       => 0x800000,
);

# Shortcut groups, all derived from the single entries above.
{
    my $dump_and_execute = $flags{DUMP} | $flags{EXECUTE};
    my $left_out_of_ALL  = $flags{OFFSETS}
                         | $flags{OFFSETSDBG}
                         | $flags{BUFFERS};

    # Every bit except the three noisiest options.
    $flags{ALL} = -1 & ~$left_out_of_ALL;

    @flags{qw(all All)} = ($dump_and_execute) x 2;

    $flags{Extra} = $flags{EXECUTE} | $flags{COMPILE} | $flags{GPOS};

    @flags{qw(MORE More)}
        = ($dump_and_execute | $flags{TRIEC} | $flags{TRIEM} | $flags{STATE}) x 2;

    $flags{State} = $dump_and_execute | $flags{STATE};
    $flags{TRIE}  = $dump_and_execute | $flags{TRIEC};
}
85 | ||
ec781434 NC |
# Load the compiled part of this module, but only when DynaLoader has been
# booted.  Otherwise we are running under miniperl, where nothing is loaded:
# the module still has to work there, because the XS toolchain uses
# Text::Wrap, which uses re 'taint'.
do {
    require XSLoader;
    XSLoader::load();
} if defined &DynaLoader::boot_DynaLoader;
de8c5301 YO |
93 | |
# _load_unload($on)
#
# With a true $on, store the value returned by install() in $^H{regcomp};
# with a false $on, remove that entry again.  install() is not defined in
# this file (presumably it comes from the XS code loaded above).
sub _load_unload {
    my ($on) = @_;

    # Switching off: dropping the hint entry is all that is needed.
    return delete $^H{regcomp} unless $on;

    # install() is called on every activation; if it were not, changes made
    # to the colour environment variable since the previous call would go
    # unnoticed.
    #
    # It returns an integer which, cast appropriately in C, resolves to a
    # structure containing the regexp hooks.  Storing any other integer
    # here will guarantee segfaults.
    return $^H{regcomp} = install();
}
110 | ||
# bits($on, @subpragmas)
#
# Worker shared by import() and unimport().  $on is true for "use re" and
# false for "no re"; the remaining arguments are the subpragma names given
# on that line.
#
# Returns the OR of the %bitmask values of every 'taint' / 'eval' argument;
# the caller sets or clears those bits in $^H.  Everything else is applied
# here directly:
#
#   Debug / Debugcolor  every following argument names an entry of %flags
#                       to set or clear in ${^RE_DEBUG_FLAGS}; processing
#                       stops afterwards
#   debug / debugcolor  calls _load_unload($on); processing stops afterwards
#   exportable names    exported to the package using the pragma
#   strict              toggles the 'strict' bit in $^H{reflags} and the
#                       'regexp' warnings category
#   /flags              toggles modifier bits in $^H{reflags} and the
#                       character set in $^H{reflags_charset}
#
# A bare "no re;" (false $on, no arguments) turns everything off.
#
# This is intentionally one flat sub: export_to_level() is given a fixed
# call depth.
sub bits {
    my $on = shift;
    my $hint_bits = 0;      # what gets returned
    my %times_seen;         # Has flag already been seen?

    my $turning_all_off = !($on || @_);
    if ($turning_all_off) {

        # Pretend we were called with certain parameters, which are best
        # dealt with that way: taint and eval, followed by strict.
        push @_, keys(%bitmask), 'strict';
    }

    # Process each subpragma parameter
  ARG:
    for my $arg_pos (0 .. $#_) {

        # Work on a copy; the '/flags' handling below edits it.
        my $s = $_[$arg_pos];

        if ($s eq 'Debug' || $s eq 'Debugcolor') {
            setcolor() if $s =~/color/i;
            ${^RE_DEBUG_FLAGS} = 0 unless defined ${^RE_DEBUG_FLAGS};

            # All remaining arguments belong to this 'Debug'.
            for my $idx ($arg_pos + 1 .. $#_) {
                unless ($flags{$_[$idx]}) {
                    require Carp;
                    Carp::carp("Unknown \"re\" Debug flag '$_[$idx]', possible flags: ",
                               join(", ",sort keys %flags ) );
                    next;
                }
                if ($on) {
                    ${^RE_DEBUG_FLAGS} |= $flags{$_[$idx]};
                }
                else {
                    ${^RE_DEBUG_FLAGS} &= ~ $flags{$_[$idx]};
                }
            }

            # When turning flags off, stay loaded while any flag remains.
            _load_unload($on ? 1 : ${^RE_DEBUG_FLAGS});
            last ARG;
        }

        if ($s eq 'debug' || $s eq 'debugcolor') {
            setcolor() if $s =~/color/i;
            _load_unload($on);
            last ARG;
        }

        if (exists $bitmask{$s}) {
            $hint_bits |= $bitmask{$s};
            next ARG;
        }

        if ($EXPORT_OK{$s}) {
            require Exporter;
            re->export_to_level(2, 're', $s);
            next ARG;
        }

        if ($s eq 'strict') {
            if ($on) {
                $^H{reflags} |= $reflags{$s};
                warnings::warnif('experimental::re_strict',
                                 "\"use re 'strict'\" is experimental");

                # Turn on warnings if not already done, and remember that
                # it was us who did so.
                unless (warnings::enabled('regexp')) {
                    require warnings;
                    warnings->import('regexp');
                    $^H{re_strict} = 1;
                }
            }
            else {
                $^H{reflags} &= ~$reflags{$s} if $^H{reflags};

                # Turn off warnings if we turned them on.
                warnings->unimport('regexp') if $^H{re_strict};
            }

            $^H{reflags}
                ? ($^H |= $flags_hint)
                : ($^H &= ~$flags_hint);
            next ARG;
        }

        if ($s =~ s/^\///) {
            my $modifier_bits = $^H{reflags} || 0;
            my $seen_charset;

            # Walk through the flag string one character at a time.
            while ($s =~ m/( . )/gx) {
                local $_ = $1;

                if (/[adul]/) {

                    # The 'a' may be repeated; hide this from the rest of
                    # the code by counting and getting rid of all of them,
                    # then changing to 'aa' if there is a repeat.
                    if ($_ eq 'a') {
                        my $resume_at = pos $s;
                        my $a_count   = $s =~ s/a//g;
                        pos($s) = $resume_at - 1;   # -1 because got rid of the 'a'
                        if ($a_count > 2) {
                            require Carp;
                            Carp::carp(
                            qq 'The "a" flag may only appear a maximum of twice'
                            );
                        }
                        elsif ($a_count == 2) {
                            $_ = 'aa';
                        }
                    }

                    if (!$on) {

                        # Only drop the character set that is actually in
                        # force.
                        delete $^H{reflags_charset}
                            if defined $^H{reflags_charset}
                               && $^H{reflags_charset} == $reflags{$_};
                    }
                    else {
                        if ($seen_charset) {
                            require Carp;
                            if ($seen_charset ne $_) {
                                Carp::carp(
                                qq 'The "$seen_charset" and "$_" flags '
                                .qq 'are exclusive'
                                );
                            }
                            else {
                                Carp::carp(
                                qq 'The "$seen_charset" flag may not appear '
                                .qq 'twice'
                                );
                            }
                        }
                        $^H{reflags_charset} = $reflags{$_};
                        $seen_charset = $_;
                    }
                }
                elsif (exists $reflags{$_}) {
                    $times_seen{$_}++;
                    if ($on) {
                        $modifier_bits |= $reflags{$_};
                    }
                    else {
                        $modifier_bits &= ~$reflags{$_};
                    }
                }
                else {
                    require Carp;
                    Carp::carp(
                     qq'Unknown regular expression flag "$_"'
                    );

                    # Abandon this argument without storing anything.
                    next ARG;
                }
            }

            $^H{reflags} = $modifier_bits;
            if ($^H{reflags} || defined $^H{reflags_charset}) {
                $^H |= $flags_hint;
            }
            else {
                $^H &= ~$flags_hint;
            }
            next ARG;
        }

        require Carp;
        Carp::carp("Unknown \"re\" subpragma '$s' (known ones are: ",
                   join(', ', map {qq('$_')} 'debug', 'debugcolor', sort keys %bitmask),
                   ")");
    }

    if (($times_seen{'x'} || 0) > 1) {

        # Report under "deprecated" when that category is enabled, else
        # under "regexp" when that one is; otherwise stay silent.
        my $category = warnings::enabled("deprecated") ? "deprecated"
                     : warnings::enabled("regexp")     ? "regexp"
                     :                                   undef;
        if (defined $category) {
            my $message = "Having more than one /x regexp modifier is deprecated";
            warnings::warn($category, $message);
        }
    }

    if ($turning_all_off) {
        _load_unload(0);
        $^H{reflags} = 0;
        $^H{reflags_charset} = 0;
        $^H &= ~$flags_hint;
    }

    return $hint_bits;
}
272 | ||
# import($class, @subpragmas)
#
# Runs for "use re ...".  bits() applies the subpragmas and returns the
# %bitmask hint bits ('taint' / 'eval') that were asked for; those are then
# switched on in $^H.
sub import {
    my ($class, @subpragmas) = @_;
    my $wanted = bits(1, @subpragmas);
    $^H |= $wanted;
}
277 | ||
# unimport($class, @subpragmas)
#
# Runs for "no re ...".  bits() undoes the subpragmas and returns the
# %bitmask hint bits ('taint' / 'eval') that were named; those are then
# switched off in $^H.
sub unimport {
    my ($class, @subpragmas) = @_;
    my $unwanted = bits(0, @subpragmas);
    $^H &= ~$unwanted;
}
282 | ||
283 | 1; | |
284 | ||
285 | __END__ | |
56953603 | 286 | |
b3eb6a9b GS |
287 | =head1 NAME |
288 | ||
289 | re - Perl pragma to alter regular expression behaviour | |
290 | ||
291 | =head1 SYNOPSIS | |
292 | ||
e4d48cc9 GS |
293 | use re 'taint'; |
294 | ($x) = ($^X =~ /^(.*)$/s); # $x is tainted here | |
b3eb6a9b | 295 | |
2cd61cdb | 296 | $pat = '(?{ $foo = 1 })'; |
e4d48cc9 | 297 | use re 'eval'; |
48fe68f5 KW |
298 | /foo${pat}bar/; # won't fail (when not under -T |
299 | # switch) | |
e4d48cc9 GS |
300 | |
301 | { | |
302 | no re 'taint'; # the default | |
303 | ($x) = ($^X =~ /^(.*)$/s); # $x is not tainted here | |
304 | ||
305 | no re 'eval'; # the default | |
48fe68f5 KW |
306 | /foo${pat}bar/; # disallowed (with or without -T |
307 | # switch) | |
e4d48cc9 | 308 | } |
b3eb6a9b | 309 | |
67cdf558 KW |
310 | use re 'strict'; # Raise warnings for more conditions |
311 | ||
1e215989 FC |
312 | use re '/ix'; |
313 | "FOO" =~ / foo /; # /ix implied | |
314 | no re '/x'; | |
315 | "FOO" =~ /foo/; # just /i implied | |
316 | ||
1e2e3d02 | 317 | use re 'debug'; # output debugging info during |
48fe68f5 | 318 | /^(.*)$/s; # compile and run time |
1e2e3d02 | 319 | |
2cd61cdb | 320 | |
48fe68f5 KW |
321 | use re 'debugcolor'; # same as 'debug', but with colored |
322 | # output | |
02ea72ae IZ |
323 | ... |
324 | ||
48fe68f5 KW |
325 | use re qw(Debug All); # Same as "use re 'debug'", but you |
326 | # can use "Debug" with things other | |
327 | # than 'All' | |
328 | use re qw(Debug More); # 'All' plus output more details | |
329 | no re qw(Debug ALL); # Turn off (almost) all re debugging | |
330 | # in this scope | |
4ee9a43f | 331 | |
de8c5301 YO |
332 | use re qw(is_regexp regexp_pattern); # import utility functions |
333 | my ($pat,$mods)=regexp_pattern(qr/foo/i); | |
334 | if (is_regexp($obj)) { | |
335 | print "Got regexp: ", | |
48fe68f5 KW |
336 | scalar regexp_pattern($obj); # just as perl would stringify |
337 | } # it but no hassle with blessed | |
338 | # re's. | |
a3621e74 | 339 | |
3ffabb8c GS |
340 | (We use $^X in these examples because it's tainted by default.) |
341 | ||
b3eb6a9b GS |
342 | =head1 DESCRIPTION |
343 | ||
de8c5301 YO |
344 | =head2 'taint' mode |
345 | ||
b3eb6a9b | 346 | When C<use re 'taint'> is in effect, and a tainted string is the target |
99cc5cc6 A |
347 | of a regexp, the regexp memories (or values returned by the m// operator |
348 | in list context) are tainted. This feature is useful when regexp operations | |
e4d48cc9 GS |
349 | on tainted data aren't meant to extract safe substrings, but to perform |
350 | other transformations. | |
b3eb6a9b | 351 | |
de8c5301 YO |
352 | =head2 'eval' mode |
353 | ||
99cc5cc6 | 354 | When C<use re 'eval'> is in effect, a regexp is allowed to contain |
0b370c0a | 355 | C<(?{ ... })> zero-width assertions and C<(??{ ... })> postponed |
e128ab2c DM |
356 | subexpressions that are derived from variable interpolation, rather than |
357 | appearing literally within the regexp. That is normally disallowed, since | |
358 | it is a | |
2cd61cdb IZ |
359 | potential security risk. Note that this pragma is ignored when the regular |
360 | expression is obtained from tainted data, i.e. evaluation is always | |
0b370c0a | 361 | disallowed with tainted regular expressions. See L<perlre/(?{ code })> |
bb1773de | 362 | and L<perlre/(??{ code })>. |
2cd61cdb | 363 | |
ffbc6a93 | 364 | For the purpose of this pragma, interpolation of precompiled regular |
0a92e3a8 GS |
365 | expressions (i.e., the result of C<qr//>) is I<not> considered variable |
366 | interpolation. Thus: | |
2cd61cdb IZ |
367 | |
368 | /foo${pat}bar/ | |
369 | ||
ffbc6a93 | 370 | I<is> allowed if $pat is a precompiled regular expression, even |
0b370c0a | 371 | if $pat contains C<(?{ ... })> assertions or C<(??{ ... })> subexpressions. |
2cd61cdb | 372 | |
67cdf558 KW |
373 | =head2 'strict' mode |
374 | ||
1eac213a KW |
375 | Note that this is an experimental feature which may be changed or removed in a |
376 | future Perl release. | |
377 | ||
67cdf558 KW |
378 | When C<use re 'strict'> is in effect, stricter checks are applied than |
379 | otherwise when compiling regular expression patterns. These may cause more | |
380 | warnings to be raised than otherwise, and more things to be fatal instead of | |
381 | just warnings. The purpose of this is to find and report at compile time some | |
382 | things, which may be legal, but have a reasonable possibility of not being the | |
383 | programmer's actual intent. This automatically turns on the C<"regexp"> | |
384 | warnings category (if not already on) within its scope. | |
385 | ||
1eac213a KW |
386 | An example of something that is caught under C<'strict'>, but not |
387 | otherwise, is the pattern | |
67cdf558 KW |
388 | |
389 | qr/\xABC/ | |
390 | ||
391 | The C<"\x"> construct without curly braces should be followed by exactly two | |
392 | hex digits; this one is followed by three. This currently evaluates as | |
393 | equivalent to | |
394 | ||
395 | qr/\x{AB}C/ | |
396 | ||
397 | that is, the character whose code point value is C<0xAB>, followed by the | |
398 | letter C<C>. But since C<C> is a hex digit, there is a reasonable chance | |
399 | that the intent was | |
400 | ||
401 | qr/\x{ABC}/ | |
402 | ||
403 | that is, the single character at C<0xABC>. Under C<'strict'> it is an error to | |
404 | not follow C<\x> with exactly two hex digits. When not under C<'strict'> a | |
405 | warning is generated if there is only one hex digit, and no warning is raised | |
406 | if there are more than two. | |
407 | ||
408 | It is expected that what exactly C<'strict'> does will evolve over time as we | |
409 | gain experience with it. This means that programs that compile under it in | |
410 | today's Perl may not compile, or may have more or fewer warnings, in future | |
1eac213a KW |
411 | Perls. There are no backwards compatibility promises with regard to it. Also |
412 | there are already proposals for an alternate syntax for enabling it. For | |
413 | these reasons, using it will raise a C<experimental::re_strict> class warning, | |
67cdf558 KW |
414 | unless that category is turned off. |
415 | ||
416 | Note that if a pattern compiled within C<'strict'> is recompiled, say by | |
417 | interpolating into another pattern, outside of C<'strict'>, it is not checked | |
418 | again for strictness. This is because if it works under strict it must work | |
419 | under non-strict. | |
420 | ||
1e215989 FC |
421 | =head2 '/flags' mode |
422 | ||
423 | When C<use re '/flags'> is specified, the given flags are automatically | |
424 | added to every regular expression till the end of the lexical scope. | |
425 | ||
426 | C<no re '/flags'> will turn off the effect of C<use re '/flags'> for the | |
427 | given flags. | |
428 | ||
429 | For example, if you want all your regular expressions to have /msx on by | |
430 | default, simply put | |
431 | ||
432 | use re '/msx'; | |
433 | ||
434 | at the top of your code. | |
435 | ||
cfaf538b | 436 | The character set /adul flags cancel each other out. So, in this example, |
1e215989 FC |
437 | |
438 | use re "/u"; | |
439 | "ss" =~ /\xdf/; | |
440 | use re "/d"; | |
441 | "ss" =~ /\xdf/; | |
442 | ||
4d220a7d | 443 | the second C<use re> does an implicit C<no re '/u'>. |
1e215989 | 444 | |
59640339 | 445 | Turning on one of the character set flags with C<use re> takes precedence over the |
1e215989 FC |
446 | C<locale> pragma and the 'unicode_strings' C<feature>, for regular |
447 | expressions. Turning off one of these flags when it is active reverts to | |
448 | the behaviour specified by whatever other pragmata are in scope. For | |
449 | example: | |
450 | ||
451 | use feature "unicode_strings"; | |
452 | no re "/u"; # does nothing | |
453 | use re "/l"; | |
454 | no re "/l"; # reverts to unicode_strings behaviour | |
455 | ||
de8c5301 YO |
456 | =head2 'debug' mode |
457 | ||
ffbc6a93 | 458 | When C<use re 'debug'> is in effect, perl emits debugging messages when |
2cd61cdb IZ |
459 | compiling and using regular expressions. The output is the same as that |
460 | obtained by running a C<-DDEBUGGING>-enabled perl interpreter with the | |
461 | B<-Dr> switch. It may be quite voluminous depending on the complexity | |
02ea72ae IZ |
462 | of the match. Using C<debugcolor> instead of C<debug> enables a |
463 | form of output that can be used to get a colorful display on terminals | |
464 | that understand termcap color sequences. Set C<$ENV{PERL_RE_TC}> to a | |
465 | comma-separated list of C<termcap> properties to use for highlighting | |
ffbc6a93 | 466 | strings on/off, pre-point part on/off. |
57e8c15d | 467 | See L<perldebug/"Debugging Regular Expressions"> for additional info. |
2cd61cdb | 468 | |
de8c5301 YO |
469 | As of 5.9.5 the directive C<use re 'debug'> and its equivalents are |
470 | lexically scoped, as the other directives are. However they have both | |
471 | compile-time and run-time effects. | |
472 | ||
473 | See L<perlmodlib/Pragmatic Modules>. | |
474 | ||
475 | =head2 'Debug' mode | |
476 | ||
a3621e74 YO |
477 | Similarly C<use re 'Debug'> produces debugging output, the difference |
478 | being that it allows the fine tuning of what debugging output will be | |
be8e71aa YO |
479 | emitted. Options are divided into three groups, those related to |
480 | compilation, those related to execution and those related to special | |
481 | purposes. The options are as follows: | |
482 | ||
483 | =over 4 | |
484 | ||
485 | =item Compile related options | |
486 | ||
487 | =over 4 | |
488 | ||
489 | =item COMPILE | |
490 | ||
491 | Turns on all compile related debug options. | |
492 | ||
493 | =item PARSE | |
494 | ||
495 | Turns on debug output related to the process of parsing the pattern. | |
496 | ||
497 | =item OPTIMISE | |
498 | ||
499 | Enables output related to the optimisation phase of compilation. | |
500 | ||
24b23f37 | 501 | =item TRIEC |
be8e71aa YO |
502 | |
503 | Detailed info about trie compilation. | |
504 | ||
505 | =item DUMP | |
506 | ||
507 | Dump the final program out after it is compiled and optimised. | |
508 | ||
d9a72fcc YO |
509 | =item FLAGS |
510 | ||
511 | Dump the flags associated with the program. | |
512 | ||
513 | =item TEST | |
514 | ||
515 | Print output intended for testing the internals of the compile process. | |
516 | ||
be8e71aa YO |
517 | =back |
518 | ||
519 | =item Execute related options | |
520 | ||
521 | =over 4 | |
522 | ||
523 | =item EXECUTE | |
524 | ||
525 | Turns on all execute related debug options. | |
526 | ||
527 | =item MATCH | |
528 | ||
529 | Turns on debugging of the main matching loop. | |
530 | ||
24b23f37 | 531 | =item TRIEE |
be8e71aa YO |
532 | |
533 | Extra debugging of how tries execute. | |
534 | ||
535 | =item INTUIT | |
536 | ||
48fe68f5 | 537 | Enable debugging of start-point optimisations. |
be8e71aa YO |
538 | |
539 | =back | |
540 | ||
541 | =item Extra debugging options | |
542 | ||
543 | =over 4 | |
544 | ||
545 | =item EXTRA | |
546 | ||
547 | Turns on all "extra" debugging options. | |
548 | ||
e7707071 YO |
549 | =item BUFFERS |
550 | ||
c27a5cfe | 551 | Enable debugging the capture group storage during match. Warning, |
e7707071 YO |
552 | this can potentially produce extremely large output. |
553 | ||
24b23f37 YO |
554 | =item TRIEM |
555 | ||
556 | Enable enhanced TRIE debugging. Enhances both TRIEE | |
557 | and TRIEC. | |
558 | ||
559 | =item STATE | |
560 | ||
4ee9a43f | 561 | Enable debugging of states in the engine. |
24b23f37 YO |
562 | |
563 | =item STACK | |
be8e71aa | 564 | |
24b23f37 YO |
565 | Enable debugging of the recursion stack in the engine. Enabling |
566 | or disabling this option automatically does the same for debugging | |
567 | states as well. The output from this can be quite large. | |
568 | ||
d9a72fcc YO |
569 | =item GPOS |
570 | ||
571 | Enable debugging of the \G modifier. | |
572 | ||
24b23f37 YO |
573 | =item OPTIMISEM |
574 | ||
48fe68f5 | 575 | Enable enhanced optimisation debugging and start-point optimisations. |
99cc5cc6 | 576 | Probably not useful except when debugging the regexp engine itself. |
24b23f37 YO |
577 | |
578 | =item OFFSETS | |
579 | ||
580 | Dump offset information. This can be used to see how regops correlate | |
581 | to the pattern. Output format is | |
582 | ||
583 | NODENUM:POSITION[LENGTH] | |
584 | ||
585 | Where 1 is the position of the first char in the string. Note that position | |
586 | can be 0, or larger than the actual length of the pattern, likewise length | |
587 | can be zero. | |
be8e71aa | 588 | |
24b23f37 | 589 | =item OFFSETSDBG |
be8e71aa YO |
590 | |
591 | Enable debugging of offsets information. This emits copious | |
fe759410 | 592 | amounts of trace information and doesn't mesh well with other |
be8e71aa YO |
593 | debug options. |
594 | ||
fe759410 | 595 | Almost definitely only useful to people hacking |
be8e71aa YO |
596 | on the offsets part of the debug engine. |
597 | ||
d9a72fcc | 598 | |
be8e71aa YO |
599 | =back |
600 | ||
601 | =item Other useful flags | |
602 | ||
603 | These are useful shortcuts to save on the typing. | |
604 | ||
605 | =over 4 | |
606 | ||
607 | =item ALL | |
608 | ||
48fe68f5 KW |
609 | Enable all options at once except OFFSETS, OFFSETSDBG and BUFFERS. |
610 | (To get every single option without exception, use both ALL and EXTRA.) | |
be8e71aa YO |
611 | |
612 | =item All | |
613 | ||
fe759410 | 614 | Enable DUMP and all execute options. Equivalent to: |
be8e71aa YO |
615 | |
616 | use re 'debug'; | |
617 | ||
618 | =item MORE | |
619 | ||
620 | =item More | |
621 | ||
48fe68f5 | 622 | Enable the options enabled by "All", plus STATE, TRIEC, and TRIEM. |
be8e71aa | 623 | |
dba3f186 | 624 | =back |
be8e71aa | 625 | |
dba3f186 | 626 | =back |
a3621e74 | 627 | |
1e2e3d02 | 628 | As of 5.9.5 the directive C<use re 'debug'> and its equivalents are |
48fe68f5 | 629 | lexically scoped, as are the other directives. However they have both |
1e2e3d02 | 630 | compile-time and run-time effects. |
b3eb6a9b | 631 | |
de8c5301 | 632 | =head2 Exportable Functions |
b3eb6a9b | 633 | |
de8c5301 | 634 | As of perl 5.9.5 're' contains a number of utility functions that |
4ee9a43f | 635 | may be optionally exported into the caller's namespace. They are listed |
de8c5301 | 636 | below. |
b3eb6a9b | 637 | |
de8c5301 | 638 | =over 4 |
b3eb6a9b | 639 | |
de8c5301 | 640 | =item is_regexp($ref) |
02ea72ae | 641 | |
de8c5301 | 642 | Returns true if the argument is a compiled regular expression as returned |
4ee9a43f | 643 | by C<qr//>, false if it is not. |
02ea72ae | 644 | |
4ee9a43f RGS |
645 | This function will not be confused by overloading or blessing. In |
646 | internals terms, this extracts the regexp pointer out of the | |
3a5e0888 | 647 | PERL_MAGIC_qr structure so it cannot be fooled. |
894be9b7 | 648 | |
de8c5301 | 649 | =item regexp_pattern($ref) |
02ea72ae | 650 | |
4ee9a43f RGS |
651 | If the argument is a compiled regular expression as returned by C<qr//>, |
652 | then this function returns the pattern. | |
be8e71aa | 653 | |
4ee9a43f RGS |
654 | In list context it returns a two element list, the first element |
655 | containing the pattern and the second containing the modifiers used when | |
656 | the pattern was compiled. | |
be8e71aa | 657 | |
4ee9a43f | 658 | my ($pat, $mods) = regexp_pattern($ref); |
a3621e74 | 659 | |
99cc5cc6 | 660 | In scalar context it returns the same as perl would when stringifying a raw |
4ee9a43f RGS |
661 | C<qr//> with the same pattern inside. If the argument is not a compiled |
662 | reference then this routine returns false but defined in scalar context, | |
663 | and the empty list in list context. Thus the following | |
f9f4320a | 664 | |
dff5e0c4 | 665 | if (regexp_pattern($ref) eq '(?^i:foo)') |
dba3f186 | 666 | |
de8c5301 | 667 | will be warning free regardless of what $ref actually is. |
380e0b81 | 668 | |
4ee9a43f RGS |
669 | Like C<is_regexp> this function will not be confused by overloading |
670 | or blessing of the object. | |
b3eb6a9b | 671 | |
256ddcd0 YO |
672 | =item regmust($ref) |
673 | ||
432acd5f | 674 | If the argument is a compiled regular expression as returned by C<qr//>, |
99cc5cc6 | 675 | then this function returns what the optimiser considers to be the longest |
432acd5f RGS |
676 | anchored fixed string and longest floating fixed string in the pattern. |
677 | ||
678 | A I<fixed string> is defined as being a substring that must appear for the | |
679 | pattern to match. An I<anchored fixed string> is a fixed string that must | |
680 | appear at a particular offset from the beginning of the match. A I<floating | |
681 | fixed string> is defined as a fixed string that can appear at any point in | |
682 | a range of positions relative to the start of the match. For example, | |
683 | ||
684 | my $qr = qr/here .* there/x; | |
685 | my ($anchored, $floating) = regmust($qr); | |
256ddcd0 | 686 | print "anchored:'$anchored'\nfloating:'$floating'\n"; |
432acd5f | 687 | |
256ddcd0 YO |
688 | results in |
689 | ||
690 | anchored:'here' | |
691 | floating:'there' | |
692 | ||
432acd5f RGS |
693 | Because the C<here> is before the C<.*> in the pattern, its position |
694 | can be determined exactly. That's not true, however, for the C<there>; | |
695 | it could appear at any point after where the anchored string appeared. | |
d952710b | 696 | Perl uses both for its optimisations, preferring the longer, or, if they are |
256ddcd0 YO |
697 | equal, the floating. |
698 | ||
699 | B<NOTE:> This may not necessarily be the definitive longest anchored and | |
432acd5f | 700 | floating string. This will be what the optimiser of the Perl that you |
256ddcd0 YO |
701 | are using thinks is the longest. If you believe that the result is wrong |
702 | please report it via the L<perlbug> utility. | |
703 | ||
28d8d7f4 | 704 | =item regname($name,$all) |
44a2ac75 | 705 | |
28d8d7f4 YO |
706 | Returns the contents of a named buffer of the last successful match. If |
707 | $all is true, then returns an array ref containing one entry per buffer, | |
44a2ac75 YO |
708 | otherwise returns the first defined buffer. |
709 | ||
28d8d7f4 | 710 | =item regnames($all) |
44a2ac75 | 711 | |
28d8d7f4 YO |
712 | Returns a list of all of the named buffers defined in the last successful |
713 | match. If $all is true, then it returns all names defined, if not it returns | |
714 | only names which were involved in the match. | |
44a2ac75 | 715 | |
28d8d7f4 | 716 | =item regnames_count() |
44a2ac75 | 717 | |
28d8d7f4 YO |
718 | Returns the number of distinct names defined in the pattern used |
719 | for the last successful match. | |
44a2ac75 | 720 | |
28d8d7f4 YO |
721 | B<Note:> this result is always the actual number of distinct |
722 | named buffers defined, it may not actually match that which is | |
723 | returned by C<regnames()> and related routines when those routines | |
724 | have not been called with the $all parameter set. | |
44a2ac75 | 725 | |
de8c5301 | 726 | =back |
b3eb6a9b | 727 | |
de8c5301 | 728 | =head1 SEE ALSO |
b3eb6a9b | 729 | |
de8c5301 YO |
730 | L<perlmodlib/Pragmatic Modules>. |
731 | ||
732 | =cut |