This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
ext/re/re.pm: Fix comment
[perl5.git] / ext / re / re.pm
CommitLineData
b3eb6a9b
GS
package re;

# pragma for controlling the regexp engine
use strict;
use warnings;

our $VERSION     = "0.31";
our @ISA         = qw(Exporter);

# Utility functions that may be requested by name on the "use re" line.
our @EXPORT_OK   = ('regmust',
                    qw(is_regexp regexp_pattern
                       regname regnames regnames_count));

# Same names as a lookup table, so bits() can test membership directly.
our %EXPORT_OK = map { $_ => 1 } @EXPORT_OK;

# Subpragmas that map straight onto a bit in $^H.
my %bitmask = (
    taint   => 0x00100000, # HINT_RE_TAINT
    eval    => 0x00200000, # HINT_RE_EVAL
);

# $^H bit that is set whenever any default regexp flags are in effect.
my $flags_hint = 0x02000000; # HINT_RE_FLAGS
my $PMMOD_SHIFT = 0;

# Values stored in $^H{reflags} (bit flags) and $^H{reflags_charset}
# (the "special cases") by "use re '/flags'" and "use re 'strict'".
my %reflags = (
    m => 1 << ($PMMOD_SHIFT + 0),
    s => 1 << ($PMMOD_SHIFT + 1),
    i => 1 << ($PMMOD_SHIFT + 2),
    x => 1 << ($PMMOD_SHIFT + 3),
    n => 1 << ($PMMOD_SHIFT + 5),
    p => 1 << ($PMMOD_SHIFT + 6),
    strict => 1 << ($PMMOD_SHIFT + 10),
# special cases:
# The character-set modifiers are mutually exclusive, so they are plain
# enumerated values rather than bits to be OR-ed together.
    d => 0,
    l => 1,
    u => 2,
    a => 3,
    aa => 4,
);
36
de8c5301
YO
# Set $ENV{PERL_RE_COLORS} for the 'debugcolor' / 'Debugcolor' subpragmas.
#
# The terminal capabilities named in $ENV{PERL_RE_TC} (a comma-separated
# list; 'md,me,so,se,us,ue' when unset or false) are looked up through
# Term::Cap and joined with tabs.  If anything in that process dies, a plain
# text fallback is used instead, but only when PERL_RE_COLORS does not
# already hold a true value.
#
# Takes no arguments; is called only for its effect on %ENV.
sub setcolor {
    eval {                              # Ignore errors
        require Term::Cap;

        # Direct method call rather than indirect-object syntax.
        my $terminal = Term::Cap->Tgetent({OSPEED => 9600}); # Avoid warning.
        my $props = $ENV{PERL_RE_TC} || 'md,me,so,se,us,ue';
        my @props = split /,/, $props;
        my $colors = join "\t", map {$terminal->Tputs($_,1)} @props;

        # Strip any NUL bytes from the capability strings.
        $colors =~ s/\0//g;
        $ENV{PERL_RE_COLORS} = $colors;
    };
    if ($@) {
        $ENV{PERL_RE_COLORS} ||= qq'\t\t> <\t> <\t\t';
    }

}
54
# Bit values accepted after "use re 'Debug'" and OR-ed into
# ${^RE_DEBUG_FLAGS}.  Each of the three groups has an umbrella name
# (COMPILE, EXECUTE, EXTRA) whose mask covers the whole byte for the group.
my %flags = (
    COMPILE         => 0x0000FF,
    PARSE           => 0x000001,
    OPTIMISE        => 0x000002,
    TRIEC           => 0x000004,
    DUMP            => 0x000008,
    FLAGS           => 0x000010,
    TEST            => 0x000020,

    EXECUTE         => 0x00FF00,
    INTUIT          => 0x000100,
    MATCH           => 0x000200,
    TRIEE           => 0x000400,

    EXTRA           => 0xFF0000,
    TRIEM           => 0x010000,
    OFFSETS         => 0x020000,
    OFFSETSDBG      => 0x040000,
    STATE           => 0x080000,
    OPTIMISEM       => 0x100000,
    STACK           => 0x280000,    # includes the STATE bit (0x080000)
    BUFFERS         => 0x400000,
    GPOS            => 0x800000,
);

# Convenience combinations.  Note that the keys are case-sensitive: 'ALL'
# and 'All' are different sets.
$flags{ALL} = -1 & ~($flags{OFFSETS}|$flags{OFFSETSDBG}|$flags{BUFFERS});
$flags{All} = $flags{all} = $flags{DUMP} | $flags{EXECUTE};
$flags{Extra} = $flags{EXECUTE} | $flags{COMPILE} | $flags{GPOS};
$flags{More} = $flags{MORE} = $flags{All} | $flags{TRIEC} | $flags{TRIEM} | $flags{STATE};
$flags{State} = $flags{DUMP} | $flags{EXECUTE} | $flags{STATE};
$flags{TRIE} = $flags{DUMP} | $flags{EXECUTE} | $flags{TRIEC};
85
ec781434
NC
# Load the XS half of the module, but only when dynamic loading is
# available in this perl.
if (defined &DynaLoader::boot_DynaLoader) {
    require XSLoader;
    XSLoader::load();
}
# else we're miniperl
# We need to work for miniperl, because the XS toolchain uses Text::Wrap, which
# uses re 'taint'.
de8c5301
YO
93
# Install or remove the regexp hooks for the scope currently being compiled.
#
#   $on - true:  store the value returned by install() in $^H{regcomp}
#         false: delete $^H{regcomp}
sub _load_unload {
    my ($on)= @_;
    if ($on) {
        # We call install() every time, as if we didn't, we wouldn't
        # "see" any changes to the color environment var since
        # the last time it was called.

        # install() returns an integer, which if casted properly
        # in C resolves to a structure containing the regexp
        # hooks. Setting it to a random integer will guarantee
        # segfaults.
        $^H{regcomp} = install();
    } else {
        delete $^H{regcomp};
    }
}
110
# Work horse shared by import() and unimport().
#
#   $on - true when called for "use re ...", false for "no re ...".
#   @_  - the remaining arguments are the subpragma names from the
#         use/no line.
#
# Returns the OR of the %bitmask values ('taint', 'eval') that were named;
# the caller sets or clears those bits in $^H.  Every other subpragma is
# applied here as a side effect:
#
#   Debug / Debugcolor  - updates ${^RE_DEBUG_FLAGS} from the flag names
#                         that follow, then installs/removes the hooks
#   debug / debugcolor  - installs/removes the hooks
#   an exportable name  - exports that function to the caller of import()
#   strict              - toggles the strict bit in $^H{reflags} and the
#                         'regexp' warnings category
#   /flags              - updates $^H{reflags} and $^H{reflags_charset}
#
# "no re" with no arguments at all turns everything off.
sub bits {
    my $on = shift;
    my $bits = 0;
    my $turning_all_off = ! @_ && ! $on;
    my %seen;   # Has flag already been seen?
    if ($turning_all_off) {

        # Pretend we were called with certain parameters, which are best
        # dealt with that way.
        push @_, keys %bitmask; # taint and eval
        push @_, 'strict';
    }

    # Process each subpragma parameter
   ARG:
    foreach my $idx (0..$#_){
        my $s=$_[$idx];
        if ($s eq 'Debug' or $s eq 'Debugcolor') {
            setcolor() if $s =~/color/i;
            ${^RE_DEBUG_FLAGS} = 0 unless defined ${^RE_DEBUG_FLAGS};

            # Everything after 'Debug' is taken to be a debug flag name.
            for my $idx ($idx+1..$#_) {
                if ($flags{$_[$idx]}) {
                    if ($on) {
                        ${^RE_DEBUG_FLAGS} |= $flags{$_[$idx]};
                    } else {
                        ${^RE_DEBUG_FLAGS} &= ~ $flags{$_[$idx]};
                    }
                } else {
                    require Carp;
                    Carp::carp("Unknown \"re\" Debug flag '$_[$idx]', possible flags: ",
                               join(", ",sort keys %flags ) );
                }
            }

            # When turning flags off, keep the hooks installed for as long
            # as any debug flag remains set.
            _load_unload($on ? 1 : ${^RE_DEBUG_FLAGS});

            # The inner loop consumed the rest of the arguments.
            last;
        } elsif ($s eq 'debug' or $s eq 'debugcolor') {
            setcolor() if $s =~/color/i;
            _load_unload($on);
            last;
        } elsif (exists $bitmask{$s}) {
            $bits |= $bitmask{$s};
        } elsif ($EXPORT_OK{$s}) {
            require Exporter;

            # Level 2: skip bits() and import() to reach the code that
            # said "use re".
            re->export_to_level(2, 're', $s);
        } elsif ($s eq 'strict') {
            if ($on) {
                $^H{reflags} |= $reflags{$s};
                warnings::warnif('experimental::re_strict',
                                 "\"use re 'strict'\" is experimental");

                # Turn on warnings if not already done.
                if (! warnings::enabled('regexp')) {
                    require warnings;
                    warnings->import('regexp');
                    $^H{re_strict} = 1;
                }
            }
            else {
                $^H{reflags} &= ~$reflags{$s} if $^H{reflags};

                # Turn off warnings if we turned them on.
                warnings->unimport('regexp') if $^H{re_strict};
            }
            if ($^H{reflags}) {
                $^H |= $flags_hint;
            }
            else {
                $^H &= ~$flags_hint;
            }
        } elsif ($s =~ s/^\///) {
            # What is left in $s is a string of regexp modifier letters.
            my $reflags = $^H{reflags} || 0;
            my $seen_charset;
            while ($s =~ m/( . )/gx) {
                local $_ = $1;
                if (/[adul]/) {
                    # The 'a' may be repeated; hide this from the rest of the
                    # code by counting and getting rid of all of them, then
                    # changing to 'aa' if there is a repeat.
                    if ($_ eq 'a') {
                        my $sav_pos = pos $s;
                        my $a_count = $s =~ s/a//g;
                        pos $s = $sav_pos - 1; # -1 because got rid of the 'a'
                        if ($a_count > 2) {
                            require Carp;
                            Carp::carp(
                            qq 'The "a" flag may only appear a maximum of twice'
                            );
                        }
                        elsif ($a_count == 2) {
                            $_ = 'aa';
                        }
                    }
                    if ($on) {
                        if ($seen_charset) {
                            require Carp;
                            if ($seen_charset ne $_) {
                                Carp::carp(
                                qq 'The "$seen_charset" and "$_" flags '
                                .qq 'are exclusive'
                                );
                            }
                            else {
                                Carp::carp(
                                qq 'The "$seen_charset" flag may not appear '
                                .qq 'twice'
                                );
                            }
                        }
                        $^H{reflags_charset} = $reflags{$_};
                        $seen_charset = $_;
                    }
                    else {
                        # Only remove the charset if it is the one named.
                        delete $^H{reflags_charset}
                         if defined $^H{reflags_charset}
                            && $^H{reflags_charset} == $reflags{$_};
                    }
                } elsif (exists $reflags{$_}) {
                    $seen{$_}++;
                    $on
                      ? $reflags |= $reflags{$_}
                      : ($reflags &= ~$reflags{$_});
                } else {
                    require Carp;
                    Carp::carp(
                     qq'Unknown regular expression flag "$_"'
                    );

                    # Abandon this argument without storing $reflags.
                    next ARG;
                }
            }
            ($^H{reflags} = $reflags or defined $^H{reflags_charset})
                            ? $^H |= $flags_hint
                            : ($^H &= ~$flags_hint);
        } else {
            require Carp;
            Carp::carp("Unknown \"re\" subpragma '$s' (known ones are: ",
                       join(', ', map {qq('$_')} 'debug', 'debugcolor', sort keys %bitmask),
                       ")");
        }
    }
    if (exists $seen{'x'} && $seen{'x'} > 1
        && (warnings::enabled("deprecated")
            || warnings::enabled("regexp")))
    {
        my $message = "Having more than one /x regexp modifier is deprecated";
        if (warnings::enabled("deprecated")) {
            warnings::warn("deprecated", $message);
        }
        else {
            warnings::warn("regexp", $message);
        }
    }

    if ($turning_all_off) {
        _load_unload(0);
        $^H{reflags} = 0;
        $^H{reflags_charset} = 0;
        $^H &= ~$flags_hint;
    }

    $bits;
}
272
# Called for "use re LIST": apply the subpragmas in LIST and set in $^H
# whatever hint bits bits() returns for them.
sub import {
    shift;      # discard the class name
    $^H |= bits(1, @_);
}
277
# Called for "no re LIST": undo the subpragmas in LIST and clear from $^H
# whatever hint bits bits() returns for them.
sub unimport {
    shift;      # discard the class name
    $^H &= ~ bits(0, @_);
}
282
# A module file must end by returning a true value.
1;

__END__
56953603 286
b3eb6a9b
GS
287=head1 NAME
288
289re - Perl pragma to alter regular expression behaviour
290
291=head1 SYNOPSIS
292
e4d48cc9
GS
293 use re 'taint';
294 ($x) = ($^X =~ /^(.*)$/s); # $x is tainted here
b3eb6a9b 295
2cd61cdb 296 $pat = '(?{ $foo = 1 })';
e4d48cc9 297 use re 'eval';
48fe68f5
KW
298 /foo${pat}bar/; # won't fail (when not under -T
299 # switch)
e4d48cc9
GS
300
301 {
302 no re 'taint'; # the default
303 ($x) = ($^X =~ /^(.*)$/s); # $x is not tainted here
304
305 no re 'eval'; # the default
48fe68f5
KW
306 /foo${pat}bar/; # disallowed (with or without -T
307 # switch)
e4d48cc9 308 }
b3eb6a9b 309
67cdf558
KW
310 use re 'strict'; # Raise warnings for more conditions
311
1e215989
FC
312 use re '/ix';
313 "FOO" =~ / foo /; # /ix implied
314 no re '/x';
315 "FOO" =~ /foo/; # just /i implied
316
1e2e3d02 317 use re 'debug'; # output debugging info during
48fe68f5 318 /^(.*)$/s; # compile and run time
1e2e3d02 319
2cd61cdb 320
48fe68f5
KW
321 use re 'debugcolor'; # same as 'debug', but with colored
322 # output
02ea72ae
IZ
323 ...
324
48fe68f5
KW
325 use re qw(Debug All); # Same as "use re 'debug'", but you
326 # can use "Debug" with things other
327 # than 'All'
328 use re qw(Debug More); # 'All' plus output more details
    no re qw(Debug ALL);          # Turn off (almost) all re debugging
                                  # in this scope
4ee9a43f 331
de8c5301
YO
332 use re qw(is_regexp regexp_pattern); # import utility functions
333 my ($pat,$mods)=regexp_pattern(qr/foo/i);
334 if (is_regexp($obj)) {
335 print "Got regexp: ",
48fe68f5
KW
336 scalar regexp_pattern($obj); # just as perl would stringify
337 } # it but no hassle with blessed
338 # re's.
a3621e74 339
3ffabb8c
GS
340(We use $^X in these examples because it's tainted by default.)
341
b3eb6a9b
GS
342=head1 DESCRIPTION
343
de8c5301
YO
344=head2 'taint' mode
345
b3eb6a9b 346When C<use re 'taint'> is in effect, and a tainted string is the target
99cc5cc6
A
347of a regexp, the regexp memories (or values returned by the m// operator
348in list context) are tainted. This feature is useful when regexp operations
e4d48cc9
GS
349on tainted data aren't meant to extract safe substrings, but to perform
350other transformations.
b3eb6a9b 351
de8c5301
YO
352=head2 'eval' mode
353
99cc5cc6 354When C<use re 'eval'> is in effect, a regexp is allowed to contain
0b370c0a 355C<(?{ ... })> zero-width assertions and C<(??{ ... })> postponed
e128ab2c
DM
356subexpressions that are derived from variable interpolation, rather than
357appearing literally within the regexp. That is normally disallowed, since
358it is a
2cd61cdb
IZ
359potential security risk. Note that this pragma is ignored when the regular
360expression is obtained from tainted data, i.e. evaluation is always
0b370c0a 361disallowed with tainted regular expressions. See L<perlre/(?{ code })>
bb1773de 362and L<perlre/(??{ code })>.
2cd61cdb 363
ffbc6a93 364For the purpose of this pragma, interpolation of precompiled regular
0a92e3a8
GS
365expressions (i.e., the result of C<qr//>) is I<not> considered variable
366interpolation. Thus:
2cd61cdb
IZ
367
368 /foo${pat}bar/
369
ffbc6a93 370I<is> allowed if $pat is a precompiled regular expression, even
0b370c0a 371if $pat contains C<(?{ ... })> assertions or C<(??{ ... })> subexpressions.
2cd61cdb 372
67cdf558
KW
373=head2 'strict' mode
374
When C<use re 'strict'> is in effect, stricter checks are applied than
otherwise when compiling regular expression patterns.  These may cause more
warnings to be raised than otherwise, and more things to be fatal instead of
just warnings.  The purpose of this is to find and report at compile time some
things, which may be legal, but have a reasonable possibility of not being the
programmer's actual intent.  This automatically turns on the C<"regexp">
warnings category (if not already on) within its scope.
382
An example of something that is caught under C<'strict'> but not otherwise
is the pattern
385
386 qr/\xABC/
387
388The C<"\x"> construct without curly braces should be followed by exactly two
389hex digits; this one is followed by three. This currently evaluates as
390equivalent to
391
392 qr/\x{AB}C/
393
that is, the character whose code point value is C<0xAB>, followed by the
letter C<C>.  But since C<C> is a hex digit, there is a reasonable chance
that the intent was
397
398 qr/\x{ABC}/
399
400that is the single character at C<0xABC>. Under C<'strict'> it is an error to
401not follow C<\x> with exactly two hex digits. When not under C<'strict'> a
402warning is generated if there is only one hex digit, and no warning is raised
403if there are more than two.
404
It is expected that what exactly C<'strict'> does will evolve over time as we
gain experience with it.  This means that programs that compile under it in
today's Perl may not compile, or may have more or fewer warnings, in future
Perls.  There are no backwards compatibility promises with regards to it.  For
this reason, using it will raise a C<experimental::re_strict> class warning,
unless that category is turned off.
411
412Note that if a pattern compiled within C<'strict'> is recompiled, say by
413interpolating into another pattern, outside of C<'strict'>, it is not checked
414again for strictness. This is because if it works under strict it must work
415under non-strict.
416
1e215989
FC
417=head2 '/flags' mode
418
419When C<use re '/flags'> is specified, the given flags are automatically
420added to every regular expression till the end of the lexical scope.
421
422C<no re '/flags'> will turn off the effect of C<use re '/flags'> for the
423given flags.
424
425For example, if you want all your regular expressions to have /msx on by
426default, simply put
427
428 use re '/msx';
429
430at the top of your code.
431
cfaf538b 432The character set /adul flags cancel each other out. So, in this example,
1e215989
FC
433
434 use re "/u";
435 "ss" =~ /\xdf/;
436 use re "/d";
437 "ss" =~ /\xdf/;
438
4d220a7d 439the second C<use re> does an implicit C<no re '/u'>.
1e215989 440
59640339 441Turning on one of the character set flags with C<use re> takes precedence over the
1e215989
FC
442C<locale> pragma and the 'unicode_strings' C<feature>, for regular
443expressions. Turning off one of these flags when it is active reverts to
444the behaviour specified by whatever other pragmata are in scope. For
445example:
446
447 use feature "unicode_strings";
448 no re "/u"; # does nothing
449 use re "/l";
450 no re "/l"; # reverts to unicode_strings behaviour
451
de8c5301
YO
452=head2 'debug' mode
453
ffbc6a93 454When C<use re 'debug'> is in effect, perl emits debugging messages when
2cd61cdb
IZ
455compiling and using regular expressions. The output is the same as that
456obtained by running a C<-DDEBUGGING>-enabled perl interpreter with the
457B<-Dr> switch. It may be quite voluminous depending on the complexity
02ea72ae
IZ
458of the match. Using C<debugcolor> instead of C<debug> enables a
459form of output that can be used to get a colorful display on terminals
460that understand termcap color sequences. Set C<$ENV{PERL_RE_TC}> to a
461comma-separated list of C<termcap> properties to use for highlighting
ffbc6a93 462strings on/off, pre-point part on/off.
57e8c15d 463See L<perldebug/"Debugging Regular Expressions"> for additional info.
2cd61cdb 464
de8c5301
YO
465As of 5.9.5 the directive C<use re 'debug'> and its equivalents are
466lexically scoped, as the other directives are. However they have both
467compile-time and run-time effects.
468
469See L<perlmodlib/Pragmatic Modules>.
470
471=head2 'Debug' mode
472
a3621e74
YO
473Similarly C<use re 'Debug'> produces debugging output, the difference
474being that it allows the fine tuning of what debugging output will be
be8e71aa
YO
475emitted. Options are divided into three groups, those related to
476compilation, those related to execution and those related to special
477purposes. The options are as follows:
478
479=over 4
480
481=item Compile related options
482
483=over 4
484
485=item COMPILE
486
487Turns on all compile related debug options.
488
489=item PARSE
490
491Turns on debug output related to the process of parsing the pattern.
492
493=item OPTIMISE
494
495Enables output related to the optimisation phase of compilation.
496
24b23f37 497=item TRIEC
be8e71aa
YO
498
499Detailed info about trie compilation.
500
501=item DUMP
502
503Dump the final program out after it is compiled and optimised.
504
d9a72fcc
YO
505=item FLAGS
506
507Dump the flags associated with the program
508
509=item TEST
510
511Print output intended for testing the internals of the compile process
512
be8e71aa
YO
513=back
514
515=item Execute related options
516
517=over 4
518
519=item EXECUTE
520
521Turns on all execute related debug options.
522
523=item MATCH
524
525Turns on debugging of the main matching loop.
526
24b23f37 527=item TRIEE
be8e71aa
YO
528
529Extra debugging of how tries execute.
530
531=item INTUIT
532
48fe68f5 533Enable debugging of start-point optimisations.
be8e71aa
YO
534
535=back
536
537=item Extra debugging options
538
539=over 4
540
541=item EXTRA
542
543Turns on all "extra" debugging options.
544
e7707071
YO
545=item BUFFERS
546
c27a5cfe 547Enable debugging the capture group storage during match. Warning,
e7707071
YO
548this can potentially produce extremely large output.
549
24b23f37
YO
550=item TRIEM
551
552Enable enhanced TRIE debugging. Enhances both TRIEE
553and TRIEC.
554
555=item STATE
556
4ee9a43f 557Enable debugging of states in the engine.
24b23f37
YO
558
559=item STACK
be8e71aa 560
24b23f37
YO
Enable debugging of the recursion stack in the engine.  Enabling
or disabling this option automatically does the same for debugging
states as well.  The output from this can be quite large.
564
d9a72fcc
YO
565=item GPOS
566
567Enable debugging of the \G modifier.
568
24b23f37
YO
569=item OPTIMISEM
570
48fe68f5 571Enable enhanced optimisation debugging and start-point optimisations.
99cc5cc6 572Probably not useful except when debugging the regexp engine itself.
24b23f37
YO
573
574=item OFFSETS
575
576Dump offset information. This can be used to see how regops correlate
577to the pattern. Output format is
578
579 NODENUM:POSITION[LENGTH]
580
581Where 1 is the position of the first char in the string. Note that position
582can be 0, or larger than the actual length of the pattern, likewise length
583can be zero.
be8e71aa 584
24b23f37 585=item OFFSETSDBG
be8e71aa
YO
586
587Enable debugging of offsets information. This emits copious
fe759410 588amounts of trace information and doesn't mesh well with other
be8e71aa
YO
589debug options.
590
fe759410 591Almost definitely only useful to people hacking
be8e71aa
YO
592on the offsets part of the debug engine.
593
d9a72fcc 594
be8e71aa
YO
595=back
596
597=item Other useful flags
598
599These are useful shortcuts to save on the typing.
600
601=over 4
602
603=item ALL
604
48fe68f5
KW
605Enable all options at once except OFFSETS, OFFSETSDBG and BUFFERS.
606(To get every single option without exception, use both ALL and EXTRA.)
be8e71aa
YO
607
608=item All
609
fe759410 610Enable DUMP and all execute options. Equivalent to:
be8e71aa
YO
611
612 use re 'debug';
613
614=item MORE
615
616=item More
617
48fe68f5 618Enable the options enabled by "All", plus STATE, TRIEC, and TRIEM.
be8e71aa 619
dba3f186 620=back
be8e71aa 621
dba3f186 622=back
a3621e74 623
1e2e3d02 624As of 5.9.5 the directive C<use re 'debug'> and its equivalents are
48fe68f5 625lexically scoped, as are the other directives. However they have both
1e2e3d02 626compile-time and run-time effects.
b3eb6a9b 627
de8c5301 628=head2 Exportable Functions
b3eb6a9b 629
de8c5301 630As of perl 5.9.5 're' debug contains a number of utility functions that
4ee9a43f 631may be optionally exported into the caller's namespace. They are listed
de8c5301 632below.
b3eb6a9b 633
de8c5301 634=over 4
b3eb6a9b 635
de8c5301 636=item is_regexp($ref)
02ea72ae 637
de8c5301 638Returns true if the argument is a compiled regular expression as returned
4ee9a43f 639by C<qr//>, false if it is not.
02ea72ae 640
4ee9a43f
RGS
641This function will not be confused by overloading or blessing. In
642internals terms, this extracts the regexp pointer out of the
3a5e0888 643PERL_MAGIC_qr structure so it cannot be fooled.
894be9b7 644
de8c5301 645=item regexp_pattern($ref)
02ea72ae 646
4ee9a43f
RGS
647If the argument is a compiled regular expression as returned by C<qr//>,
648then this function returns the pattern.
be8e71aa 649
4ee9a43f
RGS
650In list context it returns a two element list, the first element
651containing the pattern and the second containing the modifiers used when
652the pattern was compiled.
be8e71aa 653
4ee9a43f 654 my ($pat, $mods) = regexp_pattern($ref);
a3621e74 655
99cc5cc6 656In scalar context it returns the same as perl would when stringifying a raw
4ee9a43f
RGS
657C<qr//> with the same pattern inside. If the argument is not a compiled
658reference then this routine returns false but defined in scalar context,
659and the empty list in list context. Thus the following
f9f4320a 660
dff5e0c4 661 if (regexp_pattern($ref) eq '(?^i:foo)')
dba3f186 662
de8c5301 663will be warning free regardless of what $ref actually is.
380e0b81 664
4ee9a43f
RGS
665Like C<is_regexp> this function will not be confused by overloading
666or blessing of the object.
b3eb6a9b 667
256ddcd0
YO
668=item regmust($ref)
669
432acd5f 670If the argument is a compiled regular expression as returned by C<qr//>,
99cc5cc6 671then this function returns what the optimiser considers to be the longest
432acd5f
RGS
672anchored fixed string and longest floating fixed string in the pattern.
673
674A I<fixed string> is defined as being a substring that must appear for the
675pattern to match. An I<anchored fixed string> is a fixed string that must
676appear at a particular offset from the beginning of the match. A I<floating
677fixed string> is defined as a fixed string that can appear at any point in
678a range of positions relative to the start of the match. For example,
679
680 my $qr = qr/here .* there/x;
681 my ($anchored, $floating) = regmust($qr);
256ddcd0 682 print "anchored:'$anchored'\nfloating:'$floating'\n";
432acd5f 683
256ddcd0
YO
684results in
685
686 anchored:'here'
687 floating:'there'
688
432acd5f
RGS
689Because the C<here> is before the C<.*> in the pattern, its position
690can be determined exactly. That's not true, however, for the C<there>;
691it could appear at any point after where the anchored string appeared.
d952710b 692Perl uses both for its optimisations, preferring the longer, or, if they are
256ddcd0
YO
693equal, the floating.
694
695B<NOTE:> This may not necessarily be the definitive longest anchored and
432acd5f 696floating string. This will be what the optimiser of the Perl that you
256ddcd0
YO
697are using thinks is the longest. If you believe that the result is wrong
698please report it via the L<perlbug> utility.
699
28d8d7f4 700=item regname($name,$all)
44a2ac75 701
28d8d7f4
YO
702Returns the contents of a named buffer of the last successful match. If
703$all is true, then returns an array ref containing one entry per buffer,
44a2ac75
YO
704otherwise returns the first defined buffer.
705
28d8d7f4 706=item regnames($all)
44a2ac75 707
28d8d7f4
YO
708Returns a list of all of the named buffers defined in the last successful
709match. If $all is true, then it returns all names defined, if not it returns
710only names which were involved in the match.
44a2ac75 711
28d8d7f4 712=item regnames_count()
44a2ac75 713
28d8d7f4
YO
714Returns the number of distinct names defined in the pattern used
715for the last successful match.
44a2ac75 716
28d8d7f4
YO
717B<Note:> this result is always the actual number of distinct
718named buffers defined, it may not actually match that which is
719returned by C<regnames()> and related routines when those routines
720have not been called with the $all parameter set.
44a2ac75 721
de8c5301 722=back
b3eb6a9b 723
de8c5301 724=head1 SEE ALSO
b3eb6a9b 725
de8c5301
YO
726L<perlmodlib/Pragmatic Modules>.
727
728=cut