This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Errno: mention that using %! autoloads Errno (RT #119359)
[perl5.git] / ext / re / re.pm
CommitLineData
b3eb6a9b
GS
package re;

# pragma for controlling the regexp engine
use strict;
use warnings;

our $VERSION     = "0.33";
our @ISA         = qw(Exporter);

# Utility functions that callers may request by name on the "use re" line.
our @EXPORT_OK   = ('regmust',
                    qw(is_regexp regexp_pattern
                       regname regnames regnames_count));

# Lookup form of @EXPORT_OK, so bits() can test a name with a hash access.
our %EXPORT_OK = map { $_ => 1 } @EXPORT_OK;

# Subpragmas that map directly onto a single bit of $^H.
my %bitmask = (
    taint   => 0x00100000, # HINT_RE_TAINT
    eval    => 0x00200000, # HINT_RE_EVAL
);

# Bit of $^H that bits() sets whenever default regexp flags are in effect.
my $flags_hint = 0x02000000; # HINT_RE_FLAGS
my $PMMOD_SHIFT = 0;

# Values stored in %^H for "use re '/flags'" and "use re 'strict'".
# The single-letter modifiers and 'strict' are bit flags that bits() ORs
# into $^H{reflags}; the character-set entries are small integers that
# bits() stores whole in $^H{reflags_charset}.
my %reflags = (
    m => 1 << ($PMMOD_SHIFT + 0),
    s => 1 << ($PMMOD_SHIFT + 1),
    i => 1 << ($PMMOD_SHIFT + 2),
    x => 1 << ($PMMOD_SHIFT + 3),
    n => 1 << ($PMMOD_SHIFT + 5),
    p => 1 << ($PMMOD_SHIFT + 6),
    strict => 1 << ($PMMOD_SHIFT + 10),
# special cases:
    d => 0,
    l => 1,
    u => 2,
    a => 3,
    aa => 4,
);
36
de8c5301
YO
# setcolor()
#
# Fill in $ENV{PERL_RE_COLORS} for the 'debugcolor' / 'Debugcolor'
# subpragmas.
#
# The termcap capabilities named in $ENV{PERL_RE_TC} (a comma-separated
# list, defaulting to 'md,me,so,se,us,ue') are looked up through Term::Cap
# and stored, tab-separated and with NUL bytes removed, in
# $ENV{PERL_RE_COLORS}.
#
# Any failure along the way (Term::Cap missing, lookup dying, ...) is
# deliberately swallowed: in that case a plain-text fallback is installed,
# but only if $ENV{PERL_RE_COLORS} does not already hold a true value.
#
# Takes no arguments and returns nothing useful.
sub setcolor {
    eval {    # Ignore errors
        require Term::Cap;

        # Explicit method-call syntax; OSPEED is passed to avoid a warning.
        my $terminal = Term::Cap->Tgetent({ OSPEED => 9600 });
        my $props    = $ENV{PERL_RE_TC} || 'md,me,so,se,us,ue';
        my @props    = split /,/, $props;
        my $colors   = join "\t", map { $terminal->Tputs($_, 1) } @props;

        $colors =~ s/\0//g;
        $ENV{PERL_RE_COLORS} = $colors;
    };
    if ($@) {
        $ENV{PERL_RE_COLORS} ||= qq'\t\t> <\t> <\t\t';
    }

    return;
}
54
# Names accepted after "use re 'Debug'" / "no re 'Debug'", mapped to the
# bits they set or clear in ${^RE_DEBUG_FLAGS}.
my %flags = (
    # Compile-time options; COMPILE is the mask for the whole low byte.
    COMPILE         => 0x0000FF,
    PARSE           => 0x000001,
    OPTIMISE        => 0x000002,
    TRIEC           => 0x000004,
    DUMP            => 0x000008,
    FLAGS           => 0x000010,
    TEST            => 0x000020,

    # Run-time options; EXECUTE is the mask for the whole second byte.
    EXECUTE         => 0x00FF00,
    INTUIT          => 0x000100,
    MATCH           => 0x000200,
    TRIEE           => 0x000400,

    # Extra options; EXTRA is the mask for the whole third byte.
    EXTRA           => 0xFF0000,
    TRIEM           => 0x010000,
    OFFSETS         => 0x020000,
    OFFSETSDBG      => 0x040000,
    STATE           => 0x080000,
    OPTIMISEM       => 0x100000,
    STACK           => 0x280000,    # 0x200000 plus the STATE bit
    BUFFERS         => 0x400000,
    GPOS            => 0x800000,
);

# Shortcut names built from the primitive flags above.
$flags{ALL} = -1 & ~($flags{OFFSETS}|$flags{OFFSETSDBG}|$flags{BUFFERS});
$flags{All} = $flags{all} = $flags{DUMP} | $flags{EXECUTE};
$flags{Extra} = $flags{EXECUTE} | $flags{COMPILE} | $flags{GPOS};
$flags{More} = $flags{MORE} = $flags{All} | $flags{TRIEC} | $flags{TRIEM} | $flags{STATE};
$flags{State} = $flags{DUMP} | $flags{EXECUTE} | $flags{STATE};
$flags{TRIE} = $flags{DUMP} | $flags{EXECUTE} | $flags{TRIEC};
85
ec781434
NC
# Load the XS part of this module, but only when DynaLoader's bootstrap
# function is defined.
if (defined &DynaLoader::boot_DynaLoader) {
    require XSLoader;
    XSLoader::load();
}
# else we're miniperl
# We need to work for miniperl, because the XS toolchain uses Text::Wrap, which
# uses re 'taint'.
de8c5301
YO
93
# _load_unload($on)
#
# Install ($on true) or remove ($on false) the debugging regexp engine for
# the scope currently being compiled, by setting or deleting
# $^H{regcomp}.
sub _load_unload {
    my ($on)= @_;
    if ($on) {
        # We call install() every time, as if we didn't, we wouldn't
        # "see" any changes to the color environment var since
        # the last time it was called.

        # install() returns an integer, which if casted properly
        # in C resolves to a structure containing the regexp
        # hooks. Setting it to a random integer will guarantee
        # segfaults.
        $^H{regcomp} = install();
    } else {
        delete $^H{regcomp};
    }
}
110
# bits($on, @subpragmas)
#
# Worker behind import() and unimport().  $on is true when called for
# "use re" and false when called for "no re"; the remaining arguments are
# the subpragma names given on that line.
#
# Returns the $^H bits (from %bitmask: 'taint' and 'eval') that the caller
# should set or clear.  Everything else is applied here as a side effect:
#
#   Debug / Debugcolor   adjust ${^RE_DEBUG_FLAGS} from the flag names that
#                        follow, then install or remove the debug engine;
#                        consumes the rest of the argument list
#   debug / debugcolor   install or remove the debug engine; also ends
#                        argument processing
#   names in %EXPORT_OK  exported into the caller via Exporter
#   strict               toggles the 'strict' bit in $^H{reflags} and may
#                        enable or disable the 'regexp' warnings category
#   /flags               updates $^H{reflags} and $^H{reflags_charset}
#
# "no re" with no arguments at all turns everything off.
#
# Unknown names produce a warning via Carp::carp; repeating /x croaks.
sub bits {
    my $on = shift;
    my $bits = 0;
    my $turning_all_off = ! @_ && ! $on;
    my %seen;   # Has flag already been seen?
    if ($turning_all_off) {

        # Pretend we were called with certain parameters, which are best
        # dealt with that way.
        push @_, keys %bitmask; # taint and eval
        push @_, 'strict';
    }

    # Process each subpragma parameter
  ARG:
    foreach my $idx (0..$#_){
        my $s=$_[$idx];
        if ($s eq 'Debug' or $s eq 'Debugcolor') {
            setcolor() if $s =~/color/i;
            ${^RE_DEBUG_FLAGS} = 0 unless defined ${^RE_DEBUG_FLAGS};

            # Every remaining argument is a Debug flag name.  A separate
            # index variable is used so the outer $idx is not shadowed.
            for my $flag_idx ($idx+1..$#_) {
                if ($flags{$_[$flag_idx]}) {
                    if ($on) {
                        ${^RE_DEBUG_FLAGS} |= $flags{$_[$flag_idx]};
                    } else {
                        ${^RE_DEBUG_FLAGS} &= ~ $flags{$_[$flag_idx]};
                    }
                } else {
                    require Carp;
                    Carp::carp("Unknown \"re\" Debug flag '$_[$flag_idx]', possible flags: ",
                               join(", ",sort keys %flags ) );
                }
            }

            # When turning flags off, keep the engine loaded for as long as
            # any debug flag is still set.
            _load_unload($on ? 1 : ${^RE_DEBUG_FLAGS});
            last;
        } elsif ($s eq 'debug' or $s eq 'debugcolor') {
            setcolor() if $s =~/color/i;
            _load_unload($on);
            last;
        } elsif (exists $bitmask{$s}) {
            $bits |= $bitmask{$s};
        } elsif ($EXPORT_OK{$s}) {
            require Exporter;
            re->export_to_level(2, 're', $s);
        } elsif ($s eq 'strict') {
            if ($on) {
                $^H{reflags} |= $reflags{$s};
                warnings::warnif('experimental::re_strict',
                                 "\"use re 'strict'\" is experimental");

                # Turn on warnings if not already done.
                if (! warnings::enabled('regexp')) {
                    require warnings;
                    warnings->import('regexp');
                    $^H{re_strict} = 1;
                }
            }
            else {
                $^H{reflags} &= ~$reflags{$s} if $^H{reflags};

                # Turn off warnings if we turned them on.
                warnings->unimport('regexp') if $^H{re_strict};
            }
            if ($^H{reflags}) {
                $^H |= $flags_hint;
            }
            else {
                $^H &= ~$flags_hint;
            }
        } elsif ($s =~ s/^\///) {
            my $reflags = $^H{reflags} || 0;
            my $seen_charset;
            while ($s =~ m/( . )/gx) {
                local $_ = $1;
                if (/[adul]/) {
                    # The 'a' may be repeated; hide this from the rest of the
                    # code by counting and getting rid of all of them, then
                    # changing to 'aa' if there is a repeat.
                    if ($_ eq 'a') {
                        my $sav_pos = pos $s;
                        my $a_count = $s =~ s/a//g;
                        pos $s = $sav_pos - 1;  # -1 because got rid of the 'a'
                        if ($a_count > 2) {
                            require Carp;
                            Carp::carp(
                            qq 'The "a" flag may only appear a maximum of twice'
                            );
                        }
                        elsif ($a_count == 2) {
                            $_ = 'aa';
                        }
                    }
                    if ($on) {
                        if ($seen_charset) {
                            require Carp;
                            if ($seen_charset ne $_) {
                                Carp::carp(
                                qq 'The "$seen_charset" and "$_" flags '
                                .qq 'are exclusive'
                                );
                            }
                            else {
                                Carp::carp(
                                qq 'The "$seen_charset" flag may not appear '
                                .qq 'twice'
                                );
                            }
                        }
                        $^H{reflags_charset} = $reflags{$_};
                        $seen_charset = $_;
                    }
                    else {
                        # Only remove the charset if it is the one in force.
                        delete $^H{reflags_charset}
                            if defined $^H{reflags_charset}
                               && $^H{reflags_charset} == $reflags{$_};
                    }
                } elsif (exists $reflags{$_}) {
                    $seen{$_}++;
                    if ($on) {
                        $reflags |= $reflags{$_};
                    }
                    else {
                        $reflags &= ~$reflags{$_};
                    }
                } else {
                    require Carp;
                    Carp::carp(
                    qq'Unknown regular expression flag "$_"'
                    );
                    next ARG;
                }
            }

            # Record the accumulated flags, then keep the hint bit set only
            # while some flag or a charset is still in effect.
            $^H{reflags} = $reflags;
            if ($reflags or defined $^H{reflags_charset}) {
                $^H |= $flags_hint;
            }
            else {
                $^H &= ~$flags_hint;
            }
        } else {
            require Carp;
            Carp::carp("Unknown \"re\" subpragma '$s' (known ones are: ",
                       join(', ', map {qq('$_')} 'debug', 'debugcolor', sort keys %bitmask),
                       ")");
        }
    }
    if (exists $seen{'x'} && $seen{'x'} > 1) {
        require Carp;
        Carp::croak("Only one /x regex modifier is allowed");
    }

    if ($turning_all_off) {
        _load_unload(0);
        $^H{reflags} = 0;
        $^H{reflags_charset} = 0;
        $^H &= ~$flags_hint;
    }

    return $bits;
}
264
# import(CLASS, @subpragmas)
#
# Called for "use re ...".  Discards the class name, lets bits() process
# the subpragmas, and sets the hint bits it returns in $^H.
sub import {
    shift;
    $^H |= bits(1, @_);
}
269
# unimport(CLASS, @subpragmas)
#
# Called for "no re ...".  Discards the class name, lets bits() process
# the subpragmas, and clears the hint bits it returns from $^H.
sub unimport {
    shift;
    $^H &= ~ bits(0, @_);
}
274
2751;
276
277__END__
56953603 278
b3eb6a9b
GS
279=head1 NAME
280
281re - Perl pragma to alter regular expression behaviour
282
283=head1 SYNOPSIS
284
e4d48cc9
GS
285 use re 'taint';
286 ($x) = ($^X =~ /^(.*)$/s); # $x is tainted here
b3eb6a9b 287
2cd61cdb 288 $pat = '(?{ $foo = 1 })';
e4d48cc9 289 use re 'eval';
48fe68f5
KW
290 /foo${pat}bar/; # won't fail (when not under -T
291 # switch)
e4d48cc9
GS
292
293 {
294 no re 'taint'; # the default
295 ($x) = ($^X =~ /^(.*)$/s); # $x is not tainted here
296
297 no re 'eval'; # the default
48fe68f5
KW
298 /foo${pat}bar/; # disallowed (with or without -T
299 # switch)
e4d48cc9 300 }
b3eb6a9b 301
67cdf558
KW
302 use re 'strict'; # Raise warnings for more conditions
303
1e215989
FC
304 use re '/ix';
305 "FOO" =~ / foo /; # /ix implied
306 no re '/x';
307 "FOO" =~ /foo/; # just /i implied
308
1e2e3d02 309 use re 'debug'; # output debugging info during
48fe68f5 310 /^(.*)$/s; # compile and run time
1e2e3d02 311
2cd61cdb 312
48fe68f5
KW
313 use re 'debugcolor'; # same as 'debug', but with colored
314 # output
02ea72ae
IZ
315 ...
316
48fe68f5
KW
317 use re qw(Debug All); # Same as "use re 'debug'", but you
318 # can use "Debug" with things other
319 # than 'All'
320 use re qw(Debug More); # 'All' plus output more details
  no re qw(Debug ALL);           # Turn off (almost) all re debugging
                                 # in this scope
4ee9a43f 323
de8c5301
YO
324 use re qw(is_regexp regexp_pattern); # import utility functions
325 my ($pat,$mods)=regexp_pattern(qr/foo/i);
326 if (is_regexp($obj)) {
327 print "Got regexp: ",
48fe68f5
KW
328 scalar regexp_pattern($obj); # just as perl would stringify
329 } # it but no hassle with blessed
330 # re's.
a3621e74 331
3ffabb8c
GS
332(We use $^X in these examples because it's tainted by default.)
333
b3eb6a9b
GS
334=head1 DESCRIPTION
335
de8c5301
YO
336=head2 'taint' mode
337
b3eb6a9b 338When C<use re 'taint'> is in effect, and a tainted string is the target
99cc5cc6
A
339of a regexp, the regexp memories (or values returned by the m// operator
340in list context) are tainted. This feature is useful when regexp operations
e4d48cc9
GS
341on tainted data aren't meant to extract safe substrings, but to perform
342other transformations.
b3eb6a9b 343
de8c5301
YO
344=head2 'eval' mode
345
99cc5cc6 346When C<use re 'eval'> is in effect, a regexp is allowed to contain
0b370c0a 347C<(?{ ... })> zero-width assertions and C<(??{ ... })> postponed
e128ab2c
DM
348subexpressions that are derived from variable interpolation, rather than
349appearing literally within the regexp. That is normally disallowed, since
350it is a
2cd61cdb
IZ
351potential security risk. Note that this pragma is ignored when the regular
352expression is obtained from tainted data, i.e. evaluation is always
0b370c0a 353disallowed with tainted regular expressions. See L<perlre/(?{ code })>
bb1773de 354and L<perlre/(??{ code })>.
2cd61cdb 355
ffbc6a93 356For the purpose of this pragma, interpolation of precompiled regular
0a92e3a8
GS
357expressions (i.e., the result of C<qr//>) is I<not> considered variable
358interpolation. Thus:
2cd61cdb
IZ
359
360 /foo${pat}bar/
361
ffbc6a93 362I<is> allowed if $pat is a precompiled regular expression, even
0b370c0a 363if $pat contains C<(?{ ... })> assertions or C<(??{ ... })> subexpressions.
2cd61cdb 364
67cdf558
KW
365=head2 'strict' mode
366
1eac213a
KW
367Note that this is an experimental feature which may be changed or removed in a
368future Perl release.
369
67cdf558
KW
370When C<use re 'strict'> is in effect, stricter checks are applied than
otherwise when compiling regular expression patterns.  These may cause more
372warnings to be raised than otherwise, and more things to be fatal instead of
373just warnings. The purpose of this is to find and report at compile time some
374things, which may be legal, but have a reasonable possibility of not being the
375programmer's actual intent. This automatically turns on the C<"regexp">
376warnings category (if not already on) within its scope.
377
1eac213a
KW
378As an example of something that is caught under C<"strict'>, but not
379otherwise, is the pattern
67cdf558
KW
380
381 qr/\xABC/
382
383The C<"\x"> construct without curly braces should be followed by exactly two
384hex digits; this one is followed by three. This currently evaluates as
385equivalent to
386
387 qr/\x{AB}C/
388
389that is, the character whose code point value is C<0xAB>, followed by the
letter C<C>.  But since C<C> is a hex digit, there is a reasonable chance
391that the intent was
392
393 qr/\x{ABC}/
394
395that is the single character at C<0xABC>. Under C<'strict'> it is an error to
396not follow C<\x> with exactly two hex digits. When not under C<'strict'> a
397warning is generated if there is only one hex digit, and no warning is raised
398if there are more than two.
399
400It is expected that what exactly C<'strict'> does will evolve over time as we
401gain experience with it. This means that programs that compile under it in
402today's Perl may not compile, or may have more or fewer warnings, in future
1eac213a
KW
Perls.  There are no backwards compatibility promises with regard to it.  Also
404there are already proposals for an alternate syntax for enabling it. For
405these reasons, using it will raise a C<experimental::re_strict> class warning,
67cdf558
KW
406unless that category is turned off.
407
408Note that if a pattern compiled within C<'strict'> is recompiled, say by
409interpolating into another pattern, outside of C<'strict'>, it is not checked
410again for strictness. This is because if it works under strict it must work
411under non-strict.
412
1e215989
FC
413=head2 '/flags' mode
414
415When C<use re '/flags'> is specified, the given flags are automatically
416added to every regular expression till the end of the lexical scope.
417
418C<no re '/flags'> will turn off the effect of C<use re '/flags'> for the
419given flags.
420
421For example, if you want all your regular expressions to have /msx on by
422default, simply put
423
424 use re '/msx';
425
426at the top of your code.
427
cfaf538b 428The character set /adul flags cancel each other out. So, in this example,
1e215989
FC
429
430 use re "/u";
431 "ss" =~ /\xdf/;
432 use re "/d";
433 "ss" =~ /\xdf/;
434
4d220a7d 435the second C<use re> does an implicit C<no re '/u'>.
1e215989 436
59640339 437Turning on one of the character set flags with C<use re> takes precedence over the
1e215989
FC
438C<locale> pragma and the 'unicode_strings' C<feature>, for regular
439expressions. Turning off one of these flags when it is active reverts to
440the behaviour specified by whatever other pragmata are in scope. For
441example:
442
443 use feature "unicode_strings";
444 no re "/u"; # does nothing
445 use re "/l";
446 no re "/l"; # reverts to unicode_strings behaviour
447
de8c5301
YO
448=head2 'debug' mode
449
ffbc6a93 450When C<use re 'debug'> is in effect, perl emits debugging messages when
2cd61cdb
IZ
451compiling and using regular expressions. The output is the same as that
452obtained by running a C<-DDEBUGGING>-enabled perl interpreter with the
453B<-Dr> switch. It may be quite voluminous depending on the complexity
02ea72ae
IZ
454of the match. Using C<debugcolor> instead of C<debug> enables a
455form of output that can be used to get a colorful display on terminals
456that understand termcap color sequences. Set C<$ENV{PERL_RE_TC}> to a
457comma-separated list of C<termcap> properties to use for highlighting
ffbc6a93 458strings on/off, pre-point part on/off.
57e8c15d 459See L<perldebug/"Debugging Regular Expressions"> for additional info.
2cd61cdb 460
de8c5301
YO
461As of 5.9.5 the directive C<use re 'debug'> and its equivalents are
462lexically scoped, as the other directives are. However they have both
463compile-time and run-time effects.
464
465See L<perlmodlib/Pragmatic Modules>.
466
467=head2 'Debug' mode
468
a3621e74
YO
469Similarly C<use re 'Debug'> produces debugging output, the difference
470being that it allows the fine tuning of what debugging output will be
be8e71aa
YO
471emitted. Options are divided into three groups, those related to
472compilation, those related to execution and those related to special
473purposes. The options are as follows:
474
475=over 4
476
477=item Compile related options
478
479=over 4
480
481=item COMPILE
482
483Turns on all compile related debug options.
484
485=item PARSE
486
487Turns on debug output related to the process of parsing the pattern.
488
489=item OPTIMISE
490
491Enables output related to the optimisation phase of compilation.
492
24b23f37 493=item TRIEC
be8e71aa
YO
494
495Detailed info about trie compilation.
496
497=item DUMP
498
499Dump the final program out after it is compiled and optimised.
500
d9a72fcc
YO
501=item FLAGS
502
503Dump the flags associated with the program
504
505=item TEST
506
507Print output intended for testing the internals of the compile process
508
be8e71aa
YO
509=back
510
511=item Execute related options
512
513=over 4
514
515=item EXECUTE
516
517Turns on all execute related debug options.
518
519=item MATCH
520
521Turns on debugging of the main matching loop.
522
24b23f37 523=item TRIEE
be8e71aa
YO
524
525Extra debugging of how tries execute.
526
527=item INTUIT
528
48fe68f5 529Enable debugging of start-point optimisations.
be8e71aa
YO
530
531=back
532
533=item Extra debugging options
534
535=over 4
536
537=item EXTRA
538
539Turns on all "extra" debugging options.
540
e7707071
YO
541=item BUFFERS
542
c27a5cfe 543Enable debugging the capture group storage during match. Warning,
e7707071
YO
544this can potentially produce extremely large output.
545
24b23f37
YO
546=item TRIEM
547
548Enable enhanced TRIE debugging. Enhances both TRIEE
549and TRIEC.
550
551=item STATE
552
4ee9a43f 553Enable debugging of states in the engine.
24b23f37
YO
554
555=item STACK
be8e71aa 556
24b23f37
YO
557Enable debugging of the recursion stack in the engine. Enabling
558or disabling this option automatically does the same for debugging
states as well.  The output from this can be quite large.
560
d9a72fcc
YO
561=item GPOS
562
563Enable debugging of the \G modifier.
564
24b23f37
YO
565=item OPTIMISEM
566
48fe68f5 567Enable enhanced optimisation debugging and start-point optimisations.
99cc5cc6 568Probably not useful except when debugging the regexp engine itself.
24b23f37
YO
569
570=item OFFSETS
571
572Dump offset information. This can be used to see how regops correlate
573to the pattern. Output format is
574
575 NODENUM:POSITION[LENGTH]
576
577Where 1 is the position of the first char in the string. Note that position
578can be 0, or larger than the actual length of the pattern, likewise length
579can be zero.
be8e71aa 580
24b23f37 581=item OFFSETSDBG
be8e71aa
YO
582
583Enable debugging of offsets information. This emits copious
fe759410 584amounts of trace information and doesn't mesh well with other
be8e71aa
YO
585debug options.
586
fe759410 587Almost definitely only useful to people hacking
be8e71aa
YO
588on the offsets part of the debug engine.
589
d9a72fcc 590
be8e71aa
YO
591=back
592
593=item Other useful flags
594
595These are useful shortcuts to save on the typing.
596
597=over 4
598
599=item ALL
600
48fe68f5
KW
601Enable all options at once except OFFSETS, OFFSETSDBG and BUFFERS.
602(To get every single option without exception, use both ALL and EXTRA.)
be8e71aa
YO
603
604=item All
605
fe759410 606Enable DUMP and all execute options. Equivalent to:
be8e71aa
YO
607
608 use re 'debug';
609
610=item MORE
611
612=item More
613
48fe68f5 614Enable the options enabled by "All", plus STATE, TRIEC, and TRIEM.
be8e71aa 615
dba3f186 616=back
be8e71aa 617
dba3f186 618=back
a3621e74 619
1e2e3d02 620As of 5.9.5 the directive C<use re 'debug'> and its equivalents are
48fe68f5 621lexically scoped, as are the other directives. However they have both
1e2e3d02 622compile-time and run-time effects.
b3eb6a9b 623
de8c5301 624=head2 Exportable Functions
b3eb6a9b 625
As of perl 5.9.5 the 're' pragma contains a number of utility functions that
4ee9a43f 627may be optionally exported into the caller's namespace. They are listed
de8c5301 628below.
b3eb6a9b 629
de8c5301 630=over 4
b3eb6a9b 631
de8c5301 632=item is_regexp($ref)
02ea72ae 633
de8c5301 634Returns true if the argument is a compiled regular expression as returned
4ee9a43f 635by C<qr//>, false if it is not.
02ea72ae 636
4ee9a43f
RGS
637This function will not be confused by overloading or blessing. In
638internals terms, this extracts the regexp pointer out of the
3a5e0888 639PERL_MAGIC_qr structure so it cannot be fooled.
894be9b7 640
de8c5301 641=item regexp_pattern($ref)
02ea72ae 642
4ee9a43f
RGS
643If the argument is a compiled regular expression as returned by C<qr//>,
644then this function returns the pattern.
be8e71aa 645
4ee9a43f
RGS
646In list context it returns a two element list, the first element
647containing the pattern and the second containing the modifiers used when
648the pattern was compiled.
be8e71aa 649
4ee9a43f 650 my ($pat, $mods) = regexp_pattern($ref);
a3621e74 651
99cc5cc6 652In scalar context it returns the same as perl would when stringifying a raw
4ee9a43f
RGS
653C<qr//> with the same pattern inside. If the argument is not a compiled
regular expression then this routine returns false but defined in scalar context,
655and the empty list in list context. Thus the following
f9f4320a 656
dff5e0c4 657 if (regexp_pattern($ref) eq '(?^i:foo)')
dba3f186 658
de8c5301 659will be warning free regardless of what $ref actually is.
380e0b81 660
4ee9a43f
RGS
661Like C<is_regexp> this function will not be confused by overloading
662or blessing of the object.
b3eb6a9b 663
256ddcd0
YO
664=item regmust($ref)
665
432acd5f 666If the argument is a compiled regular expression as returned by C<qr//>,
99cc5cc6 667then this function returns what the optimiser considers to be the longest
432acd5f
RGS
668anchored fixed string and longest floating fixed string in the pattern.
669
670A I<fixed string> is defined as being a substring that must appear for the
671pattern to match. An I<anchored fixed string> is a fixed string that must
672appear at a particular offset from the beginning of the match. A I<floating
673fixed string> is defined as a fixed string that can appear at any point in
674a range of positions relative to the start of the match. For example,
675
676 my $qr = qr/here .* there/x;
677 my ($anchored, $floating) = regmust($qr);
256ddcd0 678 print "anchored:'$anchored'\nfloating:'$floating'\n";
432acd5f 679
256ddcd0
YO
680results in
681
682 anchored:'here'
683 floating:'there'
684
432acd5f
RGS
685Because the C<here> is before the C<.*> in the pattern, its position
686can be determined exactly. That's not true, however, for the C<there>;
687it could appear at any point after where the anchored string appeared.
d952710b 688Perl uses both for its optimisations, preferring the longer, or, if they are
256ddcd0
YO
689equal, the floating.
690
691B<NOTE:> This may not necessarily be the definitive longest anchored and
432acd5f 692floating string. This will be what the optimiser of the Perl that you
256ddcd0
YO
693are using thinks is the longest. If you believe that the result is wrong
694please report it via the L<perlbug> utility.
695
28d8d7f4 696=item regname($name,$all)
44a2ac75 697
28d8d7f4
YO
698Returns the contents of a named buffer of the last successful match. If
699$all is true, then returns an array ref containing one entry per buffer,
44a2ac75
YO
700otherwise returns the first defined buffer.
701
28d8d7f4 702=item regnames($all)
44a2ac75 703
28d8d7f4
YO
704Returns a list of all of the named buffers defined in the last successful
705match. If $all is true, then it returns all names defined, if not it returns
706only names which were involved in the match.
44a2ac75 707
28d8d7f4 708=item regnames_count()
44a2ac75 709
28d8d7f4
YO
710Returns the number of distinct names defined in the pattern used
711for the last successful match.
44a2ac75 712
28d8d7f4
YO
713B<Note:> this result is always the actual number of distinct
714named buffers defined, it may not actually match that which is
715returned by C<regnames()> and related routines when those routines
716have not been called with the $all parameter set.
44a2ac75 717
de8c5301 718=back
b3eb6a9b 719
de8c5301 720=head1 SEE ALSO
b3eb6a9b 721
de8c5301
YO
722L<perlmodlib/Pragmatic Modules>.
723
724=cut