This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Do not inject use Config into Dynaloader.pm when PERL_BUILD_EXPAND_CONFIG_VARS
[perl5.git] / ext / re / re.pm
CommitLineData
b3eb6a9b
GS
package re;

# Pragma that controls the behaviour of the regexp engine.
use strict;
use warnings;

our $VERSION = "0.32";
our @ISA     = qw(Exporter);

# Utility functions a caller may ask to have exported.
our @EXPORT_OK = qw(
    regmust
    is_regexp regexp_pattern
    regname regnames regnames_count
);

# Lookup-table form of @EXPORT_OK: function name => 1.
our %EXPORT_OK;
@EXPORT_OK{@EXPORT_OK} = (1) x @EXPORT_OK;
13
de8c5301
YO
# Bits of $^H toggled directly by the 'taint' and 'eval' subpragmas.
my %bitmask = (
    eval  => 0x00200000,    # HINT_RE_EVAL
    taint => 0x00100000,    # HINT_RE_TAINT
);

# Bit of $^H that is set while any default regexp flags are in force.
my $flags_hint = 0x02000000;    # HINT_RE_FLAGS

# Values for the '/flags' subpragma and for 'strict'.  Ordinary modifiers
# map to a single bit, PMMOD_SHIFT plus the listed offset; the character
# set modifiers are special cases that map to small integers instead.
my $PMMOD_SHIFT = 0;
my %reflags = (
    (
        map { $_->[0] => 1 << ($PMMOD_SHIFT + $_->[1]) }
            [ m      => 0 ],
            [ s      => 1 ],
            [ i      => 2 ],
            [ x      => 3 ],
            [ n      => 5 ],
            [ p      => 6 ],
            [ strict => 10 ],
    ),

    # special cases:
    d  => 0,
    l  => 1,
    u  => 2,
    a  => 3,
    aa => 4,
);
36
de8c5301
YO
# setcolor()
#
# Fill in $ENV{PERL_RE_COLORS} with the terminal control sequences used by
# the 'debugcolor' / 'Debugcolor' subpragmas.  The termcap capabilities to
# look up come from the comma-separated list in $ENV{PERL_RE_TC}, which
# defaults to 'md,me,so,se,us,ue'.  The resulting sequences are joined with
# tabs and have any NUL characters removed.
#
# This is best-effort: if Term::Cap cannot be loaded or the terminal cannot
# be described, the error is swallowed and a plain-text fallback is stored
# instead, unless PERL_RE_COLORS already holds a true value.
#
# Takes no arguments; the return value is not meaningful.
sub setcolor {
    # Judge success by the eval's own result rather than by $@, so that
    # failure detection does not depend on the state of a global.
    my $ok = eval {    # Ignore errors
        require Term::Cap;

        # Direct method call instead of indirect-object syntax.  OSPEED is
        # supplied to avoid a warning.
        my $terminal = Term::Cap->Tgetent({OSPEED => 9600});
        my $props = $ENV{PERL_RE_TC} || 'md,me,so,se,us,ue';
        my @props = split /,/, $props;
        my $colors = join "\t", map {$terminal->Tputs($_,1)} @props;

        $colors =~ s/\0//g;
        $ENV{PERL_RE_COLORS} = $colors;
        1;
    };
    if (!$ok) {
        $ENV{PERL_RE_COLORS} ||= qq'\t\t> <\t> <\t\t';
    }

    return;
}
54
# Debug flag names accepted after "use re 'Debug'", mapped to the bits they
# control in ${^RE_DEBUG_FLAGS}.  COMPILE, EXECUTE and EXTRA are masks that
# cover a whole byte of options each.
my %flags = (
    # compile related
    COMPILE  => 0x0000FF,
    PARSE    => 0x000001,
    OPTIMISE => 0x000002,
    TRIEC    => 0x000004,
    DUMP     => 0x000008,
    FLAGS    => 0x000010,
    TEST     => 0x000020,

    # execute related
    EXECUTE => 0x00FF00,
    INTUIT  => 0x000100,
    MATCH   => 0x000200,
    TRIEE   => 0x000400,

    # extra
    EXTRA      => 0xFF0000,
    TRIEM      => 0x010000,
    OFFSETS    => 0x020000,
    OFFSETSDBG => 0x040000,
    STATE      => 0x080000,
    OPTIMISEM  => 0x100000,
    STACK      => 0x280000,    # includes the STATE bit
    BUFFERS    => 0x400000,
    GPOS       => 0x800000,
);

# ALL: every bit except OFFSETS, OFFSETSDBG and BUFFERS.
$flags{ALL} = -1;
$flags{ALL} &= ~$flags{$_} for qw(OFFSETS OFFSETSDBG BUFFERS);

# Convenience combinations.
$flags{all}   = $flags{DUMP} | $flags{EXECUTE};
$flags{All}   = $flags{all};
$flags{Extra} = $flags{COMPILE} | $flags{EXECUTE} | $flags{GPOS};
$flags{MORE}  = $flags{All} | $flags{TRIEC} | $flags{TRIEM} | $flags{STATE};
$flags{More}  = $flags{MORE};
$flags{State} = $flags{All} | $flags{STATE};
$flags{TRIE}  = $flags{All} | $flags{TRIEC};
85
ec781434
NC
# Load the XS half of the module, but only when DynaLoader has been booted.
# Otherwise we are miniperl, and this file must still work there because
# the XS toolchain uses Text::Wrap, which uses re 'taint'.
XS_LOAD: {
    last XS_LOAD unless defined &DynaLoader::boot_DynaLoader;
    require XSLoader;
    XSLoader::load();
}
de8c5301
YO
93
# _load_unload($on)
#
# Install (true $on) or remove (false $on) the debugging regexp engine for
# the scope currently being compiled, by way of $^H{regcomp}.
sub _load_unload {
    my ($enable) = @_;

    if (!$enable) {
        delete $^H{regcomp};
    }
    else {
        # install() is called on every request rather than cached:
        # otherwise changes made to the color environment variable since
        # the previous call would go unnoticed.
        #
        # Its return value is an integer which, cast properly in C,
        # resolves to a structure containing the regexp hooks.  Storing
        # any other integer here will guarantee segfaults.
        $^H{regcomp} = install();
    }
}
110
# bits($on, @subpragmas)
#
# Worker shared by import() and unimport().  $on is true when the listed
# subpragmas are being switched on and false when they are being switched
# off.  Each subpragma is processed in turn:
#
#   Debug / Debugcolor   the remaining arguments are debug flag names that
#                        are set or cleared in ${^RE_DEBUG_FLAGS}
#   debug / debugcolor   load or unload the debugging engine
#   taint / eval         accumulated into the return value
#   exportable function  exported into the caller of import/unimport
#   strict               recorded in $^H{reflags}; may enable 'regexp'
#                        warnings
#   /flags               default regexp modifiers, recorded in
#                        $^H{reflags} and $^H{reflags_charset}
#
# Returns the $^H hint bits for the 'taint' and 'eval' subpragmas that were
# named; the caller is responsible for setting or clearing them.
#
# Everything stays inline in this sub on purpose: export_to_level(2, ...)
# counts stack frames.
sub bits {
    my $enable    = shift;
    my $hint_bits = 0;
    my %times_seen;    # How often each non-charset /flag was given

    # A bare "no re" switches everything off.  Part of that is most easily
    # done by pretending we were called with these parameters.
    my $disable_everything = !$enable && !@_;
    if ($disable_everything) {
        push @_, keys %bitmask;    # taint and eval
        push @_, 'strict';
    }

    # Process each subpragma parameter
  ARG:
    for my $i (0 .. $#_) {
        my $arg = $_[$i];

        if ($arg eq 'Debug' || $arg eq 'Debugcolor') {
            if ($arg =~ /color/i) {
                setcolor();
            }
            if (!defined ${^RE_DEBUG_FLAGS}) {
                ${^RE_DEBUG_FLAGS} = 0;
            }

            # Every remaining argument names a debug flag.
            for my $j ($i + 1 .. $#_) {
                unless ($flags{$_[$j]}) {
                    require Carp;
                    Carp::carp("Unknown \"re\" Debug flag '$_[$j]', possible flags: ",
                               join(", ",sort keys %flags ) );
                    next;
                }
                if ($enable) {
                    ${^RE_DEBUG_FLAGS} |= $flags{$_[$j]};
                }
                else {
                    ${^RE_DEBUG_FLAGS} &= ~ $flags{$_[$j]};
                }
            }

            # When switching flags off, the engine stays loaded for as long
            # as any debug flag remains set.
            _load_unload($enable ? 1 : ${^RE_DEBUG_FLAGS});
            last ARG;
        }

        if ($arg eq 'debug' || $arg eq 'debugcolor') {
            if ($arg =~ /color/i) {
                setcolor();
            }
            _load_unload($enable);
            last ARG;
        }

        if (exists $bitmask{$arg}) {
            $hint_bits |= $bitmask{$arg};
            next ARG;
        }

        if ($EXPORT_OK{$arg}) {
            require Exporter;
            re->export_to_level(2, 're', $arg);
            next ARG;
        }

        if ($arg eq 'strict') {
            if ($enable) {
                $^H{reflags} |= $reflags{$arg};
                warnings::warnif('experimental::re_strict',
                                 "\"use re 'strict'\" is experimental");

                # Turn on warnings if not already done, and remember that
                # it was us who did so.
                unless (warnings::enabled('regexp')) {
                    require warnings;
                    warnings->import('regexp');
                    $^H{re_strict} = 1;
                }
            }
            else {
                if ($^H{reflags}) {
                    $^H{reflags} &= ~$reflags{$arg};
                }

                # Turn off warnings if we turned them on.
                if ($^H{re_strict}) {
                    warnings->unimport('regexp');
                }
            }

            $^H = $^H{reflags} ? ($^H | $flags_hint) : ($^H & ~$flags_hint);
            next ARG;
        }

        if ($arg =~ s/^\///) {
            my $new_flags = $^H{reflags} || 0;
            my $charset_seen;

            while ($arg =~ m/( . )/gx) {
                local $_ = $1;

                if (/[adul]/) {
                    # The 'a' may be repeated; hide this from the rest of
                    # the code by counting and getting rid of all of them,
                    # then changing to 'aa' if there is a repeat.
                    if ($_ eq 'a') {
                        my $resume_at = pos($arg);
                        my $a_total   = ($arg =~ s/a//g);

                        # -1 because got rid of the 'a'
                        pos($arg) = $resume_at - 1;

                        if ($a_total > 2) {
                            require Carp;
                            Carp::carp(
                                qq 'The "a" flag may only appear a maximum of twice'
                            );
                        }
                        elsif ($a_total == 2) {
                            $_ = 'aa';
                        }
                    }

                    if (!$enable) {
                        # Only cancel the charset that is actually in force.
                        if (defined $^H{reflags_charset}
                            && $^H{reflags_charset} == $reflags{$_})
                        {
                            delete $^H{reflags_charset};
                        }
                    }
                    else {
                        if ($charset_seen) {
                            require Carp;
                            Carp::carp(
                                $charset_seen ne $_
                                ? qq 'The "$charset_seen" and "$_" flags '
                                  .qq 'are exclusive'
                                : qq 'The "$charset_seen" flag may not appear '
                                  .qq 'twice'
                            );
                        }
                        $^H{reflags_charset} = $reflags{$_};
                        $charset_seen = $_;
                    }
                }
                elsif (exists $reflags{$_}) {
                    $times_seen{$_}++;
                    if ($enable) {
                        $new_flags |= $reflags{$_};
                    }
                    else {
                        $new_flags &= ~$reflags{$_};
                    }
                }
                else {
                    require Carp;
                    Carp::carp(
                        qq'Unknown regular expression flag "$_"'
                    );
                    next ARG;
                }
            }

            $^H{reflags} = $new_flags;
            if ($^H{reflags} or defined $^H{reflags_charset}) {
                $^H |= $flags_hint;
            }
            else {
                $^H &= ~$flags_hint;
            }
            next ARG;
        }

        # Nothing above recognised the argument.
        require Carp;
        Carp::carp("Unknown \"re\" subpragma '$arg' (known ones are: ",
                   join(', ', map {qq('$_')} 'debug', 'debugcolor', sort keys %bitmask),
                   ")");
    }

    # Repeating /x is deprecated; report it under 'deprecated' when that
    # category is enabled, otherwise under 'regexp' if that one is.
    if (exists $times_seen{'x'} && $times_seen{'x'} > 1) {
        my $category = warnings::enabled("deprecated") ? "deprecated"
                     : warnings::enabled("regexp")     ? "regexp"
                     :                                   undef;
        if (defined $category) {
            warnings::warn($category,
                "Having more than one /x regexp modifier is deprecated");
        }
    }

    if ($disable_everything) {
        _load_unload(0);
        $^H{reflags} = 0;
        $^H{reflags_charset} = 0;
        $^H &= ~$flags_hint;
    }

    return $hint_bits;
}
272
# Called for "use re LIST": processes the subpragmas through bits() and
# sets the hint bits it returns in $^H.
sub import {
    my (undef, @subpragmas) = @_;    # the first argument is not used
    my $hint_bits = bits(1, @subpragmas);
    $^H |= $hint_bits;
}
277
# Called for "no re LIST": processes the subpragmas through bits() and
# clears the hint bits it returns from $^H.
sub unimport {
    my (undef, @subpragmas) = @_;    # the first argument is not used
    my $hint_bits = bits(0, @subpragmas);
    $^H &= ~$hint_bits;
}
282
2831;
284
285__END__
56953603 286
b3eb6a9b
GS
287=head1 NAME
288
289re - Perl pragma to alter regular expression behaviour
290
291=head1 SYNOPSIS
292
e4d48cc9
GS
293 use re 'taint';
294 ($x) = ($^X =~ /^(.*)$/s); # $x is tainted here
b3eb6a9b 295
2cd61cdb 296 $pat = '(?{ $foo = 1 })';
e4d48cc9 297 use re 'eval';
48fe68f5
KW
298 /foo${pat}bar/; # won't fail (when not under -T
299 # switch)
e4d48cc9
GS
300
301 {
302 no re 'taint'; # the default
303 ($x) = ($^X =~ /^(.*)$/s); # $x is not tainted here
304
305 no re 'eval'; # the default
48fe68f5
KW
306 /foo${pat}bar/; # disallowed (with or without -T
307 # switch)
e4d48cc9 308 }
b3eb6a9b 309
67cdf558
KW
310 use re 'strict'; # Raise warnings for more conditions
311
1e215989
FC
312 use re '/ix';
313 "FOO" =~ / foo /; # /ix implied
314 no re '/x';
315 "FOO" =~ /foo/; # just /i implied
316
1e2e3d02 317 use re 'debug'; # output debugging info during
48fe68f5 318 /^(.*)$/s; # compile and run time
1e2e3d02 319
2cd61cdb 320
48fe68f5
KW
321 use re 'debugcolor'; # same as 'debug', but with colored
322 # output
02ea72ae
IZ
323 ...
324
48fe68f5
KW
325 use re qw(Debug All); # Same as "use re 'debug'", but you
326 # can use "Debug" with things other
327 # than 'All'
328 use re qw(Debug More); # 'All' plus output more details
329 no re qw(Debug ALL); # Turn off (almost) all re debugging
330 # in this scope
4ee9a43f 331
de8c5301
YO
332 use re qw(is_regexp regexp_pattern); # import utility functions
333 my ($pat,$mods)=regexp_pattern(qr/foo/i);
334 if (is_regexp($obj)) {
335 print "Got regexp: ",
48fe68f5
KW
336 scalar regexp_pattern($obj); # just as perl would stringify
337 } # it but no hassle with blessed
338 # re's.
a3621e74 339
3ffabb8c
GS
340(We use $^X in these examples because it's tainted by default.)
341
b3eb6a9b
GS
342=head1 DESCRIPTION
343
de8c5301
YO
344=head2 'taint' mode
345
b3eb6a9b 346When C<use re 'taint'> is in effect, and a tainted string is the target
99cc5cc6
A
347of a regexp, the regexp memories (or values returned by the m// operator
348in list context) are tainted. This feature is useful when regexp operations
e4d48cc9
GS
349on tainted data aren't meant to extract safe substrings, but to perform
350other transformations.
b3eb6a9b 351
de8c5301
YO
352=head2 'eval' mode
353
99cc5cc6 354When C<use re 'eval'> is in effect, a regexp is allowed to contain
0b370c0a 355C<(?{ ... })> zero-width assertions and C<(??{ ... })> postponed
e128ab2c
DM
356subexpressions that are derived from variable interpolation, rather than
357appearing literally within the regexp. That is normally disallowed, since
358it is a
2cd61cdb
IZ
359potential security risk. Note that this pragma is ignored when the regular
360expression is obtained from tainted data, i.e. evaluation is always
0b370c0a 361disallowed with tainted regular expressions. See L<perlre/(?{ code })>
bb1773de 362and L<perlre/(??{ code })>.
2cd61cdb 363
ffbc6a93 364For the purpose of this pragma, interpolation of precompiled regular
0a92e3a8
GS
365expressions (i.e., the result of C<qr//>) is I<not> considered variable
366interpolation. Thus:
2cd61cdb
IZ
367
368 /foo${pat}bar/
369
ffbc6a93 370I<is> allowed if $pat is a precompiled regular expression, even
0b370c0a 371if $pat contains C<(?{ ... })> assertions or C<(??{ ... })> subexpressions.
2cd61cdb 372
67cdf558
KW
373=head2 'strict' mode
374
1eac213a
KW
375Note that this is an experimental feature which may be changed or removed in a
376future Perl release.
377
67cdf558
KW
378When C<use re 'strict'> is in effect, stricter checks are applied than
379otherwise when compiling regular expression patterns. These may cause more
380warnings to be raised than otherwise, and more things to be fatal instead of
381just warnings. The purpose of this is to find and report at compile time some
382things, which may be legal, but have a reasonable possibility of not being the
383programmer's actual intent. This automatically turns on the C<"regexp">
384warnings category (if not already on) within its scope.
385
1eac213a
KW
386An example of something that is caught under C<'strict'>, but not
387otherwise, is the pattern
67cdf558
KW
388
389 qr/\xABC/
390
391The C<"\x"> construct without curly braces should be followed by exactly two
392hex digits; this one is followed by three. This currently evaluates as
393equivalent to
394
395 qr/\x{AB}C/
396
397that is, the character whose code point value is C<0xAB>, followed by the
398letter C<C>. But since C<C> is a hex digit, there is a reasonable chance
399that the intent was
400
401 qr/\x{ABC}/
402
403that is the single character at C<0xABC>. Under C<'strict'> it is an error to
404not follow C<\x> with exactly two hex digits. When not under C<'strict'> a
405warning is generated if there is only one hex digit, and no warning is raised
406if there are more than two.
407
408It is expected that what exactly C<'strict'> does will evolve over time as we
409gain experience with it. This means that programs that compile under it in
410today's Perl may not compile, or may have more or fewer warnings, in future
1eac213a
KW
411Perls. There are no backwards compatibility promises with regard to it. Also
412there are already proposals for an alternate syntax for enabling it. For
413these reasons, using it will raise a C<experimental::re_strict> class warning,
67cdf558
KW
414unless that category is turned off.
415
416Note that if a pattern compiled within C<'strict'> is recompiled, say by
417interpolating into another pattern, outside of C<'strict'>, it is not checked
418again for strictness. This is because if it works under strict it must work
419under non-strict.
420
1e215989
FC
421=head2 '/flags' mode
422
423When C<use re '/flags'> is specified, the given flags are automatically
424added to every regular expression till the end of the lexical scope.
425
426C<no re '/flags'> will turn off the effect of C<use re '/flags'> for the
427given flags.
428
429For example, if you want all your regular expressions to have /msx on by
430default, simply put
431
432 use re '/msx';
433
434at the top of your code.
435
cfaf538b 436The character set /adul flags cancel each other out. So, in this example,
1e215989
FC
437
438 use re "/u";
439 "ss" =~ /\xdf/;
440 use re "/d";
441 "ss" =~ /\xdf/;
442
4d220a7d 443the second C<use re> does an implicit C<no re '/u'>.
1e215989 444
59640339 445Turning on one of the character set flags with C<use re> takes precedence over the
1e215989
FC
446C<locale> pragma and the 'unicode_strings' C<feature>, for regular
447expressions. Turning off one of these flags when it is active reverts to
448the behaviour specified by whatever other pragmata are in scope. For
449example:
450
451 use feature "unicode_strings";
452 no re "/u"; # does nothing
453 use re "/l";
454 no re "/l"; # reverts to unicode_strings behaviour
455
de8c5301
YO
456=head2 'debug' mode
457
ffbc6a93 458When C<use re 'debug'> is in effect, perl emits debugging messages when
2cd61cdb
IZ
459compiling and using regular expressions. The output is the same as that
460obtained by running a C<-DDEBUGGING>-enabled perl interpreter with the
461B<-Dr> switch. It may be quite voluminous depending on the complexity
02ea72ae
IZ
462of the match. Using C<debugcolor> instead of C<debug> enables a
463form of output that can be used to get a colorful display on terminals
464that understand termcap color sequences. Set C<$ENV{PERL_RE_TC}> to a
465comma-separated list of C<termcap> properties to use for highlighting
ffbc6a93 466strings on/off, pre-point part on/off.
57e8c15d 467See L<perldebug/"Debugging Regular Expressions"> for additional info.
2cd61cdb 468
de8c5301
YO
469As of 5.9.5 the directive C<use re 'debug'> and its equivalents are
470lexically scoped, as the other directives are. However they have both
471compile-time and run-time effects.
472
473See L<perlmodlib/Pragmatic Modules>.
474
475=head2 'Debug' mode
476
a3621e74
YO
477Similarly C<use re 'Debug'> produces debugging output, the difference
478being that it allows the fine tuning of what debugging output will be
be8e71aa
YO
479emitted. Options are divided into three groups, those related to
480compilation, those related to execution and those related to special
481purposes. The options are as follows:
482
483=over 4
484
485=item Compile related options
486
487=over 4
488
489=item COMPILE
490
491Turns on all compile related debug options.
492
493=item PARSE
494
495Turns on debug output related to the process of parsing the pattern.
496
497=item OPTIMISE
498
499Enables output related to the optimisation phase of compilation.
500
24b23f37 501=item TRIEC
be8e71aa
YO
502
503Detailed info about trie compilation.
504
505=item DUMP
506
507Dump the final program out after it is compiled and optimised.
508
d9a72fcc
YO
509=item FLAGS
510
511Dump the flags associated with the program
512
513=item TEST
514
515Print output intended for testing the internals of the compile process
516
be8e71aa
YO
517=back
518
519=item Execute related options
520
521=over 4
522
523=item EXECUTE
524
525Turns on all execute related debug options.
526
527=item MATCH
528
529Turns on debugging of the main matching loop.
530
24b23f37 531=item TRIEE
be8e71aa
YO
532
533Extra debugging of how tries execute.
534
535=item INTUIT
536
48fe68f5 537Enable debugging of start-point optimisations.
be8e71aa
YO
538
539=back
540
541=item Extra debugging options
542
543=over 4
544
545=item EXTRA
546
547Turns on all "extra" debugging options.
548
e7707071
YO
549=item BUFFERS
550
c27a5cfe 551Enable debugging the capture group storage during match. Warning,
e7707071
YO
552this can potentially produce extremely large output.
553
24b23f37
YO
554=item TRIEM
555
556Enable enhanced TRIE debugging. Enhances both TRIEE
557and TRIEC.
558
559=item STATE
560
4ee9a43f 561Enable debugging of states in the engine.
24b23f37
YO
562
563=item STACK
be8e71aa 564
24b23f37
YO
565Enable debugging of the recursion stack in the engine. Enabling
566or disabling this option automatically does the same for debugging
567states as well. The output from this can be quite large.
568
d9a72fcc
YO
569=item GPOS
570
571Enable debugging of the \G modifier.
572
24b23f37
YO
573=item OPTIMISEM
574
48fe68f5 575Enable enhanced optimisation debugging and start-point optimisations.
99cc5cc6 576Probably not useful except when debugging the regexp engine itself.
24b23f37
YO
577
578=item OFFSETS
579
580Dump offset information. This can be used to see how regops correlate
581to the pattern. Output format is
582
583 NODENUM:POSITION[LENGTH]
584
585Where 1 is the position of the first char in the string. Note that position
586can be 0, or larger than the actual length of the pattern, likewise length
587can be zero.
be8e71aa 588
24b23f37 589=item OFFSETSDBG
be8e71aa
YO
590
591Enable debugging of offsets information. This emits copious
fe759410 592amounts of trace information and doesn't mesh well with other
be8e71aa
YO
593debug options.
594
fe759410 595Almost definitely only useful to people hacking
be8e71aa
YO
596on the offsets part of the debug engine.
597
d9a72fcc 598
be8e71aa
YO
599=back
600
601=item Other useful flags
602
603These are useful shortcuts to save on the typing.
604
605=over 4
606
607=item ALL
608
48fe68f5
KW
609Enable all options at once except OFFSETS, OFFSETSDBG and BUFFERS.
610(To get every single option without exception, use both ALL and EXTRA.)
be8e71aa
YO
611
612=item All
613
fe759410 614Enable DUMP and all execute options. Equivalent to:
be8e71aa
YO
615
616 use re 'debug';
617
618=item MORE
619
620=item More
621
48fe68f5 622Enable the options enabled by "All", plus STATE, TRIEC, and TRIEM.
be8e71aa 623
dba3f186 624=back
be8e71aa 625
dba3f186 626=back
a3621e74 627
1e2e3d02 628As of 5.9.5 the directive C<use re 'debug'> and its equivalents are
48fe68f5 629lexically scoped, as are the other directives. However they have both
1e2e3d02 630compile-time and run-time effects.
b3eb6a9b 631
de8c5301 632=head2 Exportable Functions
b3eb6a9b 633
de8c5301 634As of perl 5.9.5 're' debug contains a number of utility functions that
4ee9a43f 635may be optionally exported into the caller's namespace. They are listed
de8c5301 636below.
b3eb6a9b 637
de8c5301 638=over 4
b3eb6a9b 639
de8c5301 640=item is_regexp($ref)
02ea72ae 641
de8c5301 642Returns true if the argument is a compiled regular expression as returned
4ee9a43f 643by C<qr//>, false if it is not.
02ea72ae 644
4ee9a43f
RGS
645This function will not be confused by overloading or blessing. In
646internals terms, this extracts the regexp pointer out of the
3a5e0888 647PERL_MAGIC_qr structure so it cannot be fooled.
894be9b7 648
de8c5301 649=item regexp_pattern($ref)
02ea72ae 650
4ee9a43f
RGS
651If the argument is a compiled regular expression as returned by C<qr//>,
652then this function returns the pattern.
be8e71aa 653
4ee9a43f
RGS
654In list context it returns a two element list, the first element
655containing the pattern and the second containing the modifiers used when
656the pattern was compiled.
be8e71aa 657
4ee9a43f 658 my ($pat, $mods) = regexp_pattern($ref);
a3621e74 659
99cc5cc6 660In scalar context it returns the same as perl would when stringifying a raw
4ee9a43f
RGS
661C<qr//> with the same pattern inside. If the argument is not a compiled
662regular expression then this routine returns false but defined in scalar context,
663and the empty list in list context. Thus the following
f9f4320a 664
dff5e0c4 665 if (regexp_pattern($ref) eq '(?^i:foo)')
dba3f186 666
de8c5301 667will be warning free regardless of what $ref actually is.
380e0b81 668
4ee9a43f
RGS
669Like C<is_regexp> this function will not be confused by overloading
670or blessing of the object.
b3eb6a9b 671
256ddcd0
YO
672=item regmust($ref)
673
432acd5f 674If the argument is a compiled regular expression as returned by C<qr//>,
99cc5cc6 675then this function returns what the optimiser considers to be the longest
432acd5f
RGS
676anchored fixed string and longest floating fixed string in the pattern.
677
678A I<fixed string> is defined as being a substring that must appear for the
679pattern to match. An I<anchored fixed string> is a fixed string that must
680appear at a particular offset from the beginning of the match. A I<floating
681fixed string> is defined as a fixed string that can appear at any point in
682a range of positions relative to the start of the match. For example,
683
684 my $qr = qr/here .* there/x;
685 my ($anchored, $floating) = regmust($qr);
256ddcd0 686 print "anchored:'$anchored'\nfloating:'$floating'\n";
432acd5f 687
256ddcd0
YO
688results in
689
690 anchored:'here'
691 floating:'there'
692
432acd5f
RGS
693Because the C<here> is before the C<.*> in the pattern, its position
694can be determined exactly. That's not true, however, for the C<there>;
695it could appear at any point after where the anchored string appeared.
d952710b 696Perl uses both for its optimisations, preferring the longer, or, if they are
256ddcd0
YO
697equal, the floating.
698
699B<NOTE:> This may not necessarily be the definitive longest anchored and
432acd5f 700floating string. This will be what the optimiser of the Perl that you
256ddcd0
YO
701are using thinks is the longest. If you believe that the result is wrong
702please report it via the L<perlbug> utility.
703
28d8d7f4 704=item regname($name,$all)
44a2ac75 705
28d8d7f4
YO
706Returns the contents of a named buffer of the last successful match. If
707$all is true, then returns an array ref containing one entry per buffer,
44a2ac75
YO
708otherwise returns the first defined buffer.
709
28d8d7f4 710=item regnames($all)
44a2ac75 711
28d8d7f4
YO
712Returns a list of all of the named buffers defined in the last successful
713match. If $all is true, then it returns all names defined, if not it returns
714only names which were involved in the match.
44a2ac75 715
28d8d7f4 716=item regnames_count()
44a2ac75 717
28d8d7f4
YO
718Returns the number of distinct names defined in the pattern used
719for the last successful match.
44a2ac75 720
28d8d7f4
YO
721B<Note:> this result is always the actual number of distinct
722named buffers defined, it may not actually match that which is
723returned by C<regnames()> and related routines when those routines
724have not been called with the $all parameter set.
44a2ac75 725
de8c5301 726=back
b3eb6a9b 727
de8c5301 728=head1 SEE ALSO
b3eb6a9b 729
de8c5301
YO
730L<perlmodlib/Pragmatic Modules>.
731
732=cut