use strict;
use warnings;
-our $VERSION = "0.23";
+our $VERSION = "0.37";
our @ISA = qw(Exporter);
our @EXPORT_OK = ('regmust',
qw(is_regexp regexp_pattern
s => 1 << ($PMMOD_SHIFT + 1),
i => 1 << ($PMMOD_SHIFT + 2),
x => 1 << ($PMMOD_SHIFT + 3),
- p => 1 << ($PMMOD_SHIFT + 4),
+ xx => 1 << ($PMMOD_SHIFT + 4),
+ n => 1 << ($PMMOD_SHIFT + 5),
+ p => 1 << ($PMMOD_SHIFT + 6),
+ strict => 1 << ($PMMOD_SHIFT + 10),
# special cases:
d => 0,
l => 1,
TRIEC => 0x000004,
DUMP => 0x000008,
FLAGS => 0x000010,
+ TEST => 0x000020,
EXECUTE => 0x00FF00,
INTUIT => 0x000100,
sub bits {
my $on = shift;
my $bits = 0;
+ my $turning_all_off = ! @_ && ! $on;
+ if ($turning_all_off) {
+
+ # Pretend were called with certain parameters, which are best dealt
+ # with that way.
+ push @_, keys %bitmask; # taint and eval
+ push @_, 'strict';
+ }
+
+ # Process each subpragma parameter
ARG:
foreach my $idx (0..$#_){
my $s=$_[$idx];
} elsif ($EXPORT_OK{$s}) {
require Exporter;
re->export_to_level(2, 're', $s);
+ } elsif ($s eq 'strict') {
+ if ($on) {
+ $^H{reflags} |= $reflags{$s};
+ warnings::warnif('experimental::re_strict',
+ "\"use re 'strict'\" is experimental");
+
+ # Turn on warnings if not already done.
+ if (! warnings::enabled('regexp')) {
+ require warnings;
+ warnings->import('regexp');
+ $^H{re_strict} = 1;
+ }
+ }
+ else {
+ $^H{reflags} &= ~$reflags{$s} if $^H{reflags};
+
+ # Turn off warnings if we turned them on.
+ warnings->unimport('regexp') if $^H{re_strict};
+ }
+ if ($^H{reflags}) {
+ $^H |= $flags_hint;
+ }
+ else {
+ $^H &= ~$flags_hint;
+ }
} elsif ($s =~ s/^\///) {
my $reflags = $^H{reflags} || 0;
my $seen_charset;
+ my $x_count = 0;
while ($s =~ m/( . )/gx) {
local $_ = $1;
if (/[adul]/) {
}
else {
delete $^H{reflags_charset}
- if defined $^H{reflags_charset}
- && $^H{reflags_charset} == $reflags{$_};
+ if defined $^H{reflags_charset}
+ && $^H{reflags_charset} == $reflags{$_};
}
} elsif (exists $reflags{$_}) {
- $on
+ if ($_ eq 'x') {
+ $x_count++;
+ if ($x_count > 2) {
+ require Carp;
+ Carp::carp(
+ qq 'The "x" flag may only appear a maximum of twice'
+ );
+ }
+ elsif ($x_count == 2) {
+ $_ = 'xx'; # First time through got the /x
+ }
+ }
+
+ $on
? $reflags |= $reflags{$_}
: ($reflags &= ~$reflags{$_});
} else {
}
}
($^H{reflags} = $reflags or defined $^H{reflags_charset})
- ? $^H |= $flags_hint
- : ($^H &= ~$flags_hint);
+ ? $^H |= $flags_hint
+ : ($^H &= ~$flags_hint);
} else {
require Carp;
Carp::carp("Unknown \"re\" subpragma '$s' (known ones are: ",
")");
}
}
+
+ if ($turning_all_off) {
+ _load_unload(0);
+ $^H{reflags} = 0;
+ $^H{reflags_charset} = 0;
+ $^H &= ~$flags_hint;
+ }
+
$bits;
}
# switch)
}
+ use re 'strict'; # Raise warnings for more conditions
+
use re '/ix';
"FOO" =~ / foo /; # /ix implied
no re '/x';
use re qw(is_regexp regexp_pattern); # import utility functions
my ($pat,$mods)=regexp_pattern(qr/foo/i);
- if (is_regexp($obj)) {
+ if (is_regexp($obj)) {
print "Got regexp: ",
scalar regexp_pattern($obj); # just as perl would stringify
} # it but no hassle with blessed
I<is> allowed if $pat is a precompiled regular expression, even
if $pat contains C<(?{ ... })> assertions or C<(??{ ... })> subexpressions.
+=head2 'strict' mode
+
+Note that this is an experimental feature which may be changed or removed in a
+future Perl release.
+
+When C<use re 'strict'> is in effect, stricter checks are applied than
+otherwise when compiling regular expressions patterns. These may cause more
+warnings to be raised than otherwise, and more things to be fatal instead of
+just warnings. The purpose of this is to find and report at compile time some
+things, which may be legal, but have a reasonable possibility of not being the
+programmer's actual intent. This automatically turns on the C<"regexp">
+warnings category (if not already on) within its scope.
+
+As an example of something that is caught under C<"strict'>, but not
+otherwise, is the pattern
+
+ qr/\xABC/
+
+The C<"\x"> construct without curly braces should be followed by exactly two
+hex digits; this one is followed by three. This currently evaluates as
+equivalent to
+
+ qr/\x{AB}C/
+
+that is, the character whose code point value is C<0xAB>, followed by the
+letter C<C>. But since C<C> is a a hex digit, there is a reasonable chance
+that the intent was
+
+ qr/\x{ABC}/
+
+that is the single character at C<0xABC>. Under C<'strict'> it is an error to
+not follow C<\x> with exactly two hex digits. When not under C<'strict'> a
+warning is generated if there is only one hex digit, and no warning is raised
+if there are more than two.
+
+It is expected that what exactly C<'strict'> does will evolve over time as we
+gain experience with it. This means that programs that compile under it in
+today's Perl may not compile, or may have more or fewer warnings, in future
+Perls. There is no backwards compatibility promises with regards to it. Also
+there are already proposals for an alternate syntax for enabling it. For
+these reasons, using it will raise a C<experimental::re_strict> class warning,
+unless that category is turned off.
+
+Note that if a pattern compiled within C<'strict'> is recompiled, say by
+interpolating into another pattern, outside of C<'strict'>, it is not checked
+again for strictness. This is because if it works under strict it must work
+under non-strict.
+
=head2 '/flags' mode
-When C<use re '/flags'> is specified, the given flags are automatically
+When C<use re '/I<flags>'> is specified, the given I<flags> are automatically
added to every regular expression till the end of the lexical scope.
-
-C<no re '/flags'> will turn off the effect of C<use re '/flags'> for the
+I<flags> can be any combination of
+C<'a'>,
+C<'aa'>,
+C<'d'>,
+C<'i'>,
+C<'l'>,
+C<'m'>,
+C<'n'>,
+C<'p'>,
+C<'s'>,
+C<'u'>,
+C<'x'>,
+and/or
+C<'xx'>.
+
+C<no re '/I<flags>'> will turn off the effect of C<use re '/I<flags>'> for the
given flags.
-For example, if you want all your regular expressions to have /msx on by
+For example, if you want all your regular expressions to have /msxx on by
default, simply put
- use re '/msx';
+ use re '/msxx';
at the top of your code.
-The character set /adul flags cancel each other out. So, in this example,
+The character set C</adul> flags cancel each other out. So, in this example,
use re "/u";
"ss" =~ /\xdf/;
the second C<use re> does an implicit C<no re '/u'>.
+Similarly,
+
+ use re "/xx"; # Doubled-x
+ ...
+ use re "/x"; # Single x from here on
+ ...
+
Turning on one of the character set flags with C<use re> takes precedence over the
C<locale> pragma and the 'unicode_strings' C<feature>, for regular
expressions. Turning off one of these flags when it is active reverts to
See L<perldebug/"Debugging Regular Expressions"> for additional info.
As of 5.9.5 the directive C<use re 'debug'> and its equivalents are
-lexically scoped, as the other directives are. However they have both
+lexically scoped, as the other directives are. However they have both
compile-time and run-time effects.
See L<perlmodlib/Pragmatic Modules>.
Dump the final program out after it is compiled and optimised.
+=item FLAGS
+
+Dump the flags associated with the program
+
+=item TEST
+
+Print output intended for testing the internals of the compile process
+
=back
=item Execute related options
or disabling this option automatically does the same for debugging
states as well. This output from this can be quite large.
+=item GPOS
+
+Enable debugging of the \G modifier.
+
=item OPTIMISEM
Enable enhanced optimisation debugging and start-point optimisations.
Almost definitely only useful to people hacking
on the offsets part of the debug engine.
+
=back
=item Other useful flags
=item ALL
Enable all options at once except OFFSETS, OFFSETSDBG and BUFFERS.
-(To get every single option without exception, use both ALL and EXTRA.)
+(To get every single option without exception, use both ALL and EXTRA, or
+starting in 5.30 on a C<-DDEBUGGING>-enabled perl interpreter, use
+the B<-Drv> command-line switches.)
=item All
Because the C<here> is before the C<.*> in the pattern, its position
can be determined exactly. That's not true, however, for the C<there>;
it could appear at any point after where the anchored string appeared.
-Perl uses both for its optimisations, prefering the longer, or, if they are
+Perl uses both for its optimisations, preferring the longer, or, if they are
equal, the floating.
B<NOTE:> This may not necessarily be the definitive longest anchored and