minimum length for the F is 1. This is important as the minimum length
is used to determine offsets in front of and behind the string being
looked for. Since strings can be composites this is the length of the
- pattern at the time it was commited with a scan_commit. Note that
+ pattern at the time it was committed with a scan_commit. Note that
the length is calculated by study_chunk, so that the minimum lengths
are not known until the full pattern has been compiled, thus the
pointer to the value.
SV **longest; /* Either &l_fixed, or &l_float. */
SV *longest_fixed; /* longest fixed string found in pattern */
I32 offset_fixed; /* offset where it starts */
- I32 *minlen_fixed; /* pointer to the minlen relevent to the string */
+ I32 *minlen_fixed; /* pointer to the minlen relevant to the string */
I32 lookbehind_fixed; /* is the position of the string modified by LB */
SV *longest_float; /* longest floating string found in pattern */
I32 offset_float_min; /* earliest point in string it can appear */
I32 offset_float_max; /* latest point in string it can appear */
- I32 *minlen_float; /* pointer to the minlen relevent to the string */
+ I32 *minlen_float; /* pointer to the minlen relevant to the string */
I32 lookbehind_float; /* is the position of the string modified by LB */
I32 flags;
I32 whilem_c;
cl->flags = ANYOF_EOS|ANYOF_UNICODE_ALL;
if (LOC)
cl->flags |= ANYOF_LOCALE;
- cl->flags |= ANYOF_FOLD;
+ cl->flags |= ANYOF_LOC_NONBITMAP_FOLD;
}
/* Can match anything (initialization) */
if (!(ANYOF_CLASS_TEST_ANY_SET(and_with))
&& !(ANYOF_CLASS_TEST_ANY_SET(cl))
&& (and_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
- && !(and_with->flags & ANYOF_FOLD)
- && !(cl->flags & ANYOF_FOLD)) {
+ && !(and_with->flags & ANYOF_LOC_NONBITMAP_FOLD)
+ && !(cl->flags & ANYOF_LOC_NONBITMAP_FOLD)) {
int i;
if (and_with->flags & ANYOF_INVERT)
if (!(and_with->flags & ANYOF_EOS))
cl->flags &= ~ANYOF_EOS;
- if (!(and_with->flags & ANYOF_FOLD))
- cl->flags &= ~ANYOF_FOLD;
+ if (!(and_with->flags & ANYOF_LOC_NONBITMAP_FOLD))
+ cl->flags &= ~ANYOF_LOC_NONBITMAP_FOLD;
if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_NONBITMAP &&
!(and_with->flags & ANYOF_INVERT)) {
* (OK1(i) | OK1(i')) | (!OK1(i) & !OK1(i'))
*/
if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
- && !(or_with->flags & ANYOF_FOLD)
- && !(cl->flags & ANYOF_FOLD) ) {
+ && !(or_with->flags & ANYOF_LOC_NONBITMAP_FOLD)
+ && !(cl->flags & ANYOF_LOC_NONBITMAP_FOLD) ) {
int i;
for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
} else {
/* (B1 | CL1) | (B2 | CL2) = (B1 | B2) | (CL1 | CL2)) */
if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
- && (!(or_with->flags & ANYOF_FOLD)
- || (cl->flags & ANYOF_FOLD)) ) {
+ && (!(or_with->flags & ANYOF_LOC_NONBITMAP_FOLD)
+ || (cl->flags & ANYOF_LOC_NONBITMAP_FOLD)) ) {
int i;
/* OR char bitmap and class bitmap separately */
if (or_with->flags & ANYOF_EOS)
cl->flags |= ANYOF_EOS;
- if (or_with->flags & ANYOF_FOLD)
- cl->flags |= ANYOF_FOLD;
+ if (or_with->flags & ANYOF_LOC_NONBITMAP_FOLD)
+ cl->flags |= ANYOF_LOC_NONBITMAP_FOLD;
/* If both nodes match something outside the bitmap, but what they match
* outside is not the same pointer, and hence not easily compared, give up
tables that are used to generate the final compressed
representation which is what dump_trie expects.
- Part of the reason for their existance is to provide a form
+ Part of the reason for their existence is to provide a form
of documentation as to how the different representations function.
*/
Thus EVAL blocks following a trie may be called a different number of times with
and without the optimisation. With the optimisations dupes will be silently
-ignored. This inconsistant behaviour of EVAL type nodes is well established as
+ignored. This inconsistent behaviour of EVAL type nodes is well established as
the following demonstrates:
'words'=~/(word|word|word)(?{ print $1 })[xyz]/
Example of what happens on a structural level:
-The regexp /(ac|ad|ab)+/ will produce the folowing debug output:
+The regexp /(ac|ad|ab)+/ will produce the following debug output:
1: CURLYM[1] {1,32767}(18)
5: BRANCH(8)
middle and the least common are on the outside. IMO this would be better
than a most to least common mapping as there's a decent chance the most
common letter will share a node with the least common, meaning the node
- will not be compressable. With a middle is most common approach the worst
+ will not be compressible. With a middle is most common approach the worst
case is when we have the least common nodes twice.
*/
return stopnow;
}
-/* REx optimizer. Converts nodes into quickier variants "in place".
+/* REx optimizer. Converts nodes into quicker variants "in place".
Finds fixed substrings. */
/* Stops at toplevel WHILEM as well as at "last". At end *scanp is set
SAVEFREEPV(and_withp)
/* this is a chain of data about sub patterns we are processing that
- need to be handled seperately/specially in study_chunk. Its so
+ need to be handled separately/specially in study_chunk. It's so
we can simulate recursion without losing state. */
struct scan_frame;
typedef struct scan_frame {
which would be constructed from a pattern like /A|LIST|OF|WORDS/
- If we can find such a subseqence we need to turn the first
+ If we can find such a subsequence we need to turn the first
element into a trie and then add the subsequent branch exact
strings to the trie.
If x(1..n)==tail then we can do a simple trie, if not we make
a "jump" trie, such that when we match the appropriate word
- we "jump" to the appopriate tail node. Essentailly we turn
+ we "jump" to the appropriate tail node. Essentially we turn
a nested if into a case structure of sorts.
*/
and noper_next is the same as scan (our current
position in the regex) then the EXACT branch is
a possible optimization target. Once we have
- two or more consequetive such branches we can
+ two or more consecutive such branches we can
create a trie of the EXACT's contents and stitch
it in place. If the sequence represents all of
the branches we eliminate the whole thing and
int compat = 1;
- /* If compatibile, we or it in below. It is compatible if is
+ /* If compatible, we or it in below. It is compatible if it is
* in the bitmap and either 1) its bit or its fold is set, or 2)
* it's for a locale. Even if there isn't unicode semantics
* here, at runtime there may be because of matching against a
if (uc >= 0x100 ||
(!(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE))
&& !ANYOF_BITMAP_TEST(data->start_class, uc)
- && (!(data->start_class->flags & ANYOF_FOLD)
+ && (!(data->start_class->flags & ANYOF_LOC_NONBITMAP_FOLD)
|| !ANYOF_BITMAP_TEST(data->start_class, PL_fold_latin1[uc])))
)
compat = 0;
if (compat) {
ANYOF_BITMAP_SET(data->start_class, uc);
data->start_class->flags &= ~ANYOF_EOS;
- data->start_class->flags |= ANYOF_FOLD;
+ data->start_class->flags |= ANYOF_LOC_NONBITMAP_FOLD;
if (OP(scan) == EXACTFL) {
data->start_class->flags |= ANYOF_LOCALE;
}
}
}
else if (flags & SCF_DO_STCLASS_OR) {
- if (data->start_class->flags & ANYOF_FOLD) {
+ if (data->start_class->flags & ANYOF_LOC_NONBITMAP_FOLD) {
/* false positive possible if the class is case-folded.
Assume that the locale settings are the same... */
if (uc < 0x100) {
#ifdef DEBUGGING
OP(nxt1 + 1) = OPTIMIZED; /* was count. */
OP(nxt + 1) = OPTIMIZED; /* was count. */
- NEXT_OFF(nxt1 + 1) = 0; /* just for consistancy. */
- NEXT_OFF(nxt + 1) = 0; /* just for consistancy. */
+ NEXT_OFF(nxt1 + 1) = 0; /* just for consistency. */
+ NEXT_OFF(nxt + 1) = 0; /* just for consistency. */
#endif
#if 0
while ( nxt1 && (OP(nxt1) != WHILEM)) {
if (SIZE_ONLY) {
HE *he_str;
SV *sv_dat = NULL;
- if (!svname) /* shouldnt happen */
+ if (!svname) /* shouldn't happen */
Perl_croak(aTHX_
"panic: reg_scan_name returned NULL");
if (!RExC_paren_names) {
Note: we have to be careful with escapes, as they can be both literal
and special, and in the case of \10 and friends can be either, depending
- on context. Specifically there are two seperate switches for handling
+ on context. Specifically there are two separate switches for handling
escape sequences, with the one for handling literal escapes requiring
a dummy entry for all of the special escapes that are actually handled
by the other.
* which we have to wait to see what folding is in effect at runtime, and
* for things not in the bitmap */
if (FOLD && (LOC || ANYOF_FLAGS(ret) & ANYOF_NONBITMAP)) {
- ANYOF_FLAGS(ret) |= ANYOF_FOLD;
+ ANYOF_FLAGS(ret) |= ANYOF_LOC_NONBITMAP_FOLD;
}
/* Optimize inverted simple patterns (e.g. [^a-z]). Note that this doesn't
/* A locale node with one point can be folded; all the other cases
* with folding will have two points, since we calculate them above
*/
- if (ANYOF_FLAGS(ret) & ANYOF_FOLD) {
+ if (ANYOF_FLAGS(ret) & ANYOF_LOC_NONBITMAP_FOLD) {
op = EXACTFL;
}
else {
- Look for optimizable sequences at the same time.
- currently only looks for EXACT chains.
-This is expermental code. The idea is to use this routine to perform
+This is experimental code. The idea is to use this routine to perform
in place optimizations on branches and groups as they are constructed,
with the long term intention of removing optimization from study_chunk so
that it is purely analytical.
if (flags & ANYOF_LOCALE)
sv_catpvs(sv, "{loc}");
- if (flags & ANYOF_FOLD)
+ if (flags & ANYOF_LOC_NONBITMAP_FOLD)
sv_catpvs(sv, "{i}");
Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
if (flags & ANYOF_INVERT)
The solution is to make a lightweight copy of the regexp structure
when a qr// is returned from the code executed by (??{$qr}) this
- lightweight copy doesnt actually own any of its data except for
+ lightweight copy doesn't actually own any of its data except for
the starp/end and the actual regexp structure itself.
*/