*/
#ifdef PERL_EXT_RE_BUILD
-/* need to replace pregcomp et al, so enable that */
-# ifndef PERL_IN_XSUB_RE
-# define PERL_IN_XSUB_RE
-# endif
-/* need access to debugger hooks */
-# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
-# define DEBUGGING
-# endif
-#endif
-
-#ifdef PERL_IN_XSUB_RE
-/* We *really* need to overwrite these symbols: */
-# define Perl_regexec_flags my_regexec
-# define Perl_regdump my_regdump
-# define Perl_regprop my_regprop
-# define Perl_re_intuit_start my_re_intuit_start
-/* *These* symbols are masked to allow static link. */
-# define Perl_pregexec my_pregexec
-# define Perl_reginitcolors my_reginitcolors
-# define Perl_regclass_swash my_regclass_swash
-
-# define PERL_NO_GET_CONTEXT
+#include "re_top.h"
#endif
/*
#define PERL_IN_REGEXEC_C
#include "perl.h"
-#include "regcomp.h"
+#ifdef PERL_IN_XSUB_RE
+# include "re_comp.h"
+#else
+# include "regcomp.h"
+#endif
#define RF_tainted 1 /* tainted information used? */
#define RF_warned 2 /* warned about big count? */
#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
-#define HOPc(pos,off) ((char *)(PL_reg_match_utf8 \
+#define HOPc(pos,off) \
+ (char *)(PL_reg_match_utf8 \
? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
- : (U8*)(pos + off)))
-#define HOPBACKc(pos, off) ((char*) \
- ((PL_reg_match_utf8) \
- ? reghopmaybe3((U8*)pos, -off, ((U8*)(off < 0 ? PL_regeol : PL_bostr))) \
- : (pos - off >= PL_bostr) \
- ? (U8*)(pos - off) \
- : (U8*)NULL) \
-)
+ : (U8*)(pos + off))
+#define HOPBACKc(pos, off) \
+ (char*)(PL_reg_match_utf8\
+ ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
+ : (pos - off >= PL_bostr) \
+ ? (U8*)pos - off \
+ : NULL)
-#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
+/* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
+
/* for use after a quantifier and before an EXACT-like node -- japhy */
#define JUMPABLE(rn) ( \
OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
OP(rn) == PLUS || OP(rn) == MINMOD || \
- (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
+ (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
)
#define HAS_TEXT(rn) ( \
- PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
+ PL_regkind[OP(rn)] == EXACT || PL_regkind[OP(rn)] == REF \
)
/*
follow but for lookbehind (rn->flags != 0) we skip to the next step.
*/
#define FIND_NEXT_IMPT(rn) STMT_START { \
- while (JUMPABLE(rn)) \
- if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
+ while (JUMPABLE(rn)) { \
+ const OPCODE type = OP(rn); \
+ if (type == SUSPEND || PL_regkind[type] == CURLY) \
rn = NEXTOPER(NEXTOPER(rn)); \
- else if (OP(rn) == PLUS) \
+ else if (type == PLUS) \
rn = NEXTOPER(rn); \
- else if (OP(rn) == IFMATCH) \
+ else if (type == IFMATCH) \
rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
else rn += NEXT_OFF(rn); \
+ } \
} STMT_END
static void restore_pos(pTHX_ void *arg);
#define REGCP_PAREN_ELEMS 4
const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
int p;
+ GET_RE_DEBUG_FLAGS_DECL;
if (paren_elems_to_push < 0)
Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
SSPUSHINT(PL_regstartp[p]);
SSPUSHPTR(PL_reg_start_tmp[p]);
SSPUSHINT(p);
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
+ " saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
+ (UV)p, (IV)PL_regstartp[p],
+ (IV)(PL_reg_start_tmp[p] - PL_bostr),
+ (IV)PL_regendp[p]
+ ));
}
/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
SSPUSHINT(PL_regsize);
);
}
DEBUG_EXECUTE_r(
- if ((I32)(*PL_reglastparen + 1) <= rex->nparens) {
+ if (*PL_reglastparen + 1 <= rex->nparens) {
PerlIO_printf(Perl_debug_log,
" restoring \\%"IVdf"..\\%"IVdf" to undef\n",
(IV)(*PL_reglastparen + 1), (IV)rex->nparens);
* building DynaLoader will fail:
* "Error: '*' not in typemap in DynaLoader.xs, line 164"
* --jhi */
- for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
+ for (i = *PL_reglastparen + 1; (U32)i <= rex->nparens; i++) {
if (i > PL_regsize)
PL_regstartp[i] = -1;
PL_regendp[i] = -1;
#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
-#define TRYPAREN(paren, n, input, where) { \
- if (paren) { \
- if (n) { \
- PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
- PL_regendp[paren] = input - PL_bostr; \
- } \
- else \
- PL_regendp[paren] = -1; \
- } \
- REGMATCH(next, where); \
- if (result) \
- sayYES; \
- if (paren && n) \
- PL_regendp[paren] = -1; \
-}
-
-
/*
* pregexec and friends
*/
+#ifndef PERL_IN_XSUB_RE
/*
- pregexec - match a regexp against a string
*/
regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
nosave ? 0 : REXEC_COPY_STR);
}
-
+#endif
/*
* Need to implement the following flags for reg_anch:
sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
strpos;
const int len = PL_reg_match_utf8 ?
- strlen(s) : strend - strpos;
+ (int)strlen(s) : strend - strpos;
if (!PL_colorset)
reginitcolors();
if (PL_reg_match_utf8)
{
char * const last = HOP3c(s, -start_shift, strbeg);
char *last1, *last2;
- char *s1 = s;
+ char * const saved_s = s;
SV* must;
t = s - prog->check_offset_max;
if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
&& (!do_utf8
- || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
+ || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
&& t > strpos)))
- /* EMPTY */;
+ NOOP;
else
t = strpos;
t = HOP3c(t, prog->anchored_offset, strend);
}
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
", trying floating at offset %ld...\n",
- (long)(HOP3c(s1, 1, strend) - i_strpos)));
+ (long)(HOP3c(saved_s, 1, strend) - i_strpos)));
other_last = HOP3c(last1, prog->anchored_offset+1, strend);
s = HOP3c(last, 1, strend);
goto restart;
(long)(s - i_strpos)));
t = HOP3c(s, -prog->anchored_offset, strbeg);
other_last = HOP3c(s, 1, strend);
- s = s1;
+ s = saved_s;
if (t == strpos)
goto try_at_start;
goto try_at_offset;
}
else { /* Take into account the floating substring. */
char *last, *last1;
- char *s1 = s;
+ char * const saved_s = s;
SV* must;
t = HOP3c(s, -start_shift, strbeg);
}
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
", trying anchored starting at offset %ld...\n",
- (long)(s1 + 1 - i_strpos)));
+ (long)(saved_s + 1 - i_strpos)));
other_last = last;
s = HOP3c(t, 1, strend);
goto restart;
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
(long)(s - i_strpos)));
other_last = s; /* Fix this later. --Hugo */
- s = s1;
+ s = saved_s;
if (t == strpos)
goto try_at_start;
goto try_at_offset;
t = s - prog->check_offset_max;
if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
&& (!do_utf8
- || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
+ || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*)strpos))
&& t > strpos))) {
/* Fixed substring is found far enough so that the match
cannot start at strpos. */
/* Last resort... */
/* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
- if (prog->regstclass) {
+ if (prog->regstclass && OP(prog->regstclass)!=TRIE) {
/* minlen == 0 is possible if regstclass is \b or \B,
and the fixed substr is ''$.
Since minlen is already taken into account, s+1 is before strend;
/* If regstclass takes bytelength more than 1: If charlength==1, OK.
This leaves EXACTF only, which is dealt with in find_byclass(). */
const U8* const str = (U8*)STRING(prog->regstclass);
- const int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
+ const int cl_l = (PL_regkind[OP(prog->regstclass)] == EXACT
? CHR_DIST(str+STR_LEN(prog->regstclass), str)
: 1);
- const char * const endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
+ const char * endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
: (prog->float_substr || prog->float_utf8
? HOP3c(HOP3c(check_at, -start_shift, strbeg),
cl_l, strend)
: strend);
-
+ /*if (OP(prog->regstclass) == TRIE)
+ endpos++;*/
t = s;
- s = find_byclass(prog, prog->regstclass, s, endpos, 1);
+ s = find_byclass(prog, prog->regstclass, s, endpos, NULL);
if (!s) {
#ifdef DEBUGGING
const char *what = NULL;
}
/* We know what class REx starts with. Try to find this position... */
+/* If reginfo is NULL, it's a dry run: candidate positions are located but regtry() is not called. */
+/* Annoyingly, all the vars in this routine have different names from their counterparts
+ in regmatch. /grrr */
+
STATIC char *
-S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *strend, I32 norun)
+S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
+ const char *strend, const regmatch_info *reginfo)
{
dVAR;
const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
!UTF8_IS_INVARIANT((U8)s[0]) ?
reginclass(prog, c, (U8*)s, 0, do_utf8) :
REGINCLASS(prog, c, (U8*)s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
/* The assignment of 2 is intentional:
* for the folded sharp s, the skip is 2. */
(skip = SHARP_S_SKIP))) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
break;
case CANY:
while (s < strend) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
do_exactf:
e = HOP3c(strend, -((I32)lnc), s);
- if (norun && e < s)
+ if (!reginfo && e < s)
e = s; /* Due to minlen logic of intuit() */
/* The idea in the EXACTF* cases is to first find the
uniflags);
if ( c == c1
&& (ln == len ||
- ibcmp_utf8(s, (char **)0, 0, do_utf8,
- m, (char **)0, ln, (bool)UTF))
- && (norun || regtry(prog, s)) )
+ ibcmp_utf8(s, NULL, 0, do_utf8,
+ m, NULL, ln, (bool)UTF))
+ && (!reginfo || regtry(reginfo, s)) )
goto got_it;
else {
U8 foldbuf[UTF8_MAXBYTES_CASE+1];
&& (f == c1 || f == c2)
&& (ln == foldlen ||
!ibcmp_utf8((char *) foldbuf,
- (char **)0, foldlen, do_utf8,
+ NULL, foldlen, do_utf8,
m,
- (char **)0, ln, (bool)UTF))
- && (norun || regtry(prog, s)) )
+ NULL, ln, (bool)UTF))
+ && (!reginfo || regtry(reginfo, s)) )
goto got_it;
}
s += len;
if ( (c == c1 || c == c2)
&& (ln == len ||
- ibcmp_utf8(s, (char **)0, 0, do_utf8,
- m, (char **)0, ln, (bool)UTF))
- && (norun || regtry(prog, s)) )
+ ibcmp_utf8(s, NULL, 0, do_utf8,
+ m, NULL, ln, (bool)UTF))
+ && (!reginfo || regtry(reginfo, s)) )
goto got_it;
else {
U8 foldbuf[UTF8_MAXBYTES_CASE+1];
&& (f == c1 || f == c2)
&& (ln == foldlen ||
!ibcmp_utf8((char *) foldbuf,
- (char **)0, foldlen, do_utf8,
+ NULL, foldlen, do_utf8,
m,
- (char **)0, ln, (bool)UTF))
- && (norun || regtry(prog, s)) )
+ NULL, ln, (bool)UTF))
+ && (!reginfo || regtry(reginfo, s)) )
goto got_it;
}
s += len;
&& (ln == 1 || !(OP(c) == EXACTF
? ibcmp(s, m, ln)
: ibcmp_locale(s, m, ln)))
- && (norun || regtry(prog, s)) )
+ && (!reginfo || regtry(reginfo, s)) )
goto got_it;
s++;
}
&& (ln == 1 || !(OP(c) == EXACTF
? ibcmp(s, m, ln)
: ibcmp_locale(s, m, ln)))
- && (norun || regtry(prog, s)) )
+ && (!reginfo || regtry(reginfo, s)) )
goto got_it;
s++;
}
LOAD_UTF8_CHARCLASS_ALNUM();
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (tmp == !(OP(c) == BOUND ?
- swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
+ (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
isALNUM_LC_utf8((U8*)s)))
{
tmp = !tmp;
- if ((norun || regtry(prog, s)))
+ if ((!reginfo || regtry(reginfo, s)))
goto got_it;
}
s += uskip;
if (tmp ==
!(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
tmp = !tmp;
- if ((norun || regtry(prog, s)))
+ if ((!reginfo || regtry(reginfo, s)))
goto got_it;
}
s++;
}
}
- if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
+ if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, s)))
goto got_it;
break;
case NBOUNDL:
LOAD_UTF8_CHARCLASS_ALNUM();
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (tmp == !(OP(c) == NBOUND ?
- swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
+ (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
isALNUM_LC_utf8((U8*)s)))
tmp = !tmp;
- else if ((norun || regtry(prog, s)))
+ else if ((!reginfo || regtry(reginfo, s)))
goto got_it;
s += uskip;
}
if (tmp ==
!(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
tmp = !tmp;
- else if ((norun || regtry(prog, s)))
+ else if ((!reginfo || regtry(reginfo, s)))
goto got_it;
s++;
}
}
- if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
+ if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, s)))
goto got_it;
break;
case ALNUM:
LOAD_UTF8_CHARCLASS_ALNUM();
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
else {
while (s < strend) {
if (isALNUM(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
if (do_utf8) {
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (isALNUM_LC_utf8((U8*)s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
else {
while (s < strend) {
if (isALNUM_LC(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
LOAD_UTF8_CHARCLASS_ALNUM();
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
else {
while (s < strend) {
if (!isALNUM(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
if (do_utf8) {
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!isALNUM_LC_utf8((U8*)s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
else {
while (s < strend) {
if (!isALNUM_LC(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
LOAD_UTF8_CHARCLASS_SPACE();
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
else {
while (s < strend) {
if (isSPACE(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
if (do_utf8) {
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
else {
while (s < strend) {
if (isSPACE_LC(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
LOAD_UTF8_CHARCLASS_SPACE();
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
else {
while (s < strend) {
if (!isSPACE(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
if (do_utf8) {
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
else {
while (s < strend) {
if (!isSPACE_LC(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
LOAD_UTF8_CHARCLASS_DIGIT();
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
else {
while (s < strend) {
if (isDIGIT(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
if (do_utf8) {
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (isDIGIT_LC_utf8((U8*)s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
else {
while (s < strend) {
if (isDIGIT_LC(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
LOAD_UTF8_CHARCLASS_DIGIT();
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
else {
while (s < strend) {
if (!isDIGIT(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
if (do_utf8) {
while (s + (uskip = UTF8SKIP(s)) <= strend) {
if (!isDIGIT_LC_utf8((U8*)s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
else {
while (s < strend) {
if (!isDIGIT_LC(*s)) {
- if (tmp && (norun || regtry(prog, s)))
+ if (tmp && (!reginfo || regtry(reginfo, s)))
goto got_it;
else
tmp = doevery;
}
}
break;
+ case TRIE:
+ /*Perl_croak(aTHX_ "panic: unknown regstclass TRIE");*/
+ {
+ const enum { trie_plain, trie_utf8, trie_utf8_fold }
+ trie_type = do_utf8 ?
+ (c->flags == EXACT ? trie_utf8 : trie_utf8_fold)
+ : trie_plain;
+ /* what trie are we using right now */
+ reg_ac_data *aho
+ = (reg_ac_data*)prog->data->data[ ARG( c ) ];
+ reg_trie_data *trie=aho->trie;
+
+ const char *last_start = strend - trie->minlen;
+ const char *real_start = s;
+ STRLEN maxlen = trie->maxlen;
+ SV *sv_points;
+ U8 **points; /* map of where we were in the input string
+ when reading a given string. For ASCII this
+ is unnecessary overhead as the relationship
+ is always 1:1, but for unicode, especially
+ case folded unicode this is not true. */
+ U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
+
+ GET_RE_DEBUG_FLAGS_DECL;
+
+ /* We can't just allocate points here. We need to wrap it in
+ * an SV so it gets freed properly if there is a croak while
+ * running the match */
+ ENTER;
+ SAVETMPS;
+ sv_points=newSV(maxlen * sizeof(U8 *));
+ SvCUR_set(sv_points,
+ maxlen * sizeof(U8 *));
+ SvPOK_on(sv_points);
+ sv_2mortal(sv_points);
+ points=(U8**)SvPV_nolen(sv_points );
+
+ if (trie->bitmap && trie_type != trie_utf8_fold) {
+ while (s <= last_start && !TRIE_BITMAP_TEST(trie,*s) ) {
+ s++;
+ }
+ }
+
+ while (s <= last_start) {
+ const U32 uniflags = UTF8_ALLOW_DEFAULT;
+ U8 *uc = (U8*)s;
+ U16 charid = 0;
+ U32 base = 1;
+ U32 state = 1;
+ UV uvc = 0;
+ STRLEN len = 0;
+ STRLEN foldlen = 0;
+ U8 *uscan = (U8*)NULL;
+ U8 *leftmost = NULL;
+
+ U32 pointpos = 0;
+
+ while ( state && uc <= (U8*)strend ) {
+ int failed=0;
+ if (aho->states[ state ].wordnum) {
+ U8 *lpos= points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1] ) % maxlen ];
+ if (!leftmost || lpos < leftmost)
+ leftmost= lpos;
+ if (base==0) break;
+ }
+ points[pointpos++ % maxlen]= uc;
+ switch (trie_type) {
+ case trie_utf8_fold:
+ if ( foldlen>0 ) {
+ uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
+ foldlen -= len;
+ uscan += len;
+ len=0;
+ } else {
+ uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
+ uvc = to_uni_fold( uvc, foldbuf, &foldlen );
+ foldlen -= UNISKIP( uvc );
+ uscan = foldbuf + UNISKIP( uvc );
+ }
+ break;
+ case trie_utf8:
+ uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN,
+ &len, uniflags );
+ break;
+ case trie_plain:
+ uvc = (UV)*uc;
+ len = 1;
+ }
+
+ if (uvc < 256) {
+ charid = trie->charmap[ uvc ];
+ }
+ else {
+ charid = 0;
+ if (trie->widecharmap) {
+ SV** const svpp = hv_fetch(trie->widecharmap,
+ (char*)&uvc, sizeof(UV), 0);
+ if (svpp)
+ charid = (U16)SvIV(*svpp);
+ }
+ }
+ DEBUG_TRIE_EXECUTE_r(
+ PerlIO_printf(Perl_debug_log,
+ "Pos: %d Charid:%3x CV:%4"UVxf" ",
+ (int)((const char*)uc - real_start), charid, uvc)
+ );
+ uc += len;
+
+ do {
+ U32 word = aho->states[ state ].wordnum;
+ base = aho->states[ state ].trans.base;
+
+ DEBUG_TRIE_EXECUTE_r(
+ PerlIO_printf( Perl_debug_log,
+ "%sState: %4"UVxf", Base: 0x%-4"UVxf" uvc=%"UVxf" word=%"UVxf"\n",
+ failed ? "Fail transition to " : "",
+ state, base, uvc, word)
+ );
+ if ( base ) {
+ U32 tmp;
+ if (charid &&
+ (base + charid > trie->uniquecharcount )
+ && (base + charid - 1 - trie->uniquecharcount
+ < trie->lasttrans)
+ && trie->trans[base + charid - 1 -
+ trie->uniquecharcount].check == state
+ && (tmp=trie->trans[base + charid - 1 -
+ trie->uniquecharcount ].next))
+ {
+ state = tmp;
+ break;
+ }
+ else {
+ failed++;
+ if ( state == 1 )
+ break;
+ else
+ state = aho->fail[state];
+ }
+ }
+ else {
+ /* we must be accepting here */
+ failed++;
+ break;
+ }
+ } while(state);
+ if (failed) {
+ if (leftmost)
+ break;
+ else if (!charid && trie->bitmap && trie_type != trie_utf8_fold) {
+ while ( uc <= (U8*)last_start && !TRIE_BITMAP_TEST(trie,*uc) ) {
+ uc++;
+ }
+ }
+ }
+ }
+ if ( aho->states[ state ].wordnum ) {
+ U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
+ if (!leftmost || lpos < leftmost)
+ leftmost = lpos;
+ }
+ DEBUG_TRIE_EXECUTE_r(
+ PerlIO_printf( Perl_debug_log,
+ "%sState: %4"UVxf", Base: 0x%-4"UVxf" uvc=%"UVxf"\n",
+ "All done: ",
+ state, base, uvc)
+ );
+ if (leftmost) {
+ s = (char*)leftmost;
+ if (!reginfo || regtry(reginfo, s)) {
+ FREETMPS;
+ LEAVE;
+ goto got_it;
+ }
+ s = HOPc(s,1);
+ } else {
+ break;
+ }
+ }
+ FREETMPS;
+ LEAVE;
+ }
+ break;
default:
Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
break;
I32 end_shift = 0; /* Same for the end. */ /* CC */
I32 scream_pos = -1; /* Internal iterator of scream. */
char *scream_olds = NULL;
- SV* oreplsv = GvSV(PL_replgv);
+ SV* const oreplsv = GvSV(PL_replgv);
const bool do_utf8 = DO_UTF8(sv);
I32 multiline;
#ifdef DEBUGGING
SV* dsv0;
SV* dsv1;
#endif
+ regmatch_info reginfo; /* create some info to pass to regtry etc */
GET_RE_DEBUG_FLAGS_DECL;
}
multiline = prog->reganch & PMf_MULTILINE;
+ reginfo.prog = prog;
#ifdef DEBUGGING
dsv0 = PERL_DEBUG_PAD_ZERO(0);
dsv1 = PERL_DEBUG_PAD_ZERO(1);
#endif
-#ifdef DEBUGGING
- PL_regnarrate = DEBUG_r_TEST;
-#endif
-
RX_MATCH_UTF8_set(prog, do_utf8);
minlen = prog->minlen;
PL_reg_flags |= RF_utf8;
/* Mark beginning of line for ^ and lookbehind. */
- PL_regbol = startpos;
+ reginfo.bol = startpos; /* XXX not used ??? */
PL_bostr = strbeg;
- PL_reg_sv = sv;
+ reginfo.sv = sv;
/* Mark end of line for $ (and such) */
PL_regeol = strend;
/* see how far we have to get to not match where we matched before */
- PL_regtill = startpos+minend;
+ reginfo.till = startpos+minend;
/* If there is a "must appear" string, look for it. */
s = startpos;
- if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
+ if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to set reginfo->ganch */
MAGIC *mg;
if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
- PL_reg_ganch = startpos;
+ reginfo.ganch = startpos;
else if (sv && SvTYPE(sv) >= SVt_PVMG
&& SvMAGIC(sv)
&& (mg = mg_find(sv, PERL_MAGIC_regex_global))
&& mg->mg_len >= 0) {
- PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
+ reginfo.ganch = strbeg + mg->mg_len; /* Defined pos() */
if (prog->reganch & ROPT_ANCH_GPOS) {
- if (s > PL_reg_ganch)
+ if (s > reginfo.ganch)
goto phooey;
- s = PL_reg_ganch;
+ s = reginfo.ganch;
}
}
else /* pos() not defined */
- PL_reg_ganch = strbeg;
+ reginfo.ganch = strbeg;
}
if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
? pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
UNI_DISPLAY_REGEX)
: prog->precomp;
- const int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
+ const int len0 = UTF ? (int)SvCUR(dsv0) : prog->prelen;
const char * const s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
UNI_DISPLAY_REGEX) : startpos;
- const int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
+ const int len1 = do_utf8 ? (int)SvCUR(dsv1) : strend - startpos;
if (!PL_colorset)
reginitcolors();
PerlIO_printf(Perl_debug_log,
/* Simplest case: anchored match need be tried only once. */
/* [unless only anchor is BOL and multiline is set] */
if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
- if (s == startpos && regtry(prog, startpos))
+ if (s == startpos && regtry(®info, startpos))
goto got_it;
else if (multiline || (prog->reganch & ROPT_IMPLICIT)
|| (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
if (s == startpos)
goto after_try;
while (1) {
- if (regtry(prog, s))
+ if (regtry(®info, s))
goto got_it;
after_try:
if (s >= end)
s--;
while (s < end) {
if (*s++ == '\n') { /* don't need PL_utf8skip here */
- if (regtry(prog, s))
+ if (regtry(®info, s))
goto got_it;
}
}
}
goto phooey;
} else if (prog->reganch & ROPT_ANCH_GPOS) {
- if (regtry(prog, PL_reg_ganch))
+ if (regtry(®info, reginfo.ganch))
goto got_it;
goto phooey;
}
while (s < strend) {
if (*s == ch) {
DEBUG_EXECUTE_r( did_match = 1 );
- if (regtry(prog, s)) goto got_it;
+ if (regtry(®info, s)) goto got_it;
s += UTF8SKIP(s);
while (s < strend && *s == ch)
s += UTF8SKIP(s);
while (s < strend) {
if (*s == ch) {
DEBUG_EXECUTE_r( did_match = 1 );
- if (regtry(prog, s)) goto got_it;
+ if (regtry(®info, s)) goto got_it;
s++;
while (s < strend && *s == ch)
s++;
}
if (do_utf8) {
while (s <= last1) {
- if (regtry(prog, s))
+ if (regtry(®info, s))
goto got_it;
s += UTF8SKIP(s);
}
}
else {
while (s <= last1) {
- if (regtry(prog, s))
+ if (regtry(®info, s))
goto got_it;
s++;
}
}
else if ((c = prog->regstclass)) {
if (minlen) {
- I32 op = (U8)OP(prog->regstclass);
+ const OPCODE op = OP(prog->regstclass);
/* don't bother with what can't match */
- if (PL_regkind[op] != EXACT && op != CANY)
+ if (PL_regkind[op] != EXACT && op != CANY && op != TRIE)
strend = HOPc(strend, -(minlen - 1));
}
DEBUG_EXECUTE_r({
- SV *prop = sv_newmortal();
+ SV * const prop = sv_newmortal();
const char *s0;
const char *s1;
int len0;
len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
s1 = UTF ?
sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
- len1 = UTF ? SvCUR(dsv1) : strend - s;
+ len1 = UTF ? (int)SvCUR(dsv1) : strend - s;
PerlIO_printf(Perl_debug_log,
- "Matching stclass \"%*.*s\" against \"%*.*s\"\n",
+ "Matching stclass \"%*.*s\" against \"%*.*s\" (%d chars)\n",
len0, len0, s0,
- len1, len1, s1);
+ len1, len1, s1, (int)(strend - s));
});
- if (find_byclass(prog, c, s, strend, 0))
+ if (find_byclass(prog, c, s, strend, ®info))
goto got_it;
- DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
}
else {
dontbother = 0;
/* We don't know much -- general case. */
if (do_utf8) {
for (;;) {
- if (regtry(prog, s))
+ if (regtry(®info, s))
goto got_it;
if (s >= strend)
break;
}
else {
do {
- if (regtry(prog, s))
+ if (regtry(®info, s))
goto got_it;
} while (s++ < strend);
}
if ( !(flags & REXEC_NOT_FIRST) ) {
RX_MATCH_COPY_FREE(prog);
if (flags & REXEC_COPY_STR) {
- I32 i = PL_regeol - startpos + (stringarg - strbeg);
+ const I32 i = PL_regeol - startpos + (stringarg - strbeg);
#ifdef PERL_OLD_COPY_ON_WRITE
if ((SvIsCOW(sv)
|| (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
- regtry - try match at specific point
*/
STATIC I32 /* 0 failure, 1 success */
-S_regtry(pTHX_ regexp *prog, char *startpos)
+S_regtry(pTHX_ const regmatch_info *reginfo, char *startpos)
{
dVAR;
register I32 *sp;
register I32 *ep;
CHECKPOINT lastcp;
+ regexp *prog = reginfo->prog;
GET_RE_DEBUG_FLAGS_DECL;
#ifdef DEBUGGING
/* SAVEI8(cxstack[cxstack_ix].blk_gimme);
cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
- if (PL_reg_sv) {
+ if (reginfo->sv) {
/* Make $_ available to executed code. */
- if (PL_reg_sv != DEFSV) {
+ if (reginfo->sv != DEFSV) {
SAVE_DEFSV;
- DEFSV = PL_reg_sv;
+ DEFSV = reginfo->sv;
}
- if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
- && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
+ if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
+ && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
/* prepare for quick setting of pos */
#ifdef PERL_OLD_COPY_ON_WRITE
if (SvIsCOW(sv))
sv_force_normal_flags(sv, 0);
#endif
- mg = sv_magicext(PL_reg_sv, (SV*)0, PERL_MAGIC_regex_global,
+ mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
&PL_vtbl_mglob, NULL, 0);
mg->mg_len = -1;
}
Newxz(PL_reg_curpm, 1, PMOP);
#ifdef USE_ITHREADS
{
- SV* repointer = newSViv(0);
+ SV* const repointer = newSViv(0);
/* so we know which PL_regex_padav element is PL_reg_curpm */
SvFLAGS(repointer) |= SVf_BREAK;
av_push(PL_regex_padav,repointer);
}
#endif
REGCP_SET(lastcp);
- if (regmatch(prog, prog->program + 1)) {
+ if (regmatch(reginfo, prog->program + 1)) {
prog->endp[0] = PL_reginput - PL_bostr;
return 1;
}
return 0;
}
-#define RE_UNWIND_BRANCH 1
-#define RE_UNWIND_BRANCHJ 2
-
-union re_unwind_t;
-
-typedef struct { /* XX: makes sense to enlarge it... */
- I32 type;
- I32 prev;
- CHECKPOINT lastcp;
-} re_unwind_generic_t;
-
-typedef struct {
- I32 type;
- I32 prev;
- CHECKPOINT lastcp;
- I32 lastparen;
- regnode *next;
- char *locinput;
- I32 nextchr;
- int minmod;
-#ifdef DEBUGGING
- int regindent;
-#endif
-} re_unwind_branch_t;
-
-typedef union re_unwind_t {
- I32 type;
- re_unwind_generic_t generic;
- re_unwind_branch_t branch;
-} re_unwind_t;
#define sayYES goto yes
#define sayNO goto no
#define sayNO_ANYOF goto no_anyof
#define sayYES_FINAL goto yes_final
-#define sayYES_LOUD goto yes_loud
#define sayNO_FINAL goto no_final
#define sayNO_SILENT goto do_no
#define saySAME(x) if (x) goto yes; else goto no
#define POSCACHE_SUCCESS 0 /* caching success rather than failure */
#define POSCACHE_SEEN 1 /* we know what we're caching */
#define POSCACHE_START 2 /* the real cache: this bit maps to pos 0 */
+
#define CACHEsayYES STMT_START { \
if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \
- if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
- PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) || (1<<POSCACHE_SEEN); \
- else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
+ if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) { \
+ PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) | (1<<POSCACHE_SEEN); \
+ PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
+ } \
+ else if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS)) { \
+ PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
+ } \
+ else { \
/* cache records failure, but this is success */ \
DEBUG_r( \
PerlIO_printf(Perl_debug_log, \
} \
sayYES; \
} STMT_END
+
#define CACHEsayNO STMT_START { \
if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \
- if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
+ if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) { \
PL_reg_poscache[0] |= (1<<POSCACHE_SEEN); \
- else if ((PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
+ PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
+ } \
+ else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
+ PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
+ } \
+ else { \
/* cache records success, but this is failure */ \
DEBUG_r( \
PerlIO_printf(Perl_debug_log, \
/* Make sure there is a test for this +1 options in re_tests */
#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
+#define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
+#define CHRTEST_VOID -1000 /* the c1/c2 "next char" test should be skipped */
+/* addresses of the first and last regmatch_state slots of the slab s points to */
+#define SLAB_FIRST(s) (&(s)->states[0])
+#define SLAB_LAST(s) (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
+
/* grab a new slab and return the first slot in it */
STATIC regmatch_state *
S_push_slab(pTHX)
{
+#if PERL_VERSION < 9
+ dMY_CXT;
+#endif
regmatch_slab *s = PL_regmatch_slab->next;
if (!s) {
Newx(s, 1, regmatch_slab);
PL_regmatch_slab->next = s;
}
PL_regmatch_slab = s;
- return &s->states[0];
+ return SLAB_FIRST(s);
}
/* simulate a recursive call to regmatch */
goto start_recurse; \
resume_point_##where:
+/* push a new state then goto it
+ *
+ * Expands to three statements: point scan at node, store state in
+ * st->resume_state, then jump to the push_state label of the enclosing
+ * function.  The expansion is not wrapped in a block and already ends
+ * in ';', so use it only where a plain statement sequence is valid, not
+ * as the unbraced body of an if/else or loop.  The arguments are pasted
+ * in without parentheses. */
+
+#define PUSH_STATE_GOTO(state, node) \
+ scan = node; \
+ st->resume_state = state; \
+ goto push_state;
+
+/* push a new state with success backtracking, then goto it
+ *
+ * Same expansion as PUSH_STATE_GOTO() except that it jumps to the
+ * push_yes_state label instead; the same usage caveats apply. */
+
+#define PUSH_YES_STATE_GOTO(state, node) \
+ scan = node; \
+ st->resume_state = state; \
+ goto push_yes_state;
+
-/* push a new regex state. Set newst to point to it */
-
-#define PUSH_STATE(newst, resume) \
- depth++; \
- DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "PUSH STATE(%d)\n", depth)); \
- st->scan = scan; \
- st->next = next; \
- st->n = n; \
- st->locinput = locinput; \
- st->resume_state = resume; \
- newst = st+1; \
- if (newst > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1])) \
- newst = S_push_slab(aTHX); \
- PL_regmatch_state = newst; \
- newst->cc = 0; \
- newst->minmod = 0; \
- newst->sw = 0; \
- newst->logical = 0; \
- newst->unwind = 0; \
- locinput = PL_reginput; \
- nextchr = UCHARAT(locinput);
-
-#define POP_STATE \
- DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE(%d)\n", depth)); \
- depth--; \
- st--; \
- if (st < &PL_regmatch_slab->states[0]) { \
- PL_regmatch_slab = PL_regmatch_slab->prev; \
- st = &PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]; \
- } \
- PL_regmatch_state = st; \
- scan = st->scan; \
- next = st->next; \
- n = st->n; \
- locinput = st->locinput; \
- nextchr = UCHARAT(locinput);
/*
- regmatch - main matching routine
* allocated since entry are freed.
*/
+/* Extra state numbers for S_regmatch().  They are numbered upwards from
+ * REGNODE_MAX+1 and are used as case labels in the same switch that
+ * dispatches on OP(scan) (via state_num), and as the state argument of
+ * PUSH_STATE_GOTO()/PUSH_YES_STATE_GOTO() -- presumably REGNODE_MAX is
+ * the highest real regnode opcode, so the two ranges cannot collide
+ * (TODO confirm).  The resume_* entries have no _fail partner.
+ *
+ * *** every FOO_fail should = FOO+1 */
+#define TRIE_next (REGNODE_MAX+1)
+#define TRIE_next_fail (REGNODE_MAX+2)
+#define EVAL_A (REGNODE_MAX+3)
+#define EVAL_A_fail (REGNODE_MAX+4)
+#define resume_CURLYX (REGNODE_MAX+5)
+#define resume_WHILEM1 (REGNODE_MAX+6)
+#define resume_WHILEM2 (REGNODE_MAX+7)
+#define resume_WHILEM3 (REGNODE_MAX+8)
+#define resume_WHILEM4 (REGNODE_MAX+9)
+#define resume_WHILEM5 (REGNODE_MAX+10)
+#define resume_WHILEM6 (REGNODE_MAX+11)
+#define BRANCH_next (REGNODE_MAX+12)
+#define BRANCH_next_fail (REGNODE_MAX+13)
+#define CURLYM_A (REGNODE_MAX+14)
+#define CURLYM_A_fail (REGNODE_MAX+15)
+#define CURLYM_B (REGNODE_MAX+16)
+#define CURLYM_B_fail (REGNODE_MAX+17)
+#define IFMATCH_A (REGNODE_MAX+18)
+#define IFMATCH_A_fail (REGNODE_MAX+19)
+#define CURLY_B_min_known (REGNODE_MAX+20)
+#define CURLY_B_min_known_fail (REGNODE_MAX+21)
+#define CURLY_B_min (REGNODE_MAX+22)
+#define CURLY_B_min_fail (REGNODE_MAX+23)
+#define CURLY_B_max (REGNODE_MAX+24)
+#define CURLY_B_max_fail (REGNODE_MAX+25)
+
+/* Offset of regnode pointer x from `prog' as an int, or -1 when x is NULL.
+ * Relies on a variable named prog being in scope where it is expanded. */
+#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
+/*
+ * S_dump_exec_pos - debugging aid (compiled only under DEBUGGING)
+ *
+ * Writes the left-hand part of one execution-trace line to Perl_debug_log:
+ * the offset of locinput from PL_bostr, then a short window of the subject
+ * string around locinput between '<' and '>', then padding and a '|'.
+ * No newline is written; the caller is expected to finish the line.
+ *
+ * The window is printed as three runs, each bracketed by a pair of
+ * PL_colors[] strings: text before PL_reg_starttry ([4]/[5]), text from
+ * there up to locinput ([2]/[3]), and text from locinput on ([0]/[1]).
+ *
+ *   locinput - current position in the subject string
+ *   scan     - node being executed; only OP(scan) != CANY is consulted
+ *   do_utf8  - true when the subject is to be treated as UTF-8
+ */
+#ifdef DEBUGGING
+STATIC void
+S_dump_exec_pos(pTHX_ const char *locinput, const regnode *scan, const bool do_utf8)
+{
+ const int docolor = *PL_colors[0]; /* first byte of PL_colors[0]: non-zero if that string is non-empty */
+ const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
+ int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput); /* bytes to show from locinput on */
+ /* The part of the string before starttry has one color
+ (pref0_len chars), between starttry and current
+ position another one (pref_len - pref0_len chars),
+ after the current position the third one.
+ We assume that pref0_len <= pref_len, otherwise we
+ decrease pref0_len. */
+ int pref_len = (locinput - PL_bostr) > (5 + taill) - l
+ ? (5 + taill) - l : locinput - PL_bostr; /* bytes to show before locinput */
+ int pref0_len;
+ /* In UTF-8 mode, lengthen the prefix while its first byte is a
+ continuation byte.  NOTE(review): the loop condition itself has no
+ bound at PL_bostr -- confirm the subject cannot start mid-character. */
+ while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
+ pref_len++;
+ pref0_len = pref_len - (locinput - PL_reg_starttry);
+ if (l + pref_len < (5 + taill) && l < PL_regeol - locinput) /* window not full and more text remains: lengthen the tail */
+ l = ( PL_regeol - locinput > (5 + taill) - pref_len
+ ? (5 + taill) - pref_len : PL_regeol - locinput);
+ while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l))) /* in UTF-8 mode, shorten the tail while the byte just past it is a continuation byte */
+ l--;
+ if (pref0_len < 0) /* this test and the next clamp pref0_len into 0..pref_len */
+ pref0_len = 0;
+ if (pref0_len > pref_len)
+ pref0_len = pref_len;
+ {
+ const char * const s0 = /* run 1: the pref0_len bytes at the start of the prefix */
+ do_utf8 && OP(scan) != CANY ?
+ pv_uni_display(PERL_DEBUG_PAD(0), (U8*)(locinput - pref_len),
+ pref0_len, 60, UNI_DISPLAY_REGEX) :
+ locinput - pref_len;
+ const int len0 = do_utf8 ? (int)strlen(s0) : pref0_len; /* NOTE(review): with do_utf8 set and OP(scan) == CANY, s0 is the raw subject pointer
+ yet the length used is still strlen(s0), not pref0_len -- confirm this is intended
+ (the same pattern applies to len1 and len2 below) */
+ const char * const s1 = do_utf8 && OP(scan) != CANY ? /* run 2: the rest of the prefix, up to locinput */
+ pv_uni_display(PERL_DEBUG_PAD(1),
+ (U8*)(locinput - pref_len + pref0_len),
+ pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
+ locinput - pref_len + pref0_len;
+ const int len1 = do_utf8 ? (int)strlen(s1) : pref_len - pref0_len;
+ const char * const s2 = do_utf8 && OP(scan) != CANY ? /* run 3: text from locinput on */
+ pv_uni_display(PERL_DEBUG_PAD(2), (U8*)locinput,
+ PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
+ locinput;
+ const int len2 = do_utf8 ? (int)strlen(s2) : l;
+ PerlIO_printf(Perl_debug_log,
+ "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|",
+ (IV)(locinput - PL_bostr),
+ PL_colors[4],
+ len0, s0,
+ PL_colors[5],
+ PL_colors[2],
+ len1, s1,
+ PL_colors[3],
+ (docolor ? "" : "> <"), /* plain-text marker between run 2 and run 3 when PL_colors[0] is empty */
+ PL_colors[0],
+ len2, s2,
+ PL_colors[1],
+ 15 - l - pref_len + 1, /* field width for the empty string below, i.e. space padding before '|' */
+ "");
+ }
+}
+#endif
STATIC I32 /* 0 failure, 1 success */
-S_regmatch(pTHX_ regexp *rex, regnode *prog)
+S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
{
+#if PERL_VERSION < 9
+ dMY_CXT;
+#endif
dVAR;
register const bool do_utf8 = PL_reg_match_utf8;
const U32 uniflags = UTF8_ALLOW_DEFAULT;
+ regexp *rex = reginfo->prog;
+
regmatch_slab *orig_slab;
regmatch_state *orig_state;
/* these variables are NOT saved during a recusive RFEGMATCH: */
register I32 nextchr; /* is always set to UCHARAT(locinput) */
bool result; /* return value of S_regmatch */
- regnode *inner; /* Next node in internal branch. */
int depth = 0; /* depth of recursion */
- regmatch_state *newst; /* when pushing a state, this is the new one */
- regmatch_state *cur_eval = NULL; /* most recent (??{}) state */
+ regmatch_state *yes_state = NULL; /* state to pop to on success of
+ subpattern */
+ U32 state_num;
#ifdef DEBUGGING
- SV *re_debug_flags = NULL;
- GET_RE_DEBUG_FLAGS;
+ GET_RE_DEBUG_FLAGS_DECL;
PL_regindent++;
#endif
Newx(PL_regmatch_slab, 1, regmatch_slab);
PL_regmatch_slab->prev = NULL;
PL_regmatch_slab->next = NULL;
- PL_regmatch_state = &PL_regmatch_slab->states[0] - 1;
+ PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
}
/* remember current high-water mark for exit */
/* grab next free state slot */
st = ++PL_regmatch_state;
- if (st > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]))
+ if (st > SLAB_LAST(PL_regmatch_slab))
st = PL_regmatch_state = S_push_slab(aTHX);
st->minmod = 0;
st->sw = 0;
st->logical = 0;
- st->unwind = 0;
st->cc = NULL;
/* Note that nextchr is a byte even in UTF */
nextchr = UCHARAT(locinput);
DEBUG_EXECUTE_r( {
SV * const prop = sv_newmortal();
- const int docolor = *PL_colors[0];
- const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
- int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
- /* The part of the string before starttry has one color
- (pref0_len chars), between starttry and current
- position another one (pref_len - pref0_len chars),
- after the current position the third one.
- We assume that pref0_len <= pref_len, otherwise we
- decrease pref0_len. */
- int pref_len = (locinput - PL_bostr) > (5 + taill) - l
- ? (5 + taill) - l : locinput - PL_bostr;
- int pref0_len;
-
- while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
- pref_len++;
- pref0_len = pref_len - (locinput - PL_reg_starttry);
- if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
- l = ( PL_regeol - locinput > (5 + taill) - pref_len
- ? (5 + taill) - pref_len : PL_regeol - locinput);
- while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
- l--;
- if (pref0_len < 0)
- pref0_len = 0;
- if (pref0_len > pref_len)
- pref0_len = pref_len;
+ dump_exec_pos( locinput, scan, do_utf8 );
regprop(rex, prop, scan);
- {
- const char * const s0 =
- do_utf8 && OP(scan) != CANY ?
- pv_uni_display(PERL_DEBUG_PAD(0), (U8*)(locinput - pref_len),
- pref0_len, 60, UNI_DISPLAY_REGEX) :
- locinput - pref_len;
- const int len0 = do_utf8 ? strlen(s0) : pref0_len;
- const char * const s1 = do_utf8 && OP(scan) != CANY ?
- pv_uni_display(PERL_DEBUG_PAD(1),
- (U8*)(locinput - pref_len + pref0_len),
- pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
- locinput - pref_len + pref0_len;
- const int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
- const char * const s2 = do_utf8 && OP(scan) != CANY ?
- pv_uni_display(PERL_DEBUG_PAD(2), (U8*)locinput,
- PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
- locinput;
- const int len2 = do_utf8 ? strlen(s2) : l;
- PerlIO_printf(Perl_debug_log,
- "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
- (IV)(locinput - PL_bostr),
- PL_colors[4],
- len0, s0,
- PL_colors[5],
- PL_colors[2],
- len1, s1,
- PL_colors[3],
- (docolor ? "" : "> <"),
- PL_colors[0],
- len2, s2,
- PL_colors[1],
- 15 - l - pref_len + 1,
- "",
- (IV)(scan - rex->program), PL_regindent*2, "",
- SvPVX_const(prop));
- }
+
+ PerlIO_printf(Perl_debug_log,
+ "%3"IVdf":%*s%s(%"IVdf")\n",
+ (IV)(scan - rex->program), PL_regindent*2, "",
+ SvPVX_const(prop),
+ PL_regkind[OP(scan)] == END ? 0 : (IV)(regnext(scan) - rex->program));
});
next = scan + NEXT_OFF(scan);
if (next == scan)
next = NULL;
+ state_num = OP(scan);
- switch (OP(scan)) {
+ reenter_switch:
+ switch (state_num) {
case BOL:
if (locinput == PL_bostr)
{
- /* regtill = regbol; */
+ /* reginfo->till = reginfo->bol; */
break;
}
sayNO;
break;
sayNO;
case GPOS:
- if (locinput == PL_reg_ganch)
+ if (locinput == reginfo->ganch)
break;
sayNO;
case EOL:
nextchr = UCHARAT(++locinput);
break;
+#undef ST
+#define ST st->u.trie
-
- /*
- traverse the TRIE keeping track of all accepting states
- we transition through until we get to a failing node.
-
-
- */
case TRIE:
- case TRIEF:
- case TRIEFL:
{
+ /* what type of TRIE am I? (utf8 makes this contextual) */
+ const enum { trie_plain, trie_utf8, trie_utf8_fold }
+ trie_type = do_utf8 ?
+ (scan->flags == EXACT ? trie_utf8 : trie_utf8_fold)
+ : trie_plain;
+
+ /* what trie are we using right now */
+ reg_trie_data * const trie
+ = (reg_trie_data*)rex->data->data[ ARG( scan ) ];
+ U32 state = trie->startstate;
+
U8 *uc = ( U8* )locinput;
- U32 state = 1;
U16 charid = 0;
U32 base = 0;
UV uvc = 0;
U8 *uscan = (U8*)NULL;
STRLEN bufflen=0;
SV *sv_accept_buff = NULL;
- const enum { trie_plain, trie_utf8, trie_uft8_fold }
- trie_type = do_utf8 ?
- (OP(scan) == TRIE ? trie_utf8 : trie_uft8_fold)
- : trie_plain;
+ U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
- /* what trie are we using right now */
- reg_trie_data *trie
- = (reg_trie_data*)rex->data->data[ ARG( scan ) ];
- st->u.trie.accepted = 0; /* how many accepting states we have seen */
- result = 0;
+ ST.accepted = 0; /* how many accepting states we have seen */
+ ST.B = next;
+#ifdef DEBUGGING
+ ST.me = scan;
+#endif
+
+ if (trie->bitmap && trie_type != trie_utf8_fold &&
+ !TRIE_BITMAP_TEST(trie,*locinput)
+ ) {
+ if (trie->states[ state ].wordnum) {
+ DEBUG_EXECUTE_r(
+ PerlIO_printf(Perl_debug_log,
+ "%*s %smatched empty string...%s\n",
+ REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
+ );
+ break;
+ } else {
+ DEBUG_EXECUTE_r(
+ PerlIO_printf(Perl_debug_log,
+ "%*s %sfailed to match start class...%s\n",
+ REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
+ );
+ sayNO_SILENT;
+ }
+ }
+
+ /*
+ traverse the TRIE keeping track of all accepting states
+ we transition through until we get to a failing node.
+ */
while ( state && uc <= (U8*)PL_regeol ) {
if (trie->states[ state ].wordnum) {
- if (!st->u.trie.accepted ) {
+ if (!ST.accepted ) {
ENTER;
SAVETMPS;
bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
sizeof(reg_trie_accepted));
SvPOK_on(sv_accept_buff);
sv_2mortal(sv_accept_buff);
- st->u.trie.accept_buff =
+ SAVETMPS;
+ ST.accept_buff =
(reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
}
else {
- if (st->u.trie.accepted >= bufflen) {
+ if (ST.accepted >= bufflen) {
bufflen *= 2;
- st->u.trie.accept_buff =(reg_trie_accepted*)
+ ST.accept_buff =(reg_trie_accepted*)
SvGROW(sv_accept_buff,
bufflen * sizeof(reg_trie_accepted));
}
SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
+ sizeof(reg_trie_accepted));
}
- st->u.trie.accept_buff[st->u.trie.accepted].wordnum = trie->states[state].wordnum;
- st->u.trie.accept_buff[st->u.trie.accepted].endpos = uc;
- ++st->u.trie.accepted;
+ ST.accept_buff[ST.accepted].wordnum = trie->states[state].wordnum;
+ ST.accept_buff[ST.accepted].endpos = uc;
+ ++ST.accepted;
}
base = trie->states[ state ].trans.base;
- DEBUG_TRIE_EXECUTE_r(
+ DEBUG_TRIE_EXECUTE_r({
+ dump_exec_pos( (char *)uc, scan, do_utf8 );
PerlIO_printf( Perl_debug_log,
"%*s %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ",
- REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
- (UV)state, (UV)base, (UV)st->u.trie.accepted );
- );
+ 2+PL_regindent * 2, "", PL_colors[4],
+ (UV)state, (UV)base, (UV)ST.accepted );
+ });
if ( base ) {
switch (trie_type) {
- case trie_uft8_fold:
+ case trie_utf8_fold:
if ( foldlen>0 ) {
uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
foldlen -= len;
uscan += len;
len=0;
} else {
- U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
uvc = to_uni_fold( uvc, foldbuf, &foldlen );
foldlen -= UNISKIP( uvc );
else {
charid = 0;
if (trie->widecharmap) {
- SV** svpp = (SV**)NULL;
- svpp = hv_fetch(trie->widecharmap,
+ SV** const svpp = hv_fetch(trie->widecharmap,
(char*)&uvc, sizeof(UV), 0);
if (svpp)
charid = (U16)SvIV(*svpp);
charid, uvc, (UV)state, PL_colors[5] );
);
}
- if (!st->u.trie.accepted )
+ if (!ST.accepted )
sayNO;
+ DEBUG_EXECUTE_r(
+ PerlIO_printf( Perl_debug_log,
+ "%*s %sgot %"IVdf" possible matches%s\n",
+ REPORT_CODE_OFF + PL_regindent * 2, "",
+ PL_colors[4], (IV)ST.accepted, PL_colors[5] );
+ );
+ }
+
+ /* FALL THROUGH */
+
+ case TRIE_next_fail: /* we failed - try next alterative */
+
+ if ( ST.accepted == 1 ) {
+ /* only one choice left - just continue */
+ DEBUG_EXECUTE_r({
+ reg_trie_data * const trie
+ = (reg_trie_data*)rex->data->data[ ARG(ST.me) ];
+ SV ** const tmp = RX_DEBUG(reginfo->prog)
+ ? av_fetch( trie->words, ST.accept_buff[ 0 ].wordnum-1, 0 )
+ : NULL;
+ PerlIO_printf( Perl_debug_log,
+ "%*s %sonly one match left: #%d <%s>%s\n",
+ REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
+ ST.accept_buff[ 0 ].wordnum,
+ tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",
+ PL_colors[5] );
+ });
+ PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
+ /* in this case we free tmps/leave before we go on to execute the
+ rest of the RE, as we won't be using accept_buff again. */
+ FREETMPS;
+ LEAVE;
+ locinput = PL_reginput;
+ nextchr = UCHARAT(locinput);
+ scan = ST.B;
+ continue; /* execute rest of RE */
+ }
+
+ if (!ST.accepted-- ) {
+ FREETMPS;
+ LEAVE;
+ sayNO;
+ }
+
/*
- There was at least one accepting state that we
- transitioned through. Presumably the number of accepting
- states is going to be low, typically one or two. So we
- simply scan through to find the one with lowest wordnum.
- Once we find it, we swap the last state into its place
- and decrement the size. We then try to match the rest of
- the pattern at the point where the word ends, if we
- succeed then we end the loop, otherwise the loop
- eventually terminates once all of the accepting states
- have been tried.
- */
+ There are at least two accepting states left. Presumably
+ the number of accepting states is going to be low,
+ typically two. So we simply scan through to find the one
+ with lowest wordnum. Once we find it, we swap the last
+ state into its place and decrement the size. We then try to
+ match the rest of the pattern at the point where the word
+ ends. If we succeed, control just continues along the
+ regex; if we fail we return here to try the next accepting
+ state
+ */
- if ( st->u.trie.accepted == 1 ) {
- DEBUG_EXECUTE_r({
- SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ 0 ].wordnum-1, 0 );
- PerlIO_printf( Perl_debug_log,
- "%*s %sonly one match : #%d <%s>%s\n",
- REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
- st->u.trie.accept_buff[ 0 ].wordnum,
- tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",
- PL_colors[5] );
- });
- PL_reginput = (char *)st->u.trie.accept_buff[ 0 ].endpos;
- /* in this case we free tmps/leave before we call regmatch
- as we wont be using accept_buff again. */
- FREETMPS;
- LEAVE;
- REGMATCH(scan + NEXT_OFF(scan), TRIE1);
- /*** all unsaved local vars undefined at this point */
- } else {
- DEBUG_EXECUTE_r(
- PerlIO_printf( Perl_debug_log,"%*s %sgot %"IVdf" possible matches%s\n",
- REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], (IV)st->u.trie.accepted,
- PL_colors[5] );
- );
- while ( !result && st->u.trie.accepted-- ) {
- U32 best = 0;
- U32 cur;
- for( cur = 1 ; cur <= st->u.trie.accepted ; cur++ ) {
- DEBUG_TRIE_EXECUTE_r(
- PerlIO_printf( Perl_debug_log,
- "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
- REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
- (IV)best, st->u.trie.accept_buff[ best ].wordnum, (IV)cur,
- st->u.trie.accept_buff[ cur ].wordnum, PL_colors[5] );
- );
-
- if (st->u.trie.accept_buff[cur].wordnum <
- st->u.trie.accept_buff[best].wordnum)
- best = cur;
- }
- DEBUG_EXECUTE_r({
- reg_trie_data * const trie = (reg_trie_data*)
- rex->data->data[ARG(scan)];
- SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ best ].wordnum - 1, 0 );
- PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at 0x%p%s\n",
- REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
- st->u.trie.accept_buff[best].wordnum,
- tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",scan,
- PL_colors[5] );
- });
- if ( best<st->u.trie.accepted ) {
- reg_trie_accepted tmp = st->u.trie.accept_buff[ best ];
- st->u.trie.accept_buff[ best ] = st->u.trie.accept_buff[ st->u.trie.accepted ];
- st->u.trie.accept_buff[ st->u.trie.accepted ] = tmp;
- best = st->u.trie.accepted;
- }
- PL_reginput = (char *)st->u.trie.accept_buff[ best ].endpos;
-
- /*
- as far as I can tell we only need the SAVETMPS/FREETMPS
- for re's with EVAL in them but I'm leaving them in for
- all until I can be sure.
- */
- SAVETMPS;
- REGMATCH(scan + NEXT_OFF(scan), TRIE2);
- /*** all unsaved local vars undefined at this point */
- FREETMPS;
- }
- FREETMPS;
- LEAVE;
+ {
+ U32 best = 0;
+ U32 cur;
+ for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
+ DEBUG_TRIE_EXECUTE_r(
+ PerlIO_printf( Perl_debug_log,
+ "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
+ REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
+ (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
+ ST.accept_buff[ cur ].wordnum, PL_colors[5] );
+ );
+
+ if (ST.accept_buff[cur].wordnum <
+ ST.accept_buff[best].wordnum)
+ best = cur;
}
-
- if (result) {
- sayYES;
- } else {
- sayNO;
+
+ DEBUG_EXECUTE_r({
+ reg_trie_data * const trie
+ = (reg_trie_data*)rex->data->data[ ARG(ST.me) ];
+ SV ** const tmp = RX_DEBUG(reginfo->prog)
+ ? av_fetch( trie->words, ST.accept_buff[ best ].wordnum - 1, 0 )
+ : NULL;
+ PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at node #%d %s\n",
+ REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
+ ST.accept_buff[best].wordnum,
+ tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr", REG_NODE_NUM(scan),
+ PL_colors[5] );
+ });
+
+ if ( best<ST.accepted ) {
+ reg_trie_accepted tmp = ST.accept_buff[ best ];
+ ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
+ ST.accept_buff[ ST.accepted ] = tmp;
+ best = ST.accepted;
}
+ PL_reginput = (char *)ST.accept_buff[ best ].endpos;
}
- /* unreached codepoint */
+ PUSH_STATE_GOTO(TRIE_next, ST.B);
+ /* NOTREACHED */
+
+#undef ST
+
case EXACT: {
char *s = STRING(scan);
st->ln = STR_LEN(scan);
if (do_utf8 != UTF) {
/* The target and the pattern have differing utf8ness. */
char *l = locinput;
- const char *e = s + st->ln;
+ const char * const e = s + st->ln;
if (do_utf8) {
/* The target is utf8, the pattern is not utf8. */
PL_reg_flags |= RF_tainted;
/* FALL THROUGH */
case EXACTF: {
- char *s = STRING(scan);
+ char * const s = STRING(scan);
st->ln = STR_LEN(scan);
if (do_utf8 || UTF) {
/* Either target or the pattern are utf8. */
- char *l = locinput;
+ const char * const l = locinput;
char *e = PL_regeol;
if (ibcmp_utf8(s, 0, st->ln, (bool)UTF,
if (do_utf8) {
LOAD_UTF8_CHARCLASS_ALNUM();
if (!(OP(scan) == ALNUM
- ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
+ ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
: isALNUM_LC_utf8((U8*)locinput)))
{
sayNO;
if (do_utf8) {
LOAD_UTF8_CHARCLASS_ALNUM();
if (OP(scan) == NALNUM
- ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
+ ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
: isALNUM_LC_utf8((U8*)locinput))
{
sayNO;
if (UTF8_IS_CONTINUED(nextchr)) {
LOAD_UTF8_CHARCLASS_SPACE();
if (!(OP(scan) == SPACE
- ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
+ ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
: isSPACE_LC_utf8((U8*)locinput)))
{
sayNO;
if (do_utf8) {
LOAD_UTF8_CHARCLASS_SPACE();
if (OP(scan) == NSPACE
- ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
+ ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
: isSPACE_LC_utf8((U8*)locinput))
{
sayNO;
if (do_utf8) {
LOAD_UTF8_CHARCLASS_DIGIT();
if (!(OP(scan) == DIGIT
- ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
+ ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
: isDIGIT_LC_utf8((U8*)locinput)))
{
sayNO;
if (do_utf8) {
LOAD_UTF8_CHARCLASS_DIGIT();
if (OP(scan) == NDIGIT
- ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
+ ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
: isDIGIT_LC_utf8((U8*)locinput))
{
sayNO;
break;
case BACK:
break;
- case EVAL:
+
+#undef ST
+#define ST st->u.eval
+
+ case EVAL: /* /(?{A})B/ /(??{A})B/ and /(?(?{A})X|Y)B/ */
{
SV *ret;
{
* necessary */
MAGIC *mg = NULL;
- SV *sv;
+ const SV *sv;
if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
mg = mg_find(sv, PERL_MAGIC_qr);
else if (SvSMAGICAL(ret)) {
(strlen(re->precomp) > 60 ? "..." : ""))
);
- st->u.eval.cp = regcppush(0); /* Save *all* the positions. */
- REGCP_SET(st->u.eval.lastcp);
+ ST.cp = regcppush(0); /* Save *all* the positions. */
+ REGCP_SET(ST.lastcp);
*PL_reglastparen = 0;
*PL_reglastcloseparen = 0;
PL_reginput = locinput;
PL_reg_maxiter = 0;
st->logical = 0;
- st->u.eval.toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
+ ST.toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
((re->reganch & ROPT_UTF8) != 0);
- if (st->u.eval.toggleutf) PL_reg_flags ^= RF_utf8;
- st->u.eval.prev_rex = rex;
+ if (ST.toggleutf) PL_reg_flags ^= RF_utf8;
+ ST.prev_rex = rex;
rex = re;
- st->u.eval.prev_eval = cur_eval;
- st->u.eval.prev_slab = PL_regmatch_slab;
- st->u.eval.depth = depth;
- cur_eval = st;
- PUSH_STATE(newst, resume_EVAL);
- st = newst;
-
+ ST.B = next;
/* now continue from first node in postoned RE */
- next = re->program + 1;
- break;
+ PUSH_YES_STATE_GOTO(EVAL_A, re->program + 1);
/* NOTREACHED */
}
/* /(?(?{...})X|Y)/ */
st->logical = 0;
break;
}
+
+ case EVAL_A: /* successfully ran inner rex (??{rex}) */
+ if (ST.toggleutf)
+ PL_reg_flags ^= RF_utf8;
+ ReREFCNT_dec(rex);
+ rex = ST.prev_rex;
+ /* XXXX This is too dramatic a measure... */
+ PL_reg_maxiter = 0;
+ /* Restore parens of the caller without popping the
+ * savestack */
+ {
+ const I32 tmp = PL_savestack_ix;
+ PL_savestack_ix = ST.lastcp;
+ regcppop(rex);
+ PL_savestack_ix = tmp;
+ }
+ PL_reginput = locinput;
+ /* continue at the node following the (??{...}) */
+ scan = ST.B;
+ continue;
+
+ case EVAL_A_fail: /* unsuccessfully ran inner rex (??{rex}) */
+ /* Restore state to the outer re then re-throw the failure */
+ if (ST.toggleutf)
+ PL_reg_flags ^= RF_utf8;
+ ReREFCNT_dec(rex);
+ rex = ST.prev_rex;
+
+ /* XXXX This is too dramatic a measure... */
+ PL_reg_maxiter = 0;
+
+ PL_reginput = locinput;
+ REGCP_UNWIND(ST.lastcp);
+ regcppop(rex);
+ sayNO_SILENT;
+
+#undef ST
+
case OPEN:
n = ARG(scan); /* which paren pair */
PL_reg_start_tmp[n] = locinput;
/* No need to save/restore up to this paren */
I32 parenfloor = scan->flags;
+ /* Dave says:
+
+ CURLYX and WHILEM are always paired: they're the moral
+ equivalent of pp_enteriter and pp_iter.
+
+ The only time next could be null is if the node tree is
+ corrupt. This was mentioned on p5p a few days ago.
+
+ See http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2006-04/msg00556.html
+ So we'll assert that this is true:
+ */
+ assert(next);
if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
next += ARG(next);
/* XXXX Probably it is better to teach regpush to support
* that we can try again after backing off.
*/
+ /* Dave says:
+
+ st->cc gets initialised by CURLYX ready for use by WHILEM.
+ So again, unless something's been corrupted, st->cc cannot
+ be null at that point in WHILEM.
+
+ See http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2006-04/msg00556.html
+ So we'll assert that this is true:
+ */
+ assert(st->cc);
st->u.whilem.lastloc = st->cc->u.curlyx.lastloc; /* Detection of 0-len. */
st->u.whilem.cache_offset = 0;
st->u.whilem.cache_bit = 0;
*that* much linear. */
if (!PL_reg_maxiter) {
PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
+ /* possible overflow for long strings and many CURLYX's */
+ if (PL_reg_maxiter < 0)
+ PL_reg_maxiter = I32_MAX;
PL_reg_leftiter = PL_reg_maxiter;
}
if (PL_reg_leftiter-- == 0) {
/* cache records failure */
sayNO_SILENT;
}
- PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit);
}
}
CACHEsayNO;
}
/* NOTREACHED */
- case BRANCHJ:
+
+#undef ST
+#define ST st->u.branch
+
+ case BRANCHJ: /* /(...|A|...)/ with long next pointer */
next = scan + ARG(scan);
if (next == scan)
next = NULL;
- inner = NEXTOPER(NEXTOPER(scan));
- goto do_branch;
- case BRANCH:
- inner = NEXTOPER(scan);
- do_branch:
- {
- I32 type;
- type = OP(scan);
- if (!next || OP(next) != type) /* No choice. */
- next = inner; /* Avoid recursion. */
- else {
- const I32 lastparen = *PL_reglastparen;
- /* Put unwinding data on stack */
- const I32 unwind1 = SSNEWt(1,re_unwind_branch_t);
- re_unwind_branch_t * const uw = SSPTRt(unwind1,re_unwind_branch_t);
-
- uw->prev = st->unwind;
- st->unwind = unwind1;
- uw->type = ((type == BRANCH)
- ? RE_UNWIND_BRANCH
- : RE_UNWIND_BRANCHJ);
- uw->lastparen = lastparen;
- uw->next = next;
- uw->locinput = locinput;
- uw->nextchr = nextchr;
- uw->minmod = st->minmod;
-#ifdef DEBUGGING
- uw->regindent = ++PL_regindent;
-#endif
+ scan = NEXTOPER(scan);
+ /* FALL THROUGH */
- REGCP_SET(uw->lastcp);
+ case BRANCH: /* /(...|A|...)/ */
+ scan = NEXTOPER(scan); /* scan now points to inner node */
+ if (!next || (OP(next) != BRANCH && OP(next) != BRANCHJ))
+ /* last branch; skip state push and jump direct to node */
+ continue;
+ ST.lastparen = *PL_reglastparen;
+ ST.next_branch = next;
+ REGCP_SET(ST.cp);
+ PL_reginput = locinput;
- /* Now go into the first branch */
- next = inner;
- }
- }
- break;
+ /* Now go into the branch */
+ PUSH_STATE_GOTO(BRANCH_next, scan);
+ /* NOTREACHED */
+
+ case BRANCH_next_fail: /* that branch failed; try the next, if any */
+ REGCP_UNWIND(ST.cp);
+ for (n = *PL_reglastparen; n > ST.lastparen; n--)
+ PL_regendp[n] = -1;
+ *PL_reglastparen = n;
+ scan = ST.next_branch;
+ /* no more branches? */
+ if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ))
+ sayNO;
+ continue; /* execute next BRANCH[J] op */
+ /* NOTREACHED */
+
case MINMOD:
st->minmod = 1;
break;
- case CURLYM:
- {
- st->u.curlym.l = st->u.curlym.matches = 0;
-
- /* We suppose that the next guy does not need
- backtracking: in particular, it is of constant non-zero length,
- and has no parenths to influence future backrefs. */
- st->ln = ARG1(scan); /* min to match */
- n = ARG2(scan); /* max to match */
- st->u.curlym.paren = scan->flags;
- if (st->u.curlym.paren) {
- if (st->u.curlym.paren > PL_regsize)
- PL_regsize = st->u.curlym.paren;
- if (st->u.curlym.paren > (I32)*PL_reglastparen)
- *PL_reglastparen = st->u.curlym.paren;
- }
+
+#undef ST
+#define ST st->u.curlym
+
+ case CURLYM: /* /A{m,n}B/ where A is fixed-length */
+
+ /* This is an optimisation of CURLYX that enables us to push
+ * only a single backtracking state, no matter how many matches
+ * there are in {m,n}. It relies on the pattern being constant
+ * length, with no parens to influence future backrefs
+ */
+
+ ST.me = scan;
scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
- if (st->u.curlym.paren)
+
+ /* if paren positive, emulate an OPEN/CLOSE around A */
+ if (ST.me->flags) {
+ I32 paren = ST.me->flags;
+ if (paren > PL_regsize)
+ PL_regsize = paren;
+ if (paren > (I32)*PL_reglastparen)
+ *PL_reglastparen = paren;
scan += NEXT_OFF(scan); /* Skip former OPEN. */
- PL_reginput = locinput;
- st->u.curlym.maxwanted = st->minmod ? st->ln : n;
- if (st->u.curlym.maxwanted) {
- while (PL_reginput < PL_regeol && st->u.curlym.matches < st->u.curlym.maxwanted) {
- REGMATCH(scan, CURLYM1);
- /*** all unsaved local vars undefined at this point */
- if (!result)
- break;
- /* on first match, determine length, u.curlym.l */
- if (!st->u.curlym.matches++) {
- if (PL_reg_match_utf8) {
- char *s = locinput;
- while (s < PL_reginput) {
- st->u.curlym.l++;
- s += UTF8SKIP(s);
- }
- }
- else {
- st->u.curlym.l = PL_reginput - locinput;
- }
- if (st->u.curlym.l == 0) {
- st->u.curlym.matches = st->u.curlym.maxwanted;
- break;
- }
- }
- locinput = PL_reginput;
- }
}
+ ST.A = scan;
+ ST.B = next;
+ ST.alen = 0;
+ ST.count = 0;
+ ST.minmod = st->minmod;
+ st->minmod = 0;
+ ST.c1 = CHRTEST_UNINIT;
+ REGCP_SET(ST.cp);
- PL_reginput = locinput;
+ if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
+ goto curlym_do_B;
- if (st->minmod) {
- st->minmod = 0;
- if (st->ln && st->u.curlym.matches < st->ln)
- sayNO;
- if (HAS_TEXT(next) || JUMPABLE(next)) {
- regnode *text_node = next;
+ curlym_do_A: /* execute the A in /A{m,n}B/ */
+ PL_reginput = locinput;
+ PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
+ /* NOTREACHED */
- if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
+ case CURLYM_A: /* we've just matched an A */
+ locinput = st->locinput;
+ nextchr = UCHARAT(locinput);
- if (! HAS_TEXT(text_node)) st->u.curlym.c1 = st->u.curlym.c2 = -1000;
- else {
- if (PL_regkind[(U8)OP(text_node)] == REF) {
- st->u.curlym.c1 = st->u.curlym.c2 = -1000;
- goto assume_ok_MM;
- }
- else { st->u.curlym.c1 = (U8)*STRING(text_node); }
- if (OP(text_node) == EXACTF || OP(text_node) == REFF)
- st->u.curlym.c2 = PL_fold[st->u.curlym.c1];
- else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
- st->u.curlym.c2 = PL_fold_locale[st->u.curlym.c1];
- else
- st->u.curlym.c2 = st->u.curlym.c1;
+ ST.count++;
+ /* after first match, determine A's length: u.curlym.alen */
+ if (ST.count == 1) {
+ if (PL_reg_match_utf8) {
+ char *s = locinput;
+ while (s < PL_reginput) {
+ ST.alen++;
+ s += UTF8SKIP(s);
}
}
- else
- st->u.curlym.c1 = st->u.curlym.c2 = -1000;
- assume_ok_MM:
- REGCP_SET(st->u.curlym.lastcp);
- while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */
- /* If it could work, try it. */
- if (st->u.curlym.c1 == -1000 ||
- UCHARAT(PL_reginput) == st->u.curlym.c1 ||
- UCHARAT(PL_reginput) == st->u.curlym.c2)
- {
- if (st->u.curlym.paren) {
- if (st->ln) {
- PL_regstartp[st->u.curlym.paren] =
- HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr;
- PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr;
- }
- else
- PL_regendp[st->u.curlym.paren] = -1;
- }
- REGMATCH(next, CURLYM2);
- /*** all unsaved local vars undefined at this point */
- if (result)
- sayYES;
- REGCP_UNWIND(st->u.curlym.lastcp);
- }
- /* Couldn't or didn't -- move forward. */
- PL_reginput = locinput;
- REGMATCH(scan, CURLYM3);
- /*** all unsaved local vars undefined at this point */
- if (result) {
- st->ln++;
- locinput = PL_reginput;
- }
- else
- sayNO;
+ else {
+ ST.alen = PL_reginput - locinput;
}
+ if (ST.alen == 0)
+ ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
}
- else {
- DEBUG_EXECUTE_r(
- PerlIO_printf(Perl_debug_log,
- "%*s matched %"IVdf" times, len=%"IVdf"...\n",
- (int)(REPORT_CODE_OFF+PL_regindent*2), "",
- (IV) st->u.curlym.matches, (IV)st->u.curlym.l)
- );
- if (st->u.curlym.matches >= st->ln) {
- if (HAS_TEXT(next) || JUMPABLE(next)) {
- regnode *text_node = next;
+ DEBUG_EXECUTE_r(
+ PerlIO_printf(Perl_debug_log,
+ "%*s CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
+ (int)(REPORT_CODE_OFF+PL_regindent*2), "",
+ (IV) ST.count, (IV)ST.alen)
+ );
- if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
+ locinput = PL_reginput;
+ if (ST.count < (ST.minmod ? ARG1(ST.me) : ARG2(ST.me)))
+ goto curlym_do_A; /* try to match another A */
+ goto curlym_do_B; /* try to match B */
- if (! HAS_TEXT(text_node)) st->u.curlym.c1 = st->u.curlym.c2 = -1000;
- else {
- if (PL_regkind[(U8)OP(text_node)] == REF) {
- st->u.curlym.c1 = st->u.curlym.c2 = -1000;
- goto assume_ok_REG;
- }
- else { st->u.curlym.c1 = (U8)*STRING(text_node); }
-
- if (OP(text_node) == EXACTF || OP(text_node) == REFF)
- st->u.curlym.c2 = PL_fold[st->u.curlym.c1];
- else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
- st->u.curlym.c2 = PL_fold_locale[st->u.curlym.c1];
- else
- st->u.curlym.c2 = st->u.curlym.c1;
- }
- }
- else
- st->u.curlym.c1 = st->u.curlym.c2 = -1000;
- }
- assume_ok_REG:
- REGCP_SET(st->u.curlym.lastcp);
- while (st->u.curlym.matches >= st->ln) {
- /* If it could work, try it. */
- if (st->u.curlym.c1 == -1000 ||
- UCHARAT(PL_reginput) == st->u.curlym.c1 ||
- UCHARAT(PL_reginput) == st->u.curlym.c2)
+ case CURLYM_A_fail: /* just failed to match an A */
+ REGCP_UNWIND(ST.cp);
+ if (ST.minmod || ST.count < ARG1(ST.me) /* min*/ )
+ sayNO;
+
+ curlym_do_B: /* execute the B in /A{m,n}B/ */
+ PL_reginput = locinput;
+ if (ST.c1 == CHRTEST_UNINIT) {
+ /* calculate c1 and c2 for possible match of 1st char
+ * following curly */
+ ST.c1 = ST.c2 = CHRTEST_VOID;
+ if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
+ regnode *text_node = ST.B;
+ if (! HAS_TEXT(text_node))
+ FIND_NEXT_IMPT(text_node);
+ if (HAS_TEXT(text_node)
+ && PL_regkind[OP(text_node)] != REF)
{
- DEBUG_EXECUTE_r(
- PerlIO_printf(Perl_debug_log,
- "%*s trying tail with matches=%"IVdf"...\n",
- (int)(REPORT_CODE_OFF+PL_regindent*2),
- "", (IV)st->u.curlym.matches)
- );
- if (st->u.curlym.paren) {
- if (st->u.curlym.matches) {
- PL_regstartp[st->u.curlym.paren]
- = HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr;
- PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr;
- }
- else
- PL_regendp[st->u.curlym.paren] = -1;
- }
- REGMATCH(next, CURLYM4);
- /*** all unsaved local vars undefined at this point */
- if (result)
- sayYES;
- REGCP_UNWIND(st->u.curlym.lastcp);
+ ST.c1 = (U8)*STRING(text_node);
+ ST.c2 =
+ (OP(text_node) == EXACTF || OP(text_node) == REFF)
+ ? PL_fold[ST.c1]
+ : (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
+ ? PL_fold_locale[ST.c1]
+ : ST.c1;
}
- /* Couldn't or didn't -- back up. */
- st->u.curlym.matches--;
- locinput = HOPc(locinput, -st->u.curlym.l);
- PL_reginput = locinput;
}
}
- sayNO;
+
+ DEBUG_EXECUTE_r(
+ PerlIO_printf(Perl_debug_log,
+ "%*s CURLYM trying tail with matches=%"IVdf"...\n",
+ (int)(REPORT_CODE_OFF+PL_regindent*2),
+ "", (IV)ST.count)
+ );
+ if (ST.c1 != CHRTEST_VOID
+ && UCHARAT(PL_reginput) != ST.c1
+ && UCHARAT(PL_reginput) != ST.c2)
+ {
+ /* simulate B failing */
+ state_num = CURLYM_B_fail;
+ goto reenter_switch;
+ }
+
+ if (ST.me->flags) {
+ /* mark current A as captured */
+ I32 paren = ST.me->flags;
+ if (ST.count) {
+ PL_regstartp[paren]
+ = HOPc(PL_reginput, -ST.alen) - PL_bostr;
+ PL_regendp[paren] = PL_reginput - PL_bostr;
+ }
+ else
+ PL_regendp[paren] = -1;
+ }
+ PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
/* NOTREACHED */
- break;
- }
- case CURLYN:
- st->u.plus.paren = scan->flags; /* Which paren to set */
- if (st->u.plus.paren > PL_regsize)
- PL_regsize = st->u.plus.paren;
- if (st->u.plus.paren > (I32)*PL_reglastparen)
- *PL_reglastparen = st->u.plus.paren;
- st->ln = ARG1(scan); /* min to match */
- n = ARG2(scan); /* max to match */
- scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
- goto repeat;
- case CURLY:
- st->u.plus.paren = 0;
- st->ln = ARG1(scan); /* min to match */
- n = ARG2(scan); /* max to match */
- scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
- goto repeat;
- case STAR:
- st->ln = 0;
- n = REG_INFTY;
+
+ case CURLYM_B_fail: /* just failed to match a B */
+ REGCP_UNWIND(ST.cp);
+ if (ST.minmod) {
+ if (ST.count == ARG2(ST.me) /* max */)
+ sayNO;
+ goto curlym_do_A; /* try to match a further A */
+ }
+ /* backtrack one A */
+ if (ST.count == ARG1(ST.me) /* min */)
+ sayNO;
+ ST.count--;
+ locinput = HOPc(locinput, -ST.alen);
+ goto curlym_do_B; /* try to match B */
+
+#undef ST
+#define ST st->u.curly
+
+#define CURLY_SETPAREN(paren, success) /* set or clear capture `paren` around the last width-1 A */ \
+ if (paren) { /* STAR/PLUS/CURLY pass 0: nothing to record */ \
+ if (success) { \
+ PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr; /* start: one char before locinput */ \
+ PL_regendp[paren] = locinput - PL_bostr; /* end: current position */ \
+ } \
+ else \
+ PL_regendp[paren] = -1; /* success false: end offset reset to -1 */ \
+ }
+
+ case STAR: /* /A*B/ where A is width 1 */
+ ST.paren = 0;
+ ST.min = 0;
+ ST.max = REG_INFTY;
scan = NEXTOPER(scan);
- st->u.plus.paren = 0;
goto repeat;
- case PLUS:
- st->ln = 1;
- n = REG_INFTY;
+ case PLUS: /* /A+B/ where A is width 1 */
+ ST.paren = 0;
+ ST.min = 1;
+ ST.max = REG_INFTY;
scan = NEXTOPER(scan);
- st->u.plus.paren = 0;
+ goto repeat;
+ case CURLYN: /* /(A){m,n}B/ where A is width 1 */
+ ST.paren = scan->flags; /* Which paren to set */
+ if (ST.paren > PL_regsize)
+ PL_regsize = ST.paren;
+ if (ST.paren > (I32)*PL_reglastparen)
+ *PL_reglastparen = ST.paren;
+ ST.min = ARG1(scan); /* min to match */
+ ST.max = ARG2(scan); /* max to match */
+ scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
+ goto repeat;
+ case CURLY: /* /A{m,n}B/ where A is width 1 */
+ ST.paren = 0;
+ ST.min = ARG1(scan); /* min to match */
+ ST.max = ARG2(scan); /* max to match */
+ scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
repeat:
/*
* Lookahead to avoid useless match attempts
* when we know what character comes next.
- */
-
- /*
+ *
* Used to only do .*x and .*?x, but now it allows
* for )'s, ('s and (?{ ... })'s to be in the way
* of the quantifier and the EXACT-like node. -- japhy
*/
+ if (ST.min > ST.max) /* XXX make this a compile-time check? */
+ sayNO;
if (HAS_TEXT(next) || JUMPABLE(next)) {
U8 *s;
regnode *text_node = next;
- if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
+ if (! HAS_TEXT(text_node))
+ FIND_NEXT_IMPT(text_node);
- if (! HAS_TEXT(text_node)) st->u.plus.c1 = st->u.plus.c2 = -1000;
+ if (! HAS_TEXT(text_node))
+ ST.c1 = ST.c2 = CHRTEST_VOID;
else {
- if (PL_regkind[(U8)OP(text_node)] == REF) {
- st->u.plus.c1 = st->u.plus.c2 = -1000;
+ if (PL_regkind[OP(text_node)] == REF) {
+ ST.c1 = ST.c2 = CHRTEST_VOID;
goto assume_ok_easy;
}
- else { s = (U8*)STRING(text_node); }
+ else
+ s = (U8*)STRING(text_node);
if (!UTF) {
- st->u.plus.c2 = st->u.plus.c1 = *s;
+ ST.c2 = ST.c1 = *s;
if (OP(text_node) == EXACTF || OP(text_node) == REFF)
- st->u.plus.c2 = PL_fold[st->u.plus.c1];
+ ST.c2 = PL_fold[ST.c1];
else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
- st->u.plus.c2 = PL_fold_locale[st->u.plus.c1];
+ ST.c2 = PL_fold_locale[ST.c1];
}
else { /* UTF */
if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
- st->u.plus.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
+ ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
uniflags);
- st->u.plus.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
+ ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
uniflags);
}
else {
- st->u.plus.c2 = st->u.plus.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
+ ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
uniflags);
}
}
}
}
else
- st->u.plus.c1 = st->u.plus.c2 = -1000;
+ ST.c1 = ST.c2 = CHRTEST_VOID;
assume_ok_easy:
+
+ ST.A = scan;
+ ST.B = next;
PL_reginput = locinput;
if (st->minmod) {
st->minmod = 0;
- if (st->ln && regrepeat(rex, scan, st->ln) < st->ln)
+ if (ST.min && regrepeat(rex, ST.A, ST.min) < ST.min)
sayNO;
+ ST.count = ST.min;
locinput = PL_reginput;
- REGCP_SET(st->u.plus.lastcp);
- if (st->u.plus.c1 != -1000) {
- st->u.plus.old = locinput;
- st->u.plus.count = 0;
-
- if (n == REG_INFTY) {
- st->u.plus.e = PL_regeol - 1;
- if (do_utf8)
- while (UTF8_IS_CONTINUATION(*(U8*)st->u.plus.e))
- st->u.plus.e--;
- }
- else if (do_utf8) {
- int m = n - st->ln;
- for (st->u.plus.e = locinput;
- m >0 && st->u.plus.e + UTF8SKIP(st->u.plus.e) <= PL_regeol; m--)
- st->u.plus.e += UTF8SKIP(st->u.plus.e);
- }
- else {
- st->u.plus.e = locinput + n - st->ln;
- if (st->u.plus.e >= PL_regeol)
- st->u.plus.e = PL_regeol - 1;
- }
- while (1) {
- /* Find place 'next' could work */
- if (!do_utf8) {
- if (st->u.plus.c1 == st->u.plus.c2) {
- while (locinput <= st->u.plus.e &&
- UCHARAT(locinput) != st->u.plus.c1)
- locinput++;
- } else {
- while (locinput <= st->u.plus.e
- && UCHARAT(locinput) != st->u.plus.c1
- && UCHARAT(locinput) != st->u.plus.c2)
- locinput++;
- }
- st->u.plus.count = locinput - st->u.plus.old;
- }
- else {
- if (st->u.plus.c1 == st->u.plus.c2) {
- STRLEN len;
- /* count initialised to
- * utf8_distance(old, locinput) */
- while (locinput <= st->u.plus.e &&
- utf8n_to_uvchr((U8*)locinput,
- UTF8_MAXBYTES, &len,
- uniflags) != (UV)st->u.plus.c1) {
- locinput += len;
- st->u.plus.count++;
- }
- } else {
- STRLEN len;
- /* count initialised to
- * utf8_distance(old, locinput) */
- while (locinput <= st->u.plus.e) {
- UV c = utf8n_to_uvchr((U8*)locinput,
- UTF8_MAXBYTES, &len,
- uniflags);
- if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
- break;
- locinput += len;
- st->u.plus.count++;
- }
- }
- }
- if (locinput > st->u.plus.e)
- sayNO;
- /* PL_reginput == old now */
- if (locinput != st->u.plus.old) {
- st->ln = 1; /* Did some */
- if (regrepeat(rex, scan, st->u.plus.count) < st->u.plus.count)
- sayNO;
- }
- /* PL_reginput == locinput now */
- TRYPAREN(st->u.plus.paren, st->ln, locinput, PLUS1);
- /*** all unsaved local vars undefined at this point */
- PL_reginput = locinput; /* Could be reset... */
- REGCP_UNWIND(st->u.plus.lastcp);
- /* Couldn't or didn't -- move forward. */
- st->u.plus.old = locinput;
- if (do_utf8)
- locinput += UTF8SKIP(locinput);
- else
- locinput++;
- st->u.plus.count = 1;
- }
+ REGCP_SET(ST.cp);
+ if (ST.c1 == CHRTEST_VOID)
+ goto curly_try_B_min;
+
+ ST.oldloc = locinput;
+
+ /* set ST.maxpos to the furthest point along the
+ * string that could possibly match */
+ if (ST.max == REG_INFTY) {
+ ST.maxpos = PL_regeol - 1;
+ if (do_utf8)
+ while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
+ ST.maxpos--;
}
- else
- while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */
- UV c;
- if (st->u.plus.c1 != -1000) {
- if (do_utf8)
- c = utf8n_to_uvchr((U8*)PL_reginput,
- UTF8_MAXBYTES, 0,
- uniflags);
- else
- c = UCHARAT(PL_reginput);
- /* If it could work, try it. */
- if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
- {
- TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS2);
- /*** all unsaved local vars undefined at this point */
- REGCP_UNWIND(st->u.plus.lastcp);
- }
- }
- /* If it could work, try it. */
- else if (st->u.plus.c1 == -1000)
- {
- TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS3);
- /*** all unsaved local vars undefined at this point */
- REGCP_UNWIND(st->u.plus.lastcp);
- }
- /* Couldn't or didn't -- move forward. */
- PL_reginput = locinput;
- if (regrepeat(rex, scan, 1)) {
- st->ln++;
- locinput = PL_reginput;
- }
- else
- sayNO;
+ else if (do_utf8) {
+ int m = ST.max - ST.min;
+ for (ST.maxpos = locinput;
+ m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
+ ST.maxpos += UTF8SKIP(ST.maxpos);
+ }
+ else {
+ ST.maxpos = locinput + ST.max - ST.min;
+ if (ST.maxpos >= PL_regeol)
+ ST.maxpos = PL_regeol - 1;
}
+ goto curly_try_B_min_known;
+
}
else {
- n = regrepeat(rex, scan, n);
+ ST.count = regrepeat(rex, ST.A, ST.max);
locinput = PL_reginput;
- if (st->ln < n && PL_regkind[(U8)OP(next)] == EOL &&
- (OP(next) != MEOL ||
- OP(next) == SEOL || OP(next) == EOS))
+ if (ST.count < ST.min)
+ sayNO;
+ if ((ST.count > ST.min)
+ && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
{
- st->ln = n; /* why back off? */
- /* ...because $ and \Z can match before *and* after
+ /* A{m,n} must come at the end of the string, there's
+ * no point in backing off ... */
+ ST.min = ST.count;
+ /* ...except that $ and \Z can match before *and* after
newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
- We should back off by one in this case. */
- if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
- st->ln--;
+ We may back off by one in this case. */
+ if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
+ ST.min--;
}
- REGCP_SET(st->u.plus.lastcp);
- {
- UV c = 0;
- while (n >= st->ln) {
- if (st->u.plus.c1 != -1000) {
- if (do_utf8)
- c = utf8n_to_uvchr((U8*)PL_reginput,
- UTF8_MAXBYTES, 0,
- uniflags);
- else
- c = UCHARAT(PL_reginput);
+ REGCP_SET(ST.cp);
+ goto curly_try_B_max;
+ }
+ /* NOTREACHED */
+
+
+ case CURLY_B_min_known_fail:
+ /* failed to find B in a non-greedy match where c1,c2 valid */
+ if (ST.paren && ST.count)
+ PL_regendp[ST.paren] = -1;
+
+ PL_reginput = locinput; /* Could be reset... */
+ REGCP_UNWIND(ST.cp);
+ /* Couldn't or didn't -- move forward. */
+ ST.oldloc = locinput;
+ if (do_utf8)
+ locinput += UTF8SKIP(locinput);
+ else
+ locinput++;
+ ST.count++;
+ curly_try_B_min_known:
+ /* find the next place where 'B' could work, then call B */
+ {
+ int n;
+ if (do_utf8) {
+ n = (ST.oldloc == locinput) ? 0 : 1;
+ if (ST.c1 == ST.c2) {
+ STRLEN len;
+ /* set n to utf8_distance(oldloc, locinput) */
+ while (locinput <= ST.maxpos &&
+ utf8n_to_uvchr((U8*)locinput,
+ UTF8_MAXBYTES, &len,
+ uniflags) != (UV)ST.c1) {
+ locinput += len;
+ n++;
}
- /* If it could work, try it. */
- if (st->u.plus.c1 == -1000 || c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
- {
- TRYPAREN(st->u.plus.paren, n, PL_reginput, PLUS4);
- /*** all unsaved local vars undefined at this point */
- REGCP_UNWIND(st->u.plus.lastcp);
- }
- /* Couldn't or didn't -- back up. */
- n--;
- PL_reginput = locinput = HOPc(locinput, -1);
}
+ else {
+ /* set n to utf8_distance(oldloc, locinput) */
+ while (locinput <= ST.maxpos) {
+ STRLEN len;
+ const UV c = utf8n_to_uvchr((U8*)locinput,
+ UTF8_MAXBYTES, &len,
+ uniflags);
+ if (c == (UV)ST.c1 || c == (UV)ST.c2)
+ break;
+ locinput += len;
+ n++;
+ }
+ }
+ }
+ else {
+ if (ST.c1 == ST.c2) {
+ while (locinput <= ST.maxpos &&
+ UCHARAT(locinput) != ST.c1)
+ locinput++;
+ }
+ else {
+ while (locinput <= ST.maxpos
+ && UCHARAT(locinput) != ST.c1
+ && UCHARAT(locinput) != ST.c2)
+ locinput++;
+ }
+ n = locinput - ST.oldloc;
+ }
+ if (locinput > ST.maxpos)
+ sayNO;
+ /* PL_reginput == oldloc now */
+ if (n) {
+ ST.count += n;
+ if (regrepeat(rex, ST.A, n) < n)
+ sayNO;
}
+ PL_reginput = locinput;
+ CURLY_SETPAREN(ST.paren, ST.count);
+ PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
}
- sayNO;
- break;
- case END:
- if (cur_eval) {
- /* we have successfully completed the execution of a
- * postponed re. Pop all states back to the last EVAL
- * then continue with the node following the (??{...})
- */
-
- /* this simulates a POP_STATE, except that it pops several
- * levels, and doesn't restore locinput */
-
- st = cur_eval;
- PL_regmatch_slab = st->u.eval.prev_slab;
- cur_eval = st->u.eval.prev_eval;
- depth = st->u.eval.depth;
+ /* NOTREACHED */
- PL_regmatch_state = st;
- scan = st->scan;
- next = st->next;
- n = st->n;
- if (st->u.eval.toggleutf)
- PL_reg_flags ^= RF_utf8;
- ReREFCNT_dec(rex);
- rex = st->u.eval.prev_rex;
- /* XXXX This is too dramatic a measure... */
- PL_reg_maxiter = 0;
+ case CURLY_B_min_fail:
+ /* failed to find B in a non-greedy match where c1,c2 invalid */
+ if (ST.paren && ST.count)
+ PL_regendp[ST.paren] = -1;
- /* Restore parens of the caller without popping the
- * savestack */
+ REGCP_UNWIND(ST.cp);
+ /* failed -- move forward one */
+ PL_reginput = locinput;
+ if (regrepeat(rex, ST.A, 1)) {
+ ST.count++;
+ locinput = PL_reginput;
+ if (ST.count <= ST.max || (ST.max == REG_INFTY &&
+ ST.count > 0)) /* count overflow ? */
{
- I32 tmp = PL_savestack_ix;
- PL_savestack_ix = st->u.eval.lastcp;
- regcppop(rex);
- PL_savestack_ix = tmp;
+ curly_try_B_min:
+ CURLY_SETPAREN(ST.paren, ST.count);
+ PUSH_STATE_GOTO(CURLY_B_min, ST.B);
}
+ }
+ sayNO;
+ /* NOTREACHED */
- PL_reginput = locinput;
- /* resume at node following the (??{...}) */
- break;
-
+ curly_try_B_max:
+ /* a successful greedy match: now try to match B */
+ {
+ UV c = 0;
+ if (ST.c1 != CHRTEST_VOID)
+ c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
+ UTF8_MAXBYTES, 0, uniflags)
+ : UCHARAT(PL_reginput);
+ /* If it could work, try it. */
+ if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
+ CURLY_SETPAREN(ST.paren, ST.count);
+ PUSH_STATE_GOTO(CURLY_B_max, ST.B);
+ /* NOTREACHED */
+ }
}
+ /* FALL THROUGH */
+ case CURLY_B_max_fail:
+ /* failed to find B in a greedy match */
+ if (ST.paren && ST.count)
+ PL_regendp[ST.paren] = -1;
+
+ REGCP_UNWIND(ST.cp);
+ /* back up. */
+ if (--ST.count < ST.min)
+ sayNO;
+ PL_reginput = locinput = HOPc(locinput, -1);
+ goto curly_try_B_max;
- if (locinput < PL_regtill) {
+#undef ST
+
+
+ case END:
+ if (locinput < reginfo->till) {
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
"%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
PL_colors[4],
(long)(locinput - PL_reg_starttry),
- (long)(PL_regtill - PL_reg_starttry),
+ (long)(reginfo->till - PL_reg_starttry),
PL_colors[5]));
sayNO_FINAL; /* Cannot match: too short. */
}
PL_reginput = locinput; /* put where regtry can find it */
sayYES_FINAL; /* Success! */
- case SUCCEED:
+
+ case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
+ DEBUG_EXECUTE_r(
+ PerlIO_printf(Perl_debug_log,
+ "%*s %ssubpattern success...%s\n",
+ REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5]));
PL_reginput = locinput; /* put where regtry can find it */
- sayYES_LOUD; /* Success! */
- case SUSPEND:
- n = 1;
+ sayYES_FINAL; /* Success! */
+
+#undef ST
+#define ST st->u.ifmatch
+
+ case SUSPEND: /* (?>A) */
+ ST.wanted = 1;
PL_reginput = locinput;
goto do_ifmatch;
- case UNLESSM:
- n = 0;
- if (scan->flags) {
- char * const s = HOPBACKc(locinput, scan->flags);
- if (!s)
- goto say_yes;
- PL_reginput = s;
- }
- else
- PL_reginput = locinput;
- goto do_ifmatch;
- case IFMATCH:
- n = 1;
+
+ case UNLESSM: /* -ve lookaround: (?!A), or with flags, (?<!A) */
+ ST.wanted = 0;
+ goto ifmatch_trivial_fail_test;
+
+ case IFMATCH: /* +ve lookaround: (?=A), or with flags, (?<=A) */
+ ST.wanted = 1;
+ ifmatch_trivial_fail_test:
if (scan->flags) {
char * const s = HOPBACKc(locinput, scan->flags);
- if (!s)
- goto say_no;
+ if (!s) {
+ /* trivial fail */
+ if (st->logical) {
+ st->logical = 0;
+ st->sw = 1 - (bool)ST.wanted;
+ }
+ else if (ST.wanted)
+ sayNO;
+ next = scan + ARG(scan);
+ if (next == scan)
+ next = NULL;
+ break;
+ }
PL_reginput = s;
}
else
PL_reginput = locinput;
do_ifmatch:
- REGMATCH(NEXTOPER(NEXTOPER(scan)), IFMATCH);
- /*** all unsaved local vars undefined at this point */
- if (result != n) {
- say_no:
- if (st->logical) {
- st->logical = 0;
- st->sw = 0;
- goto do_longjump;
- }
- else
- sayNO;
- }
- say_yes:
+ ST.me = scan;
+ /* execute body of (?...A) */
+ PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
+ /* NOTREACHED */
+
+ case IFMATCH_A_fail: /* body of (?...A) failed */
+ ST.wanted = !ST.wanted;
+ /* FALL THROUGH */
+
+ case IFMATCH_A: /* body of (?...A) succeeded */
if (st->logical) {
st->logical = 0;
- st->sw = 1;
+ st->sw = (bool)ST.wanted;
}
- if (OP(scan) == SUSPEND) {
+ else if (!ST.wanted)
+ sayNO;
+
+ if (OP(ST.me) == SUSPEND)
locinput = PL_reginput;
+ else {
+ locinput = PL_reginput = st->locinput;
nextchr = UCHARAT(locinput);
}
- /* FALL THROUGH. */
+ scan = ST.me + ARG(ST.me);
+ if (scan == ST.me)
+ scan = NULL;
+ continue; /* execute B */
+
+#undef ST
+
case LONGJMP:
- do_longjump:
next = scan + ARG(scan);
if (next == scan)
next = NULL;
Perl_croak(aTHX_ "regexp memory corruption");
}
- reenter:
scan = next;
continue;
/* NOTREACHED */
+ push_yes_state:
+ /* push a state that backtracks on success */
+ st->u.yes.prev_yes_state = yes_state;
+ yes_state = st;
+ /* FALL THROUGH */
+ push_state:
+ /* push a new regex state, then continue at scan */
+ {
+ regmatch_state *newst;
+
+ depth++;
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
+ "PUSH STATE(%d)\n", depth));
+ st->locinput = locinput;
+ newst = st+1;
+ if (newst > SLAB_LAST(PL_regmatch_slab))
+ newst = S_push_slab(aTHX);
+ PL_regmatch_state = newst;
+ newst->cc = st->cc;
+ /* XXX probably don't need to initialise these */
+ newst->minmod = 0;
+ newst->sw = 0;
+ newst->logical = 0;
+
+ locinput = PL_reginput;
+ nextchr = UCHARAT(locinput);
+ st = newst;
+ continue;
+ /* NOTREACHED */
+ }
+
/* simulate recursively calling regmatch(), but without actually
* recursing - ie save the current state on the heap rather than on
* the stack, then re-enter the loop. This avoids complex regexes
regmatch_state *oldst = st;
depth++;
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "PUSH RECURSE STATE(%d)\n", depth));
/* grab the next free state slot */
st++;
- if (st > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]))
+ if (st > SLAB_LAST(PL_regmatch_slab))
st = S_push_slab(aTHX);
PL_regmatch_state = st;
st->minmod = 0;
st->sw = 0;
st->logical = 0;
- st->unwind = 0;
#ifdef DEBUGGING
PL_regindent++;
#endif
/*NOTREACHED*/
sayNO;
-yes_loud:
- DEBUG_EXECUTE_r(
- PerlIO_printf(Perl_debug_log,
- "%*s %scould match...%s\n",
- REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
- );
- goto yes;
yes_final:
+
+ if (yes_state) {
+ /* we have successfully completed a subexpression, but we must now
+ * pop to the state marked by yes_state and continue from there */
+
+ assert(st != yes_state);
+ while (yes_state < SLAB_FIRST(PL_regmatch_slab)
+ || yes_state > SLAB_LAST(PL_regmatch_slab))
+ {
+ /* not in this slab, pop slab */
+ depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
+ PL_regmatch_slab = PL_regmatch_slab->prev;
+ st = SLAB_LAST(PL_regmatch_slab);
+ }
+ depth -= (st - yes_state);
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATES (%d..%d)\n",
+ depth+1, depth+(st - yes_state)));
+ st = yes_state;
+ yes_state = st->u.yes.prev_yes_state;
+ PL_regmatch_state = st;
+
+ switch (st->resume_state) {
+ case IFMATCH_A:
+ case CURLYM_A:
+ case EVAL_A:
+ state_num = st->resume_state;
+ goto reenter_switch;
+
+ case CURLYM_B:
+ case BRANCH_next:
+ case TRIE_next:
+ case CURLY_B_max:
+ default:
+ Perl_croak(aTHX_ "unexpected yes resume state");
+ }
+ }
+
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
PL_colors[4], PL_colors[5]));
yes:
result = 1;
/* XXX this is duplicate(ish) code to that in the do_no section.
- * eventually a yes should just pop the whole stack */
+ * will disappear when REGMATCH goes */
if (depth) {
/* restore previous state and re-enter */
- POP_STATE;
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE(%d)\n", depth));
+ depth--;
+ st--;
+ if (st < SLAB_FIRST(PL_regmatch_slab)) {
+ PL_regmatch_slab = PL_regmatch_slab->prev;
+ st = SLAB_LAST(PL_regmatch_slab);
+ }
+ PL_regmatch_state = st;
+ scan = st->scan;
+ next = st->next;
+ n = st->n;
+ locinput= st->locinput;
+ nextchr = UCHARAT(locinput);
switch (st->resume_state) {
- case resume_TRIE1:
- goto resume_point_TRIE1;
- case resume_TRIE2:
- goto resume_point_TRIE2;
- case resume_EVAL:
- break;
case resume_CURLYX:
goto resume_point_CURLYX;
case resume_WHILEM1:
goto resume_point_WHILEM5;
case resume_WHILEM6:
goto resume_point_WHILEM6;
- case resume_CURLYM1:
- goto resume_point_CURLYM1;
- case resume_CURLYM2:
- goto resume_point_CURLYM2;
- case resume_CURLYM3:
- goto resume_point_CURLYM3;
- case resume_CURLYM4:
- goto resume_point_CURLYM4;
- case resume_IFMATCH:
- goto resume_point_IFMATCH;
- case resume_PLUS1:
- goto resume_point_PLUS1;
- case resume_PLUS2:
- goto resume_point_PLUS2;
- case resume_PLUS3:
- goto resume_point_PLUS3;
- case resume_PLUS4:
- goto resume_point_PLUS4;
+
+ case TRIE_next:
+ case CURLYM_A:
+ case CURLYM_B:
+ case EVAL_A:
+ case IFMATCH_A:
+ case BRANCH_next:
+ case CURLY_B_max:
+ case CURLY_B_min:
+ case CURLY_B_min_known:
+ break;
+
default:
Perl_croak(aTHX_ "regexp resume memory corruption");
}
"%*s %sfailed...%s\n",
REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
);
- goto do_no;
no_final:
do_no:
- if (st->unwind) {
- re_unwind_t * const uw = SSPTRt(st->unwind,re_unwind_t);
-
- switch (uw->type) {
- case RE_UNWIND_BRANCH:
- case RE_UNWIND_BRANCHJ:
- {
- re_unwind_branch_t * const uwb = &(uw->branch);
- const I32 lastparen = uwb->lastparen;
-
- REGCP_UNWIND(uwb->lastcp);
- for (n = *PL_reglastparen; n > lastparen; n--)
- PL_regendp[n] = -1;
- *PL_reglastparen = n;
- scan = next = uwb->next;
- st->minmod = uwb->minmod;
- if ( !scan ||
- OP(scan) != (uwb->type == RE_UNWIND_BRANCH
- ? BRANCH : BRANCHJ) ) { /* Failure */
- st->unwind = uwb->prev;
-#ifdef DEBUGGING
- PL_regindent--;
-#endif
- goto do_no;
- }
- /* Have more choice yet. Reuse the same uwb. */
- if ((n = (uwb->type == RE_UNWIND_BRANCH
- ? NEXT_OFF(next) : ARG(next))))
- next += n;
- else
- next = NULL; /* XXXX Needn't unwinding in this case... */
- uwb->next = next;
- next = NEXTOPER(scan);
- if (uwb->type == RE_UNWIND_BRANCHJ)
- next = NEXTOPER(next);
- locinput = uwb->locinput;
- nextchr = uwb->nextchr;
-#ifdef DEBUGGING
- PL_regindent = uwb->regindent;
-#endif
-
- goto reenter;
- }
- /* NOTREACHED */
- default:
- Perl_croak(aTHX_ "regexp unwind memory corruption");
- }
- /* NOTREACHED */
- }
#ifdef DEBUGGING
PL_regindent--;
if (depth) {
/* there's a previous state to backtrack to */
- POP_STATE;
- switch (st->resume_state) {
- case resume_TRIE1:
- goto resume_point_TRIE1;
- case resume_TRIE2:
- goto resume_point_TRIE2;
- case resume_EVAL:
- /* we have failed an (??{...}). Restore state to the outer re
- * then re-throw the failure */
- if (st->u.eval.toggleutf)
- PL_reg_flags ^= RF_utf8;
- ReREFCNT_dec(rex);
- rex = st->u.eval.prev_rex;
- cur_eval = st->u.eval.prev_eval;
-
- /* XXXX This is too dramatic a measure... */
- PL_reg_maxiter = 0;
-
- PL_reginput = locinput;
- REGCP_UNWIND(st->u.eval.lastcp);
- regcppop(rex);
- goto do_no;
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE(%d)\n", depth));
+ depth--;
+ st--;
+ if (st < SLAB_FIRST(PL_regmatch_slab)) {
+ PL_regmatch_slab = PL_regmatch_slab->prev;
+ st = SLAB_LAST(PL_regmatch_slab);
+ }
+ PL_regmatch_state = st;
+ scan = st->scan;
+ next = st->next;
+ n = st->n;
+ locinput= st->locinput;
+ nextchr = UCHARAT(locinput);
+ switch (st->resume_state) {
case resume_CURLYX:
goto resume_point_CURLYX;
case resume_WHILEM1:
goto resume_point_WHILEM5;
case resume_WHILEM6:
goto resume_point_WHILEM6;
- case resume_CURLYM1:
- goto resume_point_CURLYM1;
- case resume_CURLYM2:
- goto resume_point_CURLYM2;
- case resume_CURLYM3:
- goto resume_point_CURLYM3;
- case resume_CURLYM4:
- goto resume_point_CURLYM4;
- case resume_IFMATCH:
- goto resume_point_IFMATCH;
- case resume_PLUS1:
- goto resume_point_PLUS1;
- case resume_PLUS2:
- goto resume_point_PLUS2;
- case resume_PLUS3:
- goto resume_point_PLUS3;
- case resume_PLUS4:
- goto resume_point_PLUS4;
+
+ case TRIE_next:
+ case EVAL_A:
+ case BRANCH_next:
+ case CURLYM_A:
+ case CURLYM_B:
+ case IFMATCH_A:
+ case CURLY_B_max:
+ case CURLY_B_min:
+ case CURLY_B_min_known:
+ if (yes_state == st)
+ yes_state = st->u.yes.prev_yes_state;
+ state_num = st->resume_state + 1; /* failure = success + 1 */
+ goto reenter_switch;
+
default:
Perl_croak(aTHX_ "regexp resume memory corruption");
}
/* free all slabs above current one */
if (orig_slab->next) {
- regmatch_slab *osl, *sl = orig_slab->next;
+ regmatch_slab *sl = orig_slab->next;
orig_slab->next = NULL;
while (sl) {
- osl = sl;
+ regmatch_slab * const osl = sl;
sl = sl->next;
Safefree(osl);
}
PL_reginput = scan;
DEBUG_r({
- SV *re_debug_flags = NULL;
- SV * const prop = sv_newmortal();
- GET_RE_DEBUG_FLAGS;
- DEBUG_EXECUTE_r({
- regprop(prog, prop, p);
- PerlIO_printf(Perl_debug_log,
- "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
- REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max);
- });
+ GET_RE_DEBUG_FLAGS_DECL;
+ DEBUG_EXECUTE_r({
+ SV * const prop = sv_newmortal();
+ regprop(prog, prop, p);
+ PerlIO_printf(Perl_debug_log,
+ "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
+ REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max);
});
+ });
return(c);
}
+#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
/*
- regclass_swash - prepare the utf8 swash
*/
SV *sw = NULL;
SV *si = NULL;
SV *alt = NULL;
- const struct reg_data *data = prog ? prog->data : NULL;
+ const struct reg_data * const data = prog ? prog->data : NULL;
if (data && data->count) {
const U32 n = ARG(node);
return sw;
}
+#endif
/*
- reginclass - determine if a character falls into a character class
}
STATIC U8 *
-S_reghop3(U8 *s, I32 off, U8* lim)
+S_reghop3(U8 *s, I32 off, const U8* lim)
{
dVAR;
if (off >= 0) {
}
STATIC U8 *
-S_reghopmaybe3(U8* s, I32 off, U8* lim)
+S_reghopmaybe3(U8* s, I32 off, const U8* lim)
{
dVAR;
if (off >= 0) {
s += UTF8SKIP(s);
}
if (off >= 0)
- return 0;
+ return NULL;
}
else {
while (off++) {
break;
}
if (off <= 0)
- return 0;
+ return NULL;
}
return s;
}