This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Using uninitialized (and wrong) variables is not good.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
be3c0a43 70 **** Copyright (c) 1991-2002, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
a687059c
LW
83#include "regcomp.h"
84
c277df42
IZ
85#define RF_tainted 1 /* tainted information used? */
86#define RF_warned 2 /* warned about big count? */
ce862d02 87#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
88#define RF_utf8 8 /* String contains multibyte chars? */
89
90#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
91
92#define RS_init 1 /* eval environment created */
93#define RS_set 2 /* replsv value is set */
c277df42 94
a687059c
LW
95#ifndef STATIC
96#define STATIC static
97#endif
98
c277df42
IZ
99/*
100 * Forwards.
101 */
102
a0ed51b3 103#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 104#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 105
dfe13c55
GS
106#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
108#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
109#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
110#define HOPc(pos,off) ((char*)HOP(pos,off))
111#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 112
efb30f32 113#define HOPBACK(pos, off) ( \
e54858b0 114 (PL_reg_match_utf8) \
efb30f32
HS
115 ? reghopmaybe((U8*)pos, -off) \
116 : (pos - off >= PL_bostr) \
117 ? (U8*)(pos - off) \
118 : (U8*)NULL \
119)
120#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
121
1aa99e6b
IH
122#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
123#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
124#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
125#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
126#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
127#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
128
8269fa76 129#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 130
5f80c4cf 131/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
132#define JUMPABLE(rn) ( \
133 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
134 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
135 OP(rn) == PLUS || OP(rn) == MINMOD || \
136 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
137)
138
cca55fe3
JP
139#define HAS_TEXT(rn) ( \
140 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
141)
e2d8ce26 142
a84d97b6
HS
143/*
144 Search for mandatory following text node; for lookahead, the text must
145 follow but for lookbehind (rn->flags != 0) we skip to the next step.
146*/
cca55fe3 147#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 148 while (JUMPABLE(rn)) \
a84d97b6 149 if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 150 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
151 else if (OP(rn) == PLUS) \
152 rn = NEXTOPER(rn); \
a84d97b6
HS
153 else if (OP(rn) == IFMATCH) \
154 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
e2d8ce26 155 else rn += NEXT_OFF(rn); \
5f80c4cf 156} STMT_END
74750237 157
acfe0abc 158static void restore_pos(pTHX_ void *arg);
51371543 159
76e3520e 160STATIC CHECKPOINT
cea2e8a9 161S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 162{
3280af22 163 int retval = PL_savestack_ix;
b1ce53c5
JH
164#define REGCP_PAREN_ELEMS 4
165 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
166 int p;
167
e49a9654
IH
168 if (paren_elems_to_push < 0)
169 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
170
a01268b5 171#define REGCP_OTHER_ELEMS 6
b1ce53c5 172 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 173 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 174/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
175 SSPUSHINT(PL_regendp[p]);
176 SSPUSHINT(PL_regstartp[p]);
3280af22 177 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
178 SSPUSHINT(p);
179 }
b1ce53c5 180/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
181 SSPUSHINT(PL_regsize);
182 SSPUSHINT(*PL_reglastparen);
a01268b5 183 SSPUSHINT(*PL_reglastcloseparen);
3280af22 184 SSPUSHPTR(PL_reginput);
41123dfd
JH
185#define REGCP_FRAME_ELEMS 2
186/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
187 * are needed for the regexp context stack bookkeeping. */
188 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 189 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 190
a0d0e21e
LW
191 return retval;
192}
193
c277df42 194/* These are needed since we do not localize EVAL nodes: */
02db2b7b 195# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 196 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 197 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 198
02db2b7b 199# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 200 PerlIO_printf(Perl_debug_log, \
faccc32b 201 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 202 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 203
76e3520e 204STATIC char *
cea2e8a9 205S_regcppop(pTHX)
a0d0e21e 206{
b1ce53c5 207 I32 i;
a0d0e21e
LW
208 U32 paren = 0;
209 char *input;
cf93c79d 210 I32 tmps;
b1ce53c5
JH
211
212 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 213 i = SSPOPINT;
b1ce53c5
JH
214 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
215 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 216 input = (char *) SSPOPPTR;
a01268b5 217 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
218 *PL_reglastparen = SSPOPINT;
219 PL_regsize = SSPOPINT;
b1ce53c5
JH
220
221 /* Now restore the parentheses context. */
41123dfd
JH
222 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
223 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 224 paren = (U32)SSPOPINT;
3280af22 225 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
226 PL_regstartp[paren] = SSPOPINT;
227 tmps = SSPOPINT;
3280af22
NIS
228 if (paren <= *PL_reglastparen)
229 PL_regendp[paren] = tmps;
c277df42 230 DEBUG_r(
c3464db5 231 PerlIO_printf(Perl_debug_log,
b900a521 232 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 233 (UV)paren, (IV)PL_regstartp[paren],
b900a521 234 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 235 (IV)PL_regendp[paren],
3280af22 236 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 237 );
a0d0e21e 238 }
c277df42 239 DEBUG_r(
3280af22 240 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 241 PerlIO_printf(Perl_debug_log,
faccc32b
JH
242 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
243 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
244 }
245 );
daf18116 246#if 1
dafc8851
JH
247 /* It would seem that the similar code in regtry()
248 * already takes care of this, and in fact it is in
249 * a better location to since this code can #if 0-ed out
250 * but the code in regtry() is needed or otherwise tests
251 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
252 * (as of patchlevel 7877) will fail. Then again,
253 * this code seems to be necessary or otherwise
254 * building DynaLoader will fail:
255 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
256 * --jhi */
3280af22
NIS
257 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
258 if (paren > PL_regsize)
cf93c79d
IZ
259 PL_regstartp[paren] = -1;
260 PL_regendp[paren] = -1;
a0d0e21e 261 }
dafc8851 262#endif
a0d0e21e
LW
263 return input;
264}
265
0f5d15d6 266STATIC char *
cea2e8a9 267S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
268{
269 I32 tmp = PL_savestack_ix;
270
271 PL_savestack_ix = ss;
272 regcppop();
273 PL_savestack_ix = tmp;
942e002e 274 return Nullch;
0f5d15d6
IZ
275}
276
277typedef struct re_cc_state
278{
279 I32 ss;
280 regnode *node;
281 struct re_cc_state *prev;
282 CURCUR *cc;
283 regexp *re;
284} re_cc_state;
285
02db2b7b 286#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 287
29d1e993
HS
288#define TRYPAREN(paren, n, input) { \
289 if (paren) { \
290 if (n) { \
291 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
292 PL_regendp[paren] = input - PL_bostr; \
293 } \
294 else \
295 PL_regendp[paren] = -1; \
296 } \
297 if (regmatch(next)) \
298 sayYES; \
299 if (paren && n) \
300 PL_regendp[paren] = -1; \
301}
302
303
a687059c 304/*
e50aee73 305 * pregexec and friends
a687059c
LW
306 */
307
308/*
c277df42 309 - pregexec - match a regexp against a string
a687059c 310 */
c277df42 311I32
864dbfa3 312Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 313 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
314/* strend: pointer to null at end of string */
315/* strbeg: real beginning of string */
316/* minend: end of match must be >=minend after stringarg. */
317/* nosave: For optimizations. */
318{
319 return
9041c2e3 320 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
321 nosave ? 0 : REXEC_COPY_STR);
322}
0f5d15d6
IZ
323
324STATIC void
cea2e8a9 325S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
326{
327 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
328#ifdef DEBUGGING
329 PL_regprogram = prog->program;
330#endif
331 PL_regnpar = prog->nparens;
9041c2e3
NIS
332 PL_regdata = prog->data;
333 PL_reg_re = prog;
0f5d15d6 334}
22e551b9 335
9041c2e3 336/*
cad2e5aa
JH
337 * Need to implement the following flags for reg_anch:
338 *
339 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
340 * USE_INTUIT_ML
341 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
342 * INTUIT_AUTORITATIVE_ML
343 * INTUIT_ONCE_NOML - Intuit can match in one location only.
344 * INTUIT_ONCE_ML
345 *
346 * Another flag for this function: SECOND_TIME (so that float substrs
347 * with giant delta may be not rechecked).
348 */
349
350/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
351
2c2d71f5 352/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
353 Otherwise, only SvCUR(sv) is used to get strbeg. */
354
355/* XXXX We assume that strpos is strbeg unless sv. */
356
6eb5f6b9
JH
357/* XXXX Some places assume that there is a fixed substring.
358 An update may be needed if optimizer marks as "INTUITable"
359 RExen without fixed substrings. Similarly, it is assumed that
360 lengths of all the strings are no more than minlen, thus they
361 cannot come from lookahead.
362 (Or minlen should take into account lookahead.) */
363
2c2d71f5
JH
364/* A failure to find a constant substring means that there is no need to make
365 an expensive call to REx engine, thus we celebrate a failure. Similarly,
366 finding a substring too deep into the string means that less calls to
30944b6d
IZ
367 regtry() should be needed.
368
369 REx compiler's optimizer found 4 possible hints:
370 a) Anchored substring;
371 b) Fixed substring;
372 c) Whether we are anchored (beginning-of-line or \G);
373 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 374 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
375 string which does not contradict any of them.
376 */
2c2d71f5 377
6eb5f6b9
JH
378/* Most of decisions we do here should have been done at compile time.
379 The nodes of the REx which we used for the search should have been
380 deleted from the finite automaton. */
381
cad2e5aa
JH
382char *
383Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
384 char *strend, U32 flags, re_scream_pos_data *data)
385{
b7953727 386 register I32 start_shift = 0;
cad2e5aa 387 /* Should be nonnegative! */
b7953727 388 register I32 end_shift = 0;
2c2d71f5
JH
389 register char *s;
390 register SV *check;
a1933d95 391 char *strbeg;
cad2e5aa
JH
392 char *t;
393 I32 ml_anch;
6eb5f6b9 394 register char *other_last = Nullch; /* other substr checked before this */
b7953727 395 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
396#ifdef DEBUGGING
397 char *i_strpos = strpos;
ce333219 398 SV *dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 399#endif
cad2e5aa 400
b8d68ded
JH
401 if (prog->reganch & ROPT_UTF8) {
402 DEBUG_r(PerlIO_printf(Perl_debug_log,
403 "UTF-8 regex...\n"));
404 PL_reg_flags |= RF_utf8;
405 }
406
2a782b5b 407 DEBUG_r({
b8d68ded 408 char *s = PL_reg_match_utf8 ?
c728cb41
JH
409 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
410 strpos;
b8d68ded
JH
411 int len = PL_reg_match_utf8 ?
412 strlen(s) : strend - strpos;
2a782b5b
JH
413 if (!PL_colorset)
414 reginitcolors();
b8d68ded
JH
415 if (PL_reg_match_utf8)
416 DEBUG_r(PerlIO_printf(Perl_debug_log,
417 "UTF-8 target...\n"));
2a782b5b
JH
418 PerlIO_printf(Perl_debug_log,
419 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
420 PL_colors[4],PL_colors[5],PL_colors[0],
421 prog->precomp,
422 PL_colors[1],
423 (strlen(prog->precomp) > 60 ? "..." : ""),
424 PL_colors[0],
425 (int)(len > 60 ? 60 : len),
426 s, PL_colors[1],
427 (len > 60 ? "..." : "")
428 );
429 });
cad2e5aa 430
1aa99e6b 431 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
a72c7584
JH
432 DEBUG_r(PerlIO_printf(Perl_debug_log,
433 "String too short... [re_intuit_start]\n"));
cad2e5aa 434 goto fail;
2c2d71f5 435 }
a1933d95 436 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 437 PL_regeol = strend;
653099ff 438 check = prog->check_substr;
2c2d71f5 439 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
440 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
441 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 442 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 443
7e25d62c
JH
444 if (!ml_anch) {
445 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
446 | ROPT_IMPLICIT)) /* not a real BOL */
447 /* SvCUR is not set on references: SvRV and SvPVX overlap */
448 && sv && !SvROK(sv)
449 && (strpos != strbeg)) {
450 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
451 goto fail;
452 }
453 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 454 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 455 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
456 I32 slen;
457
1aa99e6b 458 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
459 if (SvTAIL(check)) {
460 slen = SvCUR(check); /* >= 1 */
cad2e5aa 461
9041c2e3 462 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
463 || (strend - s == slen && strend[-1] != '\n')) {
464 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
465 goto fail_finish;
cad2e5aa
JH
466 }
467 /* Now should match s[0..slen-2] */
468 slen--;
653099ff 469 if (slen && (*SvPVX(check) != *s
cad2e5aa 470 || (slen > 1
653099ff 471 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
472 report_neq:
473 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
474 goto fail_finish;
475 }
cad2e5aa 476 }
653099ff
GS
477 else if (*SvPVX(check) != *s
478 || ((slen = SvCUR(check)) > 1
479 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
480 goto report_neq;
481 goto success_at_start;
7e25d62c 482 }
cad2e5aa 483 }
2c2d71f5 484 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 485 s = strpos;
2c2d71f5 486 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 487 end_shift = prog->minlen - start_shift -
653099ff 488 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 489 if (!ml_anch) {
653099ff
GS
490 I32 end = prog->check_offset_max + CHR_SVLEN(check)
491 - (SvTAIL(check) != 0);
1aa99e6b 492 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
493
494 if (end_shift < eshift)
495 end_shift = eshift;
496 }
cad2e5aa 497 }
2c2d71f5 498 else { /* Can match at random position */
cad2e5aa
JH
499 ml_anch = 0;
500 s = strpos;
2c2d71f5
JH
501 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
502 /* Should be nonnegative! */
503 end_shift = prog->minlen - start_shift -
653099ff 504 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
505 }
506
2c2d71f5 507#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 508 if (end_shift < 0)
6bbae5e6 509 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
510#endif
511
2c2d71f5
JH
512 restart:
513 /* Find a possible match in the region s..strend by looking for
514 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 515 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
516 I32 p = -1; /* Internal iterator of scream. */
517 I32 *pp = data ? data->scream_pos : &p;
518
2c2d71f5
JH
519 if (PL_screamfirst[BmRARE(check)] >= 0
520 || ( BmRARE(check) == '\n'
521 && (BmPREVIOUS(check) == SvCUR(check) - 1)
522 && SvTAIL(check) ))
9041c2e3 523 s = screaminstr(sv, check,
2c2d71f5 524 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 525 else
2c2d71f5 526 goto fail_finish;
cad2e5aa
JH
527 if (data)
528 *data->scream_olds = s;
529 }
f33976b4 530 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
531 s = fbm_instr((U8*)(s + start_shift),
532 (U8*)(strend - end_shift),
533 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 534 else
1aa99e6b
IH
535 s = fbm_instr(HOP3(s, start_shift, strend),
536 HOP3(strend, -end_shift, strbeg),
2c2d71f5 537 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
538
539 /* Update the count-of-usability, remove useless subpatterns,
540 unshift s. */
2c2d71f5
JH
541
542 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
543 (s ? "Found" : "Did not find"),
544 ((check == prog->anchored_substr) ? "anchored" : "floating"),
545 PL_colors[0],
7b0972df
JH
546 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
547 SvPVX(check),
2c2d71f5
JH
548 PL_colors[1], (SvTAIL(check) ? "$" : ""),
549 (s ? " at offset " : "...\n") ) );
550
551 if (!s)
552 goto fail_finish;
553
6eb5f6b9
JH
554 check_at = s;
555
2c2d71f5 556 /* Finish the diagnostic message */
30944b6d 557 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
558
559 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
560 Start with the other substr.
561 XXXX no SCREAM optimization yet - and a very coarse implementation
562 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
563 *always* match. Probably should be marked during compile...
564 Probably it is right to do no SCREAM here...
565 */
566
567 if (prog->float_substr && prog->anchored_substr) {
30944b6d 568 /* Take into account the "other" substring. */
2c2d71f5
JH
569 /* XXXX May be hopelessly wrong for UTF... */
570 if (!other_last)
6eb5f6b9 571 other_last = strpos;
2c2d71f5 572 if (check == prog->float_substr) {
30944b6d
IZ
573 do_other_anchored:
574 {
1aa99e6b 575 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
576 char *s1 = s;
577
2c2d71f5
JH
578 t = s - prog->check_offset_max;
579 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
580 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 581 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 582 && t > strpos)))
30944b6d 583 /* EMPTY */;
2c2d71f5
JH
584 else
585 t = strpos;
1aa99e6b 586 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
587 if (t < other_last) /* These positions already checked */
588 t = other_last;
1aa99e6b 589 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
590 if (last < last1)
591 last1 = last;
592 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
593 /* On end-of-str: see comment below. */
594 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
595 HOP3(HOP3(last1, prog->anchored_offset, strend)
596 + SvCUR(prog->anchored_substr),
597 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
598 prog->anchored_substr,
599 PL_multiline ? FBMrf_MULTILINE : 0);
600 DEBUG_r(PerlIO_printf(Perl_debug_log,
601 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
602 (s ? "Found" : "Contradicts"),
603 PL_colors[0],
7b0972df
JH
604 (int)(SvCUR(prog->anchored_substr)
605 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
606 SvPVX(prog->anchored_substr),
607 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
608 if (!s) {
609 if (last1 >= last2) {
610 DEBUG_r(PerlIO_printf(Perl_debug_log,
611 ", giving up...\n"));
612 goto fail_finish;
613 }
614 DEBUG_r(PerlIO_printf(Perl_debug_log,
615 ", trying floating at offset %ld...\n",
1aa99e6b
IH
616 (long)(HOP3c(s1, 1, strend) - i_strpos)));
617 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
618 s = HOP3c(last, 1, strend);
2c2d71f5
JH
619 goto restart;
620 }
621 else {
622 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 623 (long)(s - i_strpos)));
1aa99e6b
IH
624 t = HOP3c(s, -prog->anchored_offset, strbeg);
625 other_last = HOP3c(s, 1, strend);
30944b6d 626 s = s1;
2c2d71f5
JH
627 if (t == strpos)
628 goto try_at_start;
2c2d71f5
JH
629 goto try_at_offset;
630 }
30944b6d 631 }
2c2d71f5
JH
632 }
633 else { /* Take into account the floating substring. */
634 char *last, *last1;
635 char *s1 = s;
636
1aa99e6b
IH
637 t = HOP3c(s, -start_shift, strbeg);
638 last1 = last =
639 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
640 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
641 last = HOP3c(t, prog->float_max_offset, strend);
642 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
643 if (s < other_last)
644 s = other_last;
2c2d71f5
JH
645 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
646 /* fbm_instr() takes into account exact value of end-of-str
647 if the check is SvTAIL(ed). Since false positives are OK,
648 and end-of-str is not later than strend we are OK. */
649 s = fbm_instr((unsigned char*)s,
650 (unsigned char*)last + SvCUR(prog->float_substr)
651 - (SvTAIL(prog->float_substr)!=0),
652 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
653 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
654 (s ? "Found" : "Contradicts"),
655 PL_colors[0],
7b0972df
JH
656 (int)(SvCUR(prog->float_substr)
657 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
658 SvPVX(prog->float_substr),
659 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
660 if (!s) {
661 if (last1 == last) {
662 DEBUG_r(PerlIO_printf(Perl_debug_log,
663 ", giving up...\n"));
664 goto fail_finish;
665 }
666 DEBUG_r(PerlIO_printf(Perl_debug_log,
667 ", trying anchored starting at offset %ld...\n",
30944b6d 668 (long)(s1 + 1 - i_strpos)));
803ff556 669 other_last = last;
1aa99e6b 670 s = HOP3c(t, 1, strend);
2c2d71f5
JH
671 goto restart;
672 }
673 else {
674 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 675 (long)(s - i_strpos)));
803ff556 676 other_last = s; /* Fix this later. --Hugo */
30944b6d 677 s = s1;
2c2d71f5
JH
678 if (t == strpos)
679 goto try_at_start;
2c2d71f5
JH
680 goto try_at_offset;
681 }
682 }
cad2e5aa 683 }
2c2d71f5
JH
684
685 t = s - prog->check_offset_max;
2c2d71f5
JH
686 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
687 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
688 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
689 && t > strpos))) {
2c2d71f5
JH
690 /* Fixed substring is found far enough so that the match
691 cannot start at strpos. */
692 try_at_offset:
cad2e5aa 693 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
694 /* Eventually fbm_*() should handle this, but often
695 anchored_offset is not 0, so this check will not be wasted. */
696 /* XXXX In the code below we prefer to look for "^" even in
697 presence of anchored substrings. And we search even
698 beyond the found float position. These pessimizations
699 are historical artefacts only. */
700 find_anchor:
2c2d71f5 701 while (t < strend - prog->minlen) {
cad2e5aa 702 if (*t == '\n') {
4ee3650e 703 if (t < check_at - prog->check_offset_min) {
30944b6d 704 if (prog->anchored_substr) {
4ee3650e
GS
705 /* Since we moved from the found position,
706 we definitely contradict the found anchored
30944b6d
IZ
707 substr. Due to the above check we do not
708 contradict "check" substr.
709 Thus we can arrive here only if check substr
710 is float. Redo checking for "other"=="fixed".
711 */
9041c2e3 712 strpos = t + 1;
30944b6d
IZ
713 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
714 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
715 goto do_other_anchored;
716 }
4ee3650e
GS
717 /* We don't contradict the found floating substring. */
718 /* XXXX Why not check for STCLASS? */
cad2e5aa 719 s = t + 1;
2c2d71f5 720 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 721 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
722 goto set_useful;
723 }
4ee3650e
GS
724 /* Position contradicts check-string */
725 /* XXXX probably better to look for check-string
726 than for "\n", so one should lower the limit for t? */
727 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 728 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 729 other_last = strpos = s = t + 1;
cad2e5aa
JH
730 goto restart;
731 }
732 t++;
733 }
2c2d71f5
JH
734 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
735 PL_colors[0],PL_colors[1]));
736 goto fail_finish;
cad2e5aa 737 }
f5952150
GS
738 else {
739 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
740 PL_colors[0],PL_colors[1]));
741 }
cad2e5aa
JH
742 s = t;
743 set_useful:
2c2d71f5 744 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
745 }
746 else {
f5952150 747 /* The found string does not prohibit matching at strpos,
2c2d71f5 748 - no optimization of calling REx engine can be performed,
f5952150
GS
749 unless it was an MBOL and we are not after MBOL,
750 or a future STCLASS check will fail this. */
2c2d71f5
JH
751 try_at_start:
752 /* Even in this situation we may use MBOL flag if strpos is offset
753 wrt the start of the string. */
05b4157f 754 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 755 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
756 /* May be due to an implicit anchor of m{.*foo} */
757 && !(prog->reganch & ROPT_IMPLICIT))
758 {
cad2e5aa
JH
759 t = strpos;
760 goto find_anchor;
761 }
30944b6d 762 DEBUG_r( if (ml_anch)
f5952150
GS
763 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
764 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 765 );
2c2d71f5 766 success_at_start:
30944b6d 767 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 768 && prog->check_substr /* Could be deleted already */
cad2e5aa 769 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
770 && prog->check_substr == prog->float_substr)
771 {
cad2e5aa 772 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 773 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
774 SvREFCNT_dec(prog->check_substr);
775 prog->check_substr = Nullsv; /* disable */
776 prog->float_substr = Nullsv; /* clear */
5e39e1e5 777 check = Nullsv; /* abort */
cad2e5aa 778 s = strpos;
3cf5c195
IZ
779 /* XXXX This is a remnant of the old implementation. It
780 looks wasteful, since now INTUIT can use many
6eb5f6b9 781 other heuristics. */
cad2e5aa
JH
782 prog->reganch &= ~RE_USE_INTUIT;
783 }
784 else
785 s = strpos;
786 }
787
6eb5f6b9
JH
788 /* Last resort... */
789 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
790 if (prog->regstclass) {
791 /* minlen == 0 is possible if regstclass is \b or \B,
792 and the fixed substr is ''$.
793 Since minlen is already taken into account, s+1 is before strend;
794 accidentally, minlen >= 1 guaranties no false positives at s + 1
795 even for \b or \B. But (minlen? 1 : 0) below assumes that
796 regstclass does not come from lookahead... */
797 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
798 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 799 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 800 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 801 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 802 : 1);
6eb5f6b9 803 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
804 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
805 : (prog->float_substr
806 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
807 cl_l, strend)
808 : strend);
a1933d95 809 char *startpos = strbeg;
6eb5f6b9
JH
810
811 t = s;
76384e4a 812 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 813 PL_regdata = prog->data;
76384e4a
GS
814 PL_bostr = startpos;
815 }
f33976b4 816 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
6eb5f6b9
JH
817 if (!s) {
818#ifdef DEBUGGING
b7953727 819 char *what = 0;
6eb5f6b9
JH
820#endif
821 if (endpos == strend) {
822 DEBUG_r( PerlIO_printf(Perl_debug_log,
823 "Could not match STCLASS...\n") );
824 goto fail;
825 }
66e933ab
GS
826 DEBUG_r( PerlIO_printf(Perl_debug_log,
827 "This position contradicts STCLASS...\n") );
653099ff
GS
828 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
829 goto fail;
6eb5f6b9
JH
830 /* Contradict one of substrings */
831 if (prog->anchored_substr) {
6eb5f6b9
JH
832 if (prog->anchored_substr == check) {
833 DEBUG_r( what = "anchored" );
834 hop_and_restart:
1aa99e6b 835 s = HOP3c(t, 1, strend);
66e933ab
GS
836 if (s + start_shift + end_shift > strend) {
837 /* XXXX Should be taken into account earlier? */
838 DEBUG_r( PerlIO_printf(Perl_debug_log,
839 "Could not match STCLASS...\n") );
840 goto fail;
841 }
5e39e1e5
HS
842 if (!check)
843 goto giveup;
6eb5f6b9 844 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 845 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
846 what, (long)(s + start_shift - i_strpos)) );
847 goto restart;
848 }
66e933ab 849 /* Have both, check_string is floating */
6eb5f6b9
JH
850 if (t + start_shift >= check_at) /* Contradicts floating=check */
851 goto retry_floating_check;
852 /* Recheck anchored substring, but not floating... */
9041c2e3 853 s = check_at;
5e39e1e5
HS
854 if (!check)
855 goto giveup;
6eb5f6b9 856 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 857 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
858 (long)(other_last - i_strpos)) );
859 goto do_other_anchored;
860 }
60e71179
GS
861 /* Another way we could have checked stclass at the
862 current position only: */
863 if (ml_anch) {
864 s = t = t + 1;
5e39e1e5
HS
865 if (!check)
866 goto giveup;
60e71179 867 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
868 "Looking for /%s^%s/m starting at offset %ld...\n",
869 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 870 goto try_at_offset;
66e933ab 871 }
60e71179
GS
872 if (!prog->float_substr) /* Could have been deleted */
873 goto fail;
6eb5f6b9
JH
874 /* Check is floating subtring. */
875 retry_floating_check:
876 t = check_at - start_shift;
877 DEBUG_r( what = "floating" );
878 goto hop_and_restart;
879 }
b7953727
JH
880 if (t != s) {
881 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 882 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
883 (long)(t - i_strpos), (long)(s - i_strpos))
884 );
885 }
886 else {
887 DEBUG_r(PerlIO_printf(Perl_debug_log,
888 "Does not contradict STCLASS...\n");
889 );
890 }
6eb5f6b9 891 }
5e39e1e5
HS
892 giveup:
893 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
894 PL_colors[4], (check ? "Guessed" : "Giving up"),
895 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 896 return s;
2c2d71f5
JH
897
898 fail_finish: /* Substring not found */
66e933ab
GS
899 if (prog->check_substr) /* could be removed already */
900 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 901 fail:
2c2d71f5 902 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
903 PL_colors[4],PL_colors[5]));
904 return Nullch;
905}
9661b544 906
6eb5f6b9 907/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
908STATIC char *
909S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 910{
6eb5f6b9
JH
911 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
912 char *m;
d8093b23
G
913 STRLEN ln;
914 unsigned int c1;
915 unsigned int c2;
6eb5f6b9
JH
916 char *e;
917 register I32 tmp = 1; /* Scratch variable? */
53c4c00c 918 register bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 919
6eb5f6b9
JH
920 /* We know what class it must start with. */
921 switch (OP(c)) {
6eb5f6b9 922 case ANYOF:
a0ed51b3 923 while (s < strend) {
e0f9d4a8
JH
924 STRLEN skip = do_utf8 ? UTF8SKIP(s) : 1;
925
926 if (reginclass(c, (U8*)s, do_utf8) ||
ebc501f0
JH
927 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
928 /* The assignment of 2 is intentional:
929 * for the sharp s, the skip is 2. */
930 (skip = SHARP_S_SKIP)
931 )) {
6eb5f6b9
JH
932 if (tmp && (norun || regtry(prog, s)))
933 goto got_it;
934 else
935 tmp = doevery;
a0ed51b3 936 }
e0f9d4a8
JH
937 else
938 tmp = 1;
939 s += skip;
a0d0e21e 940 }
6eb5f6b9 941 break;
f33976b4
DB
942 case CANY:
943 while (s < strend) {
944 if (tmp && (norun || regtry(prog, s)))
945 goto got_it;
946 else
947 tmp = doevery;
948 s++;
949 }
950 break;
6eb5f6b9
JH
951 case EXACTF:
952 m = STRING(c);
953 ln = STR_LEN(c);
1aa99e6b 954 if (UTF) {
a2a2844f 955 STRLEN ulen1, ulen2;
e7ae6809
JH
956 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
957 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
958
959 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
960 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
961
962 c1 = utf8_to_uvuni(tmpbuf1, 0);
963 c2 = utf8_to_uvuni(tmpbuf2, 0);
1aa99e6b
IH
964 }
965 else {
966 c1 = *(U8*)m;
967 c2 = PL_fold[c1];
968 }
6eb5f6b9
JH
969 goto do_exactf;
970 case EXACTFL:
971 m = STRING(c);
972 ln = STR_LEN(c);
d8093b23 973 c1 = *(U8*)m;
6eb5f6b9
JH
974 c2 = PL_fold_locale[c1];
975 do_exactf:
93c50829 976 e = do_utf8 ? s + ln : strend - ln;
b3c9acc1 977
6eb5f6b9
JH
978 if (norun && e < s)
979 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 980
60a8b682
JH
981 /* The idea in the EXACTF* cases is to first find the
982 * first character of the EXACTF* node and then, if
983 * necessary, case-insensitively compare the full
984 * text of the node. The c1 and c2 are the first
985 * characters (though in Unicode it gets a bit
986 * more complicated because there are more cases
987 * than just upper and lower: one is really supposed
988 * to use the so-called folding case for case-insensitive
989 * matching (called "loose matching" in Unicode). */
990
1aa99e6b 991 if (do_utf8) {
575cac57
JH
992 UV c, f;
993 U8 tmpbuf [UTF8_MAXLEN+1];
994 U8 foldbuf[UTF8_MAXLEN_FOLD+1];
995 STRLEN len, foldlen;
d7f013c8 996
09091399 997 if (c1 == c2) {
1aa99e6b 998 while (s <= e) {
575cac57 999 c = utf8_to_uvchr((U8*)s, &len);
80aecb99
JH
1000 if ( c == c1
1001 && (ln == len ||
66423254
JH
1002 ibcmp_utf8(s, (char **)0, 0, do_utf8,
1003 m, (char **)0, ln, UTF))
55da9344 1004 && (norun || regtry(prog, s)) )
1aa99e6b 1005 goto got_it;
80aecb99
JH
1006 else {
1007 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1008 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1009 if ( f != c
1010 && (f == c1 || f == c2)
1011 && (ln == foldlen ||
66423254
JH
1012 !ibcmp_utf8((char *) foldbuf,
1013 (char **)0, foldlen, do_utf8,
d07ddd77 1014 m,
66423254 1015 (char **)0, ln, UTF))
80aecb99
JH
1016 && (norun || regtry(prog, s)) )
1017 goto got_it;
1018 }
1aa99e6b
IH
1019 s += len;
1020 }
09091399
JH
1021 }
1022 else {
1aa99e6b 1023 while (s <= e) {
575cac57 1024 c = utf8_to_uvchr((U8*)s, &len);
80aecb99 1025
60a8b682 1026 /* Handle some of the three Greek sigmas cases.
8c01da3c
JH
1027 * Note that not all the possible combinations
1028 * are handled here: some of them are handled
1029 * by the standard folding rules, and some of
1030 * them (the character class or ANYOF cases)
1031 * are handled during compiletime in
1032 * regexec.c:S_regclass(). */
880bd946
JH
1033 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1034 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1035 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99
JH
1036
1037 if ( (c == c1 || c == c2)
1038 && (ln == len ||
66423254
JH
1039 ibcmp_utf8(s, (char **)0, 0, do_utf8,
1040 m, (char **)0, ln, UTF))
55da9344 1041 && (norun || regtry(prog, s)) )
1aa99e6b 1042 goto got_it;
80aecb99
JH
1043 else {
1044 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1045 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1046 if ( f != c
1047 && (f == c1 || f == c2)
1048 && (ln == foldlen ||
a6872d42 1049 !ibcmp_utf8((char *) foldbuf,
66423254 1050 (char **)0, foldlen, do_utf8,
d07ddd77 1051 m,
66423254 1052 (char **)0, ln, UTF))
80aecb99
JH
1053 && (norun || regtry(prog, s)) )
1054 goto got_it;
1055 }
1aa99e6b
IH
1056 s += len;
1057 }
09091399 1058 }
1aa99e6b
IH
1059 }
1060 else {
1061 if (c1 == c2)
1062 while (s <= e) {
1063 if ( *(U8*)s == c1
1064 && (ln == 1 || !(OP(c) == EXACTF
1065 ? ibcmp(s, m, ln)
1066 : ibcmp_locale(s, m, ln)))
1067 && (norun || regtry(prog, s)) )
1068 goto got_it;
1069 s++;
1070 }
1071 else
1072 while (s <= e) {
1073 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1074 && (ln == 1 || !(OP(c) == EXACTF
1075 ? ibcmp(s, m, ln)
1076 : ibcmp_locale(s, m, ln)))
1077 && (norun || regtry(prog, s)) )
1078 goto got_it;
1079 s++;
1080 }
b3c9acc1
IZ
1081 }
1082 break;
bbce6d69 1083 case BOUNDL:
3280af22 1084 PL_reg_flags |= RF_tainted;
bbce6d69 1085 /* FALL THROUGH */
a0d0e21e 1086 case BOUND:
ffc61ed2 1087 if (do_utf8) {
12d33761 1088 if (s == PL_bostr)
ffc61ed2
JH
1089 tmp = '\n';
1090 else {
1aa99e6b 1091 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1092
0064a8a9
JH
1093 if (s > (char*)r)
1094 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1095 }
1096 tmp = ((OP(c) == BOUND ?
9041c2e3 1097 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1098 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1099 while (s < strend) {
1100 if (tmp == !(OP(c) == BOUND ?
3568d838 1101 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1102 isALNUM_LC_utf8((U8*)s)))
1103 {
1104 tmp = !tmp;
1105 if ((norun || regtry(prog, s)))
1106 goto got_it;
1107 }
1108 s += UTF8SKIP(s);
a687059c 1109 }
a0d0e21e 1110 }
667bb95a 1111 else {
12d33761 1112 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1113 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1114 while (s < strend) {
1115 if (tmp ==
1116 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1117 tmp = !tmp;
1118 if ((norun || regtry(prog, s)))
1119 goto got_it;
1120 }
1121 s++;
a0ed51b3 1122 }
a0ed51b3 1123 }
6eb5f6b9 1124 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1125 goto got_it;
1126 break;
bbce6d69 1127 case NBOUNDL:
3280af22 1128 PL_reg_flags |= RF_tainted;
bbce6d69 1129 /* FALL THROUGH */
a0d0e21e 1130 case NBOUND:
ffc61ed2 1131 if (do_utf8) {
12d33761 1132 if (s == PL_bostr)
ffc61ed2
JH
1133 tmp = '\n';
1134 else {
1aa99e6b 1135 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1136
0064a8a9
JH
1137 if (s > (char*)r)
1138 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1139 }
1140 tmp = ((OP(c) == NBOUND ?
9041c2e3 1141 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1142 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1143 while (s < strend) {
1144 if (tmp == !(OP(c) == NBOUND ?
3568d838 1145 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1146 isALNUM_LC_utf8((U8*)s)))
1147 tmp = !tmp;
1148 else if ((norun || regtry(prog, s)))
1149 goto got_it;
1150 s += UTF8SKIP(s);
1151 }
a0d0e21e 1152 }
667bb95a 1153 else {
12d33761 1154 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1155 tmp = ((OP(c) == NBOUND ?
1156 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1157 while (s < strend) {
1158 if (tmp ==
1159 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1160 tmp = !tmp;
1161 else if ((norun || regtry(prog, s)))
1162 goto got_it;
1163 s++;
1164 }
a0ed51b3 1165 }
6eb5f6b9 1166 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1167 goto got_it;
1168 break;
a0d0e21e 1169 case ALNUM:
ffc61ed2 1170 if (do_utf8) {
8269fa76 1171 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1172 while (s < strend) {
3568d838 1173 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1174 if (tmp && (norun || regtry(prog, s)))
1175 goto got_it;
1176 else
1177 tmp = doevery;
1178 }
bbce6d69 1179 else
ffc61ed2
JH
1180 tmp = 1;
1181 s += UTF8SKIP(s);
bbce6d69 1182 }
bbce6d69 1183 }
ffc61ed2
JH
1184 else {
1185 while (s < strend) {
1186 if (isALNUM(*s)) {
1187 if (tmp && (norun || regtry(prog, s)))
1188 goto got_it;
1189 else
1190 tmp = doevery;
1191 }
a0ed51b3 1192 else
ffc61ed2
JH
1193 tmp = 1;
1194 s++;
a0ed51b3 1195 }
a0ed51b3
LW
1196 }
1197 break;
bbce6d69 1198 case ALNUML:
3280af22 1199 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1200 if (do_utf8) {
1201 while (s < strend) {
1202 if (isALNUM_LC_utf8((U8*)s)) {
1203 if (tmp && (norun || regtry(prog, s)))
1204 goto got_it;
1205 else
1206 tmp = doevery;
1207 }
a687059c 1208 else
ffc61ed2
JH
1209 tmp = 1;
1210 s += UTF8SKIP(s);
a0d0e21e 1211 }
a0d0e21e 1212 }
ffc61ed2
JH
1213 else {
1214 while (s < strend) {
1215 if (isALNUM_LC(*s)) {
1216 if (tmp && (norun || regtry(prog, s)))
1217 goto got_it;
1218 else
1219 tmp = doevery;
1220 }
a0ed51b3 1221 else
ffc61ed2
JH
1222 tmp = 1;
1223 s++;
a0ed51b3 1224 }
a0ed51b3
LW
1225 }
1226 break;
a0d0e21e 1227 case NALNUM:
ffc61ed2 1228 if (do_utf8) {
8269fa76 1229 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1230 while (s < strend) {
3568d838 1231 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1232 if (tmp && (norun || regtry(prog, s)))
1233 goto got_it;
1234 else
1235 tmp = doevery;
1236 }
bbce6d69 1237 else
ffc61ed2
JH
1238 tmp = 1;
1239 s += UTF8SKIP(s);
bbce6d69 1240 }
bbce6d69 1241 }
ffc61ed2
JH
1242 else {
1243 while (s < strend) {
1244 if (!isALNUM(*s)) {
1245 if (tmp && (norun || regtry(prog, s)))
1246 goto got_it;
1247 else
1248 tmp = doevery;
1249 }
a0ed51b3 1250 else
ffc61ed2
JH
1251 tmp = 1;
1252 s++;
a0ed51b3 1253 }
a0ed51b3
LW
1254 }
1255 break;
bbce6d69 1256 case NALNUML:
3280af22 1257 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1258 if (do_utf8) {
1259 while (s < strend) {
1260 if (!isALNUM_LC_utf8((U8*)s)) {
1261 if (tmp && (norun || regtry(prog, s)))
1262 goto got_it;
1263 else
1264 tmp = doevery;
1265 }
a687059c 1266 else
ffc61ed2
JH
1267 tmp = 1;
1268 s += UTF8SKIP(s);
a687059c 1269 }
a0d0e21e 1270 }
ffc61ed2
JH
1271 else {
1272 while (s < strend) {
1273 if (!isALNUM_LC(*s)) {
1274 if (tmp && (norun || regtry(prog, s)))
1275 goto got_it;
1276 else
1277 tmp = doevery;
1278 }
a0ed51b3 1279 else
ffc61ed2
JH
1280 tmp = 1;
1281 s++;
a0ed51b3 1282 }
a0ed51b3
LW
1283 }
1284 break;
a0d0e21e 1285 case SPACE:
ffc61ed2 1286 if (do_utf8) {
8269fa76 1287 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1288 while (s < strend) {
3568d838 1289 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1290 if (tmp && (norun || regtry(prog, s)))
1291 goto got_it;
1292 else
1293 tmp = doevery;
1294 }
a0d0e21e 1295 else
ffc61ed2
JH
1296 tmp = 1;
1297 s += UTF8SKIP(s);
2304df62 1298 }
a0d0e21e 1299 }
ffc61ed2
JH
1300 else {
1301 while (s < strend) {
1302 if (isSPACE(*s)) {
1303 if (tmp && (norun || regtry(prog, s)))
1304 goto got_it;
1305 else
1306 tmp = doevery;
1307 }
a0ed51b3 1308 else
ffc61ed2
JH
1309 tmp = 1;
1310 s++;
a0ed51b3 1311 }
a0ed51b3
LW
1312 }
1313 break;
bbce6d69 1314 case SPACEL:
3280af22 1315 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1316 if (do_utf8) {
1317 while (s < strend) {
1318 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1319 if (tmp && (norun || regtry(prog, s)))
1320 goto got_it;
1321 else
1322 tmp = doevery;
1323 }
bbce6d69 1324 else
ffc61ed2
JH
1325 tmp = 1;
1326 s += UTF8SKIP(s);
bbce6d69 1327 }
bbce6d69 1328 }
ffc61ed2
JH
1329 else {
1330 while (s < strend) {
1331 if (isSPACE_LC(*s)) {
1332 if (tmp && (norun || regtry(prog, s)))
1333 goto got_it;
1334 else
1335 tmp = doevery;
1336 }
a0ed51b3 1337 else
ffc61ed2
JH
1338 tmp = 1;
1339 s++;
a0ed51b3 1340 }
a0ed51b3
LW
1341 }
1342 break;
a0d0e21e 1343 case NSPACE:
ffc61ed2 1344 if (do_utf8) {
8269fa76 1345 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1346 while (s < strend) {
3568d838 1347 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1348 if (tmp && (norun || regtry(prog, s)))
1349 goto got_it;
1350 else
1351 tmp = doevery;
1352 }
a0d0e21e 1353 else
ffc61ed2
JH
1354 tmp = 1;
1355 s += UTF8SKIP(s);
a687059c 1356 }
a0d0e21e 1357 }
ffc61ed2
JH
1358 else {
1359 while (s < strend) {
1360 if (!isSPACE(*s)) {
1361 if (tmp && (norun || regtry(prog, s)))
1362 goto got_it;
1363 else
1364 tmp = doevery;
1365 }
a0ed51b3 1366 else
ffc61ed2
JH
1367 tmp = 1;
1368 s++;
a0ed51b3 1369 }
a0ed51b3
LW
1370 }
1371 break;
bbce6d69 1372 case NSPACEL:
3280af22 1373 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1374 if (do_utf8) {
1375 while (s < strend) {
1376 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1377 if (tmp && (norun || regtry(prog, s)))
1378 goto got_it;
1379 else
1380 tmp = doevery;
1381 }
bbce6d69 1382 else
ffc61ed2
JH
1383 tmp = 1;
1384 s += UTF8SKIP(s);
bbce6d69 1385 }
bbce6d69 1386 }
ffc61ed2
JH
1387 else {
1388 while (s < strend) {
1389 if (!isSPACE_LC(*s)) {
1390 if (tmp && (norun || regtry(prog, s)))
1391 goto got_it;
1392 else
1393 tmp = doevery;
1394 }
a0ed51b3 1395 else
ffc61ed2
JH
1396 tmp = 1;
1397 s++;
a0ed51b3 1398 }
a0ed51b3
LW
1399 }
1400 break;
a0d0e21e 1401 case DIGIT:
ffc61ed2 1402 if (do_utf8) {
8269fa76 1403 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1404 while (s < strend) {
3568d838 1405 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1406 if (tmp && (norun || regtry(prog, s)))
1407 goto got_it;
1408 else
1409 tmp = doevery;
1410 }
a0d0e21e 1411 else
ffc61ed2
JH
1412 tmp = 1;
1413 s += UTF8SKIP(s);
2b69d0c2 1414 }
a0d0e21e 1415 }
ffc61ed2
JH
1416 else {
1417 while (s < strend) {
1418 if (isDIGIT(*s)) {
1419 if (tmp && (norun || regtry(prog, s)))
1420 goto got_it;
1421 else
1422 tmp = doevery;
1423 }
a0ed51b3 1424 else
ffc61ed2
JH
1425 tmp = 1;
1426 s++;
a0ed51b3 1427 }
a0ed51b3
LW
1428 }
1429 break;
b8c5462f
JH
1430 case DIGITL:
1431 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1432 if (do_utf8) {
1433 while (s < strend) {
1434 if (isDIGIT_LC_utf8((U8*)s)) {
1435 if (tmp && (norun || regtry(prog, s)))
1436 goto got_it;
1437 else
1438 tmp = doevery;
1439 }
b8c5462f 1440 else
ffc61ed2
JH
1441 tmp = 1;
1442 s += UTF8SKIP(s);
b8c5462f 1443 }
b8c5462f 1444 }
ffc61ed2
JH
1445 else {
1446 while (s < strend) {
1447 if (isDIGIT_LC(*s)) {
1448 if (tmp && (norun || regtry(prog, s)))
1449 goto got_it;
1450 else
1451 tmp = doevery;
1452 }
b8c5462f 1453 else
ffc61ed2
JH
1454 tmp = 1;
1455 s++;
b8c5462f 1456 }
b8c5462f
JH
1457 }
1458 break;
a0d0e21e 1459 case NDIGIT:
ffc61ed2 1460 if (do_utf8) {
8269fa76 1461 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1462 while (s < strend) {
3568d838 1463 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1464 if (tmp && (norun || regtry(prog, s)))
1465 goto got_it;
1466 else
1467 tmp = doevery;
1468 }
a0d0e21e 1469 else
ffc61ed2
JH
1470 tmp = 1;
1471 s += UTF8SKIP(s);
a687059c 1472 }
a0d0e21e 1473 }
ffc61ed2
JH
1474 else {
1475 while (s < strend) {
1476 if (!isDIGIT(*s)) {
1477 if (tmp && (norun || regtry(prog, s)))
1478 goto got_it;
1479 else
1480 tmp = doevery;
1481 }
a0ed51b3 1482 else
ffc61ed2
JH
1483 tmp = 1;
1484 s++;
a0ed51b3 1485 }
a0ed51b3
LW
1486 }
1487 break;
b8c5462f
JH
1488 case NDIGITL:
1489 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1490 if (do_utf8) {
1491 while (s < strend) {
1492 if (!isDIGIT_LC_utf8((U8*)s)) {
1493 if (tmp && (norun || regtry(prog, s)))
1494 goto got_it;
1495 else
1496 tmp = doevery;
1497 }
b8c5462f 1498 else
ffc61ed2
JH
1499 tmp = 1;
1500 s += UTF8SKIP(s);
b8c5462f 1501 }
a0ed51b3 1502 }
ffc61ed2
JH
1503 else {
1504 while (s < strend) {
1505 if (!isDIGIT_LC(*s)) {
1506 if (tmp && (norun || regtry(prog, s)))
1507 goto got_it;
1508 else
1509 tmp = doevery;
1510 }
cf93c79d 1511 else
ffc61ed2
JH
1512 tmp = 1;
1513 s++;
b8c5462f 1514 }
b8c5462f
JH
1515 }
1516 break;
b3c9acc1 1517 default:
3c3eec57
GS
1518 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1519 break;
d6a28714 1520 }
6eb5f6b9
JH
1521 return 0;
1522 got_it:
1523 return s;
1524}
1525
1526/*
1527 - regexec_flags - match a regexp against a string
1528 */
1529I32
1530Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1531 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1532/* strend: pointer to null at end of string */
1533/* strbeg: real beginning of string */
1534/* minend: end of match must be >=minend after stringarg. */
1535/* data: May be used for some additional optimizations. */
1536/* nosave: For optimizations. */
1537{
6eb5f6b9
JH
1538 register char *s;
1539 register regnode *c;
1540 register char *startpos = stringarg;
6eb5f6b9
JH
1541 I32 minlen; /* must match at least this many chars */
1542 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1543 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1544 constant substr. */ /* CC */
1545 I32 end_shift = 0; /* Same for the end. */ /* CC */
1546 I32 scream_pos = -1; /* Internal iterator of scream. */
1547 char *scream_olds;
1548 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1549 bool do_utf8 = DO_UTF8(sv);
2a782b5b 1550#ifdef DEBUGGING
9e55ce06
JH
1551 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
1552 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2a782b5b 1553#endif
6eb5f6b9
JH
1554
1555 PL_regcc = 0;
1556
1557 cache_re(prog);
1558#ifdef DEBUGGING
aea4f609 1559 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1560#endif
1561
1562 /* Be paranoid... */
1563 if (prog == NULL || startpos == NULL) {
1564 Perl_croak(aTHX_ "NULL regexp parameter");
1565 return 0;
1566 }
1567
1568 minlen = prog->minlen;
61a36c01 1569 if (strend - startpos < minlen) {
a72c7584
JH
1570 DEBUG_r(PerlIO_printf(Perl_debug_log,
1571 "String too short [regexec_flags]...\n"));
1572 goto phooey;
1aa99e6b 1573 }
6eb5f6b9 1574
6eb5f6b9
JH
1575 /* Check validity of program. */
1576 if (UCHARAT(prog->program) != REG_MAGIC) {
1577 Perl_croak(aTHX_ "corrupted regexp program");
1578 }
1579
1580 PL_reg_flags = 0;
1581 PL_reg_eval_set = 0;
1582 PL_reg_maxiter = 0;
1583
1584 if (prog->reganch & ROPT_UTF8)
1585 PL_reg_flags |= RF_utf8;
1586
1587 /* Mark beginning of line for ^ and lookbehind. */
1588 PL_regbol = startpos;
1589 PL_bostr = strbeg;
1590 PL_reg_sv = sv;
1591
1592 /* Mark end of line for $ (and such) */
1593 PL_regeol = strend;
1594
1595 /* see how far we have to get to not match where we matched before */
1596 PL_regtill = startpos+minend;
1597
1598 /* We start without call_cc context. */
1599 PL_reg_call_cc = 0;
1600
1601 /* If there is a "must appear" string, look for it. */
1602 s = startpos;
1603
1604 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1605 MAGIC *mg;
1606
1607 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1608 PL_reg_ganch = startpos;
1609 else if (sv && SvTYPE(sv) >= SVt_PVMG
1610 && SvMAGIC(sv)
14befaf4
DM
1611 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1612 && mg->mg_len >= 0) {
6eb5f6b9
JH
1613 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1614 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1615 if (s > PL_reg_ganch)
6eb5f6b9
JH
1616 goto phooey;
1617 s = PL_reg_ganch;
1618 }
1619 }
1620 else /* pos() not defined */
1621 PL_reg_ganch = strbeg;
1622 }
1623
699c3c34
JH
1624 if (do_utf8 == (UTF!=0) &&
1625 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1626 re_scream_pos_data d;
1627
1628 d.scream_olds = &scream_olds;
1629 d.scream_pos = &scream_pos;
1630 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7
JH
1631 if (!s) {
1632 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1633 goto phooey; /* not present */
3fa9c3d7 1634 }
6eb5f6b9
JH
1635 }
1636
2a782b5b 1637 DEBUG_r({
9e55ce06
JH
1638 char *s0 = UTF ?
1639 pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
c728cb41 1640 UNI_DISPLAY_REGEX) :
9e55ce06
JH
1641 prog->precomp;
1642 int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1643 char *s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
c728cb41 1644 UNI_DISPLAY_REGEX) : startpos;
9e55ce06 1645 int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
2a782b5b
JH
1646 if (!PL_colorset)
1647 reginitcolors();
1648 PerlIO_printf(Perl_debug_log,
9e55ce06 1649 "%sMatching REx%s `%s%*.*s%s%s' against `%s%.*s%s%s'\n",
2a782b5b 1650 PL_colors[4],PL_colors[5],PL_colors[0],
9e55ce06 1651 len0, len0, s0,
2a782b5b 1652 PL_colors[1],
9e55ce06 1653 len0 > 60 ? "..." : "",
2a782b5b 1654 PL_colors[0],
9e55ce06
JH
1655 (int)(len1 > 60 ? 60 : len1),
1656 s1, PL_colors[1],
1657 (len1 > 60 ? "..." : "")
2a782b5b
JH
1658 );
1659 });
6eb5f6b9
JH
1660
1661 /* Simplest case: anchored match need be tried only once. */
1662 /* [unless only anchor is BOL and multiline is set] */
1663 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1664 if (s == startpos && regtry(prog, startpos))
1665 goto got_it;
1666 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1667 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1668 {
1669 char *end;
1670
1671 if (minlen)
1672 dontbother = minlen - 1;
1aa99e6b 1673 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1674 /* for multiline we only have to try after newlines */
1675 if (prog->check_substr) {
1676 if (s == startpos)
1677 goto after_try;
1678 while (1) {
1679 if (regtry(prog, s))
1680 goto got_it;
1681 after_try:
1682 if (s >= end)
1683 goto phooey;
1684 if (prog->reganch & RE_USE_INTUIT) {
1685 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1686 if (!s)
1687 goto phooey;
1688 }
1689 else
1690 s++;
1691 }
1692 } else {
1693 if (s > startpos)
1694 s--;
1695 while (s < end) {
1696 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1697 if (regtry(prog, s))
1698 goto got_it;
1699 }
1700 }
1701 }
1702 }
1703 goto phooey;
1704 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1705 if (regtry(prog, PL_reg_ganch))
1706 goto got_it;
1707 goto phooey;
1708 }
1709
1710 /* Messy cases: unanchored match. */
9041c2e3 1711 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1712 /* we have /x+whatever/ */
1713 /* it must be a one character string (XXXX Except UTF?) */
1714 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1715#ifdef DEBUGGING
1716 int did_match = 0;
1717#endif
1718
1aa99e6b 1719 if (do_utf8) {
6eb5f6b9
JH
1720 while (s < strend) {
1721 if (*s == ch) {
bf93d4cc 1722 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1723 if (regtry(prog, s)) goto got_it;
1724 s += UTF8SKIP(s);
1725 while (s < strend && *s == ch)
1726 s += UTF8SKIP(s);
1727 }
1728 s += UTF8SKIP(s);
1729 }
1730 }
1731 else {
1732 while (s < strend) {
1733 if (*s == ch) {
bf93d4cc 1734 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1735 if (regtry(prog, s)) goto got_it;
1736 s++;
1737 while (s < strend && *s == ch)
1738 s++;
1739 }
1740 s++;
1741 }
1742 }
b7953727 1743 DEBUG_r(if (!did_match)
bf93d4cc 1744 PerlIO_printf(Perl_debug_log,
b7953727
JH
1745 "Did not find anchored character...\n")
1746 );
6eb5f6b9
JH
1747 }
1748 /*SUPPRESS 560*/
1aa99e6b
IH
1749 else if (do_utf8 == (UTF!=0) &&
1750 (prog->anchored_substr != Nullsv
9041c2e3 1751 || (prog->float_substr != Nullsv
1aa99e6b 1752 && prog->float_max_offset < strend - s))) {
9041c2e3 1753 SV *must = prog->anchored_substr
6eb5f6b9 1754 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1755 I32 back_max =
6eb5f6b9 1756 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1757 I32 back_min =
6eb5f6b9 1758 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1759 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1760 -(I32)(CHR_SVLEN(must)
1aa99e6b 1761 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1762 char *last1; /* Last position checked before */
bf93d4cc
GS
1763#ifdef DEBUGGING
1764 int did_match = 0;
1765#endif
6eb5f6b9
JH
1766
1767 if (s > PL_bostr)
1768 last1 = HOPc(s, -1);
1769 else
1770 last1 = s - 1; /* bogus */
1771
1772 /* XXXX check_substr already used to find `s', can optimize if
1773 check_substr==must. */
1774 scream_pos = -1;
1775 dontbother = end_shift;
1776 strend = HOPc(strend, -dontbother);
1777 while ( (s <= last) &&
9041c2e3 1778 ((flags & REXEC_SCREAM)
1aa99e6b 1779 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1780 end_shift, &scream_pos, 0))
1aa99e6b 1781 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1782 (unsigned char*)strend, must,
6eb5f6b9 1783 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1784 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1785 if (HOPc(s, -back_max) > last1) {
1786 last1 = HOPc(s, -back_min);
1787 s = HOPc(s, -back_max);
1788 }
1789 else {
1790 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1791
1792 last1 = HOPc(s, -back_min);
1793 s = t;
1794 }
1aa99e6b 1795 if (do_utf8) {
6eb5f6b9
JH
1796 while (s <= last1) {
1797 if (regtry(prog, s))
1798 goto got_it;
1799 s += UTF8SKIP(s);
1800 }
1801 }
1802 else {
1803 while (s <= last1) {
1804 if (regtry(prog, s))
1805 goto got_it;
1806 s++;
1807 }
1808 }
1809 }
b7953727
JH
1810 DEBUG_r(if (!did_match)
1811 PerlIO_printf(Perl_debug_log,
1812 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1813 ((must == prog->anchored_substr)
1814 ? "anchored" : "floating"),
1815 PL_colors[0],
1816 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1817 SvPVX(must),
b7953727
JH
1818 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1819 );
6eb5f6b9
JH
1820 goto phooey;
1821 }
155aba94 1822 else if ((c = prog->regstclass)) {
66e933ab
GS
1823 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1824 /* don't bother with what can't match */
6eb5f6b9 1825 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1826 DEBUG_r({
1827 SV *prop = sv_newmortal();
9e55ce06
JH
1828 char *s0;
1829 char *s1;
1830 int len0;
1831 int len1;
1832
ffc61ed2 1833 regprop(prop, c);
9e55ce06
JH
1834 s0 = UTF ?
1835 pv_uni_display(dsv0, (U8*)SvPVX(prop), SvCUR(prop), 60,
c728cb41 1836 UNI_DISPLAY_REGEX) :
9e55ce06
JH
1837 SvPVX(prop);
1838 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1839 s1 = UTF ?
c728cb41 1840 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
9e55ce06
JH
1841 len1 = UTF ? SvCUR(dsv1) : strend - s;
1842 PerlIO_printf(Perl_debug_log,
1843 "Matching stclass `%*.*s' against `%*.*s'\n",
1844 len0, len0, s0,
1845 len1, len1, s1);
ffc61ed2 1846 });
6eb5f6b9
JH
1847 if (find_byclass(prog, c, s, strend, startpos, 0))
1848 goto got_it;
bf93d4cc 1849 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1850 }
1851 else {
1852 dontbother = 0;
1853 if (prog->float_substr != Nullsv) { /* Trim the end. */
1854 char *last;
d6a28714
JH
1855
1856 if (flags & REXEC_SCREAM) {
1857 last = screaminstr(sv, prog->float_substr, s - strbeg,
1858 end_shift, &scream_pos, 1); /* last one */
1859 if (!last)
ffc61ed2 1860 last = scream_olds; /* Only one occurrence. */
b8c5462f 1861 }
d6a28714
JH
1862 else {
1863 STRLEN len;
1864 char *little = SvPV(prog->float_substr, len);
1865
1866 if (SvTAIL(prog->float_substr)) {
1867 if (memEQ(strend - len + 1, little, len - 1))
1868 last = strend - len + 1;
1869 else if (!PL_multiline)
9041c2e3 1870 last = memEQ(strend - len, little, len)
d6a28714 1871 ? strend - len : Nullch;
b8c5462f 1872 else
d6a28714
JH
1873 goto find_last;
1874 } else {
1875 find_last:
9041c2e3 1876 if (len)
d6a28714 1877 last = rninstr(s, strend, little, little + len);
b8c5462f 1878 else
d6a28714 1879 last = strend; /* matching `$' */
b8c5462f 1880 }
b8c5462f 1881 }
bf93d4cc
GS
1882 if (last == NULL) {
1883 DEBUG_r(PerlIO_printf(Perl_debug_log,
1884 "%sCan't trim the tail, match fails (should not happen)%s\n",
1885 PL_colors[4],PL_colors[5]));
1886 goto phooey; /* Should not happen! */
1887 }
d6a28714
JH
1888 dontbother = strend - last + prog->float_min_offset;
1889 }
1890 if (minlen && (dontbother < minlen))
1891 dontbother = minlen - 1;
1892 strend -= dontbother; /* this one's always in bytes! */
1893 /* We don't know much -- general case. */
1aa99e6b 1894 if (do_utf8) {
d6a28714
JH
1895 for (;;) {
1896 if (regtry(prog, s))
1897 goto got_it;
1898 if (s >= strend)
1899 break;
b8c5462f 1900 s += UTF8SKIP(s);
d6a28714
JH
1901 };
1902 }
1903 else {
1904 do {
1905 if (regtry(prog, s))
1906 goto got_it;
1907 } while (s++ < strend);
1908 }
1909 }
1910
1911 /* Failure. */
1912 goto phooey;
1913
1914got_it:
1915 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1916
1917 if (PL_reg_eval_set) {
1918 /* Preserve the current value of $^R */
1919 if (oreplsv != GvSV(PL_replgv))
1920 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1921 restored, the value remains
1922 the same. */
acfe0abc 1923 restore_pos(aTHX_ 0);
d6a28714
JH
1924 }
1925
1926 /* make sure $`, $&, $', and $digit will work later */
1927 if ( !(flags & REXEC_NOT_FIRST) ) {
1928 if (RX_MATCH_COPIED(prog)) {
1929 Safefree(prog->subbeg);
1930 RX_MATCH_COPIED_off(prog);
1931 }
1932 if (flags & REXEC_COPY_STR) {
1933 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1934
1935 s = savepvn(strbeg, i);
1936 prog->subbeg = s;
1937 prog->sublen = i;
1938 RX_MATCH_COPIED_on(prog);
1939 }
1940 else {
1941 prog->subbeg = strbeg;
1942 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1943 }
1944 }
9041c2e3 1945
d6a28714
JH
1946 return 1;
1947
1948phooey:
bf93d4cc
GS
1949 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1950 PL_colors[4],PL_colors[5]));
d6a28714 1951 if (PL_reg_eval_set)
acfe0abc 1952 restore_pos(aTHX_ 0);
d6a28714
JH
1953 return 0;
1954}
1955
1956/*
1957 - regtry - try match at specific point
1958 */
1959STATIC I32 /* 0 failure, 1 success */
1960S_regtry(pTHX_ regexp *prog, char *startpos)
1961{
d6a28714
JH
1962 register I32 i;
1963 register I32 *sp;
1964 register I32 *ep;
1965 CHECKPOINT lastcp;
1966
02db2b7b
IZ
1967#ifdef DEBUGGING
1968 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1969#endif
d6a28714
JH
1970 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1971 MAGIC *mg;
1972
1973 PL_reg_eval_set = RS_init;
1974 DEBUG_r(DEBUG_s(
b900a521
JH
1975 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1976 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1977 ));
e8347627 1978 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1979 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1980 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1981 SAVETMPS;
1982 /* Apparently this is not needed, judging by wantarray. */
e8347627 1983 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1984 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1985
1986 if (PL_reg_sv) {
1987 /* Make $_ available to executed code. */
1988 if (PL_reg_sv != DEFSV) {
4d1ff10f 1989 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
d6a28714
JH
1990 SAVESPTR(DEFSV);
1991 DEFSV = PL_reg_sv;
b8c5462f 1992 }
d6a28714 1993
9041c2e3 1994 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1995 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1996 /* prepare for quick setting of pos */
14befaf4
DM
1997 sv_magic(PL_reg_sv, (SV*)0,
1998 PERL_MAGIC_regex_global, Nullch, 0);
1999 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 2000 mg->mg_len = -1;
b8c5462f 2001 }
d6a28714
JH
2002 PL_reg_magic = mg;
2003 PL_reg_oldpos = mg->mg_len;
c76ac1ee 2004 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 2005 }
09687e5a 2006 if (!PL_reg_curpm) {
0f79a09d 2007 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
2008#ifdef USE_ITHREADS
2009 {
2010 SV* repointer = newSViv(0);
577e12cc 2011 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2012 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2013 av_push(PL_regex_padav,repointer);
2014 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2015 PL_regex_pad = AvARRAY(PL_regex_padav);
2016 }
2017#endif
2018 }
aaa362c4 2019 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2020 PL_reg_oldcurpm = PL_curpm;
2021 PL_curpm = PL_reg_curpm;
2022 if (RX_MATCH_COPIED(prog)) {
2023 /* Here is a serious problem: we cannot rewrite subbeg,
2024 since it may be needed if this match fails. Thus
2025 $` inside (?{}) could fail... */
2026 PL_reg_oldsaved = prog->subbeg;
2027 PL_reg_oldsavedlen = prog->sublen;
2028 RX_MATCH_COPIED_off(prog);
2029 }
2030 else
2031 PL_reg_oldsaved = Nullch;
2032 prog->subbeg = PL_bostr;
2033 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2034 }
2035 prog->startp[0] = startpos - PL_bostr;
2036 PL_reginput = startpos;
2037 PL_regstartp = prog->startp;
2038 PL_regendp = prog->endp;
2039 PL_reglastparen = &prog->lastparen;
a01268b5 2040 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
2041 prog->lastparen = 0;
2042 PL_regsize = 0;
2043 DEBUG_r(PL_reg_starttry = startpos);
2044 if (PL_reg_start_tmpl <= prog->nparens) {
2045 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2046 if(PL_reg_start_tmp)
2047 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2048 else
2049 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2050 }
2051
128e8167
JH
2052#ifdef DEBUGGING
2053 sv_setpvn(PERL_DEBUG_PAD(0), "", 0);
2054 sv_setpvn(PERL_DEBUG_PAD(1), "", 0);
2055 sv_setpvn(PERL_DEBUG_PAD(2), "", 0);
2056#endif
2057
d6a28714
JH
2058 /* XXXX What this code is doing here?!!! There should be no need
2059 to do this again and again, PL_reglastparen should take care of
3dd2943c 2060 this! --ilya*/
dafc8851
JH
2061
2062 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2063 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2064 * PL_reglastparen), is not needed at all by the test suite
2065 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2066 * enough, for building DynaLoader, or otherwise this
2067 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2068 * will happen. Meanwhile, this code *is* needed for the
2069 * above-mentioned test suite tests to succeed. The common theme
2070 * on those tests seems to be returning null fields from matches.
2071 * --jhi */
dafc8851 2072#if 1
d6a28714
JH
2073 sp = prog->startp;
2074 ep = prog->endp;
2075 if (prog->nparens) {
09e8ae3b 2076 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
2077 *++sp = -1;
2078 *++ep = -1;
2079 }
2080 }
dafc8851 2081#endif
02db2b7b 2082 REGCP_SET(lastcp);
d6a28714
JH
2083 if (regmatch(prog->program + 1)) {
2084 prog->endp[0] = PL_reginput - PL_bostr;
2085 return 1;
2086 }
02db2b7b 2087 REGCP_UNWIND(lastcp);
d6a28714
JH
2088 return 0;
2089}
2090
02db2b7b
IZ
2091#define RE_UNWIND_BRANCH 1
2092#define RE_UNWIND_BRANCHJ 2
2093
2094union re_unwind_t;
2095
2096typedef struct { /* XX: makes sense to enlarge it... */
2097 I32 type;
2098 I32 prev;
2099 CHECKPOINT lastcp;
2100} re_unwind_generic_t;
2101
2102typedef struct {
2103 I32 type;
2104 I32 prev;
2105 CHECKPOINT lastcp;
2106 I32 lastparen;
2107 regnode *next;
2108 char *locinput;
2109 I32 nextchr;
2110#ifdef DEBUGGING
2111 int regindent;
2112#endif
2113} re_unwind_branch_t;
2114
2115typedef union re_unwind_t {
2116 I32 type;
2117 re_unwind_generic_t generic;
2118 re_unwind_branch_t branch;
2119} re_unwind_t;
2120
8ba1375e
MJD
2121#define sayYES goto yes
2122#define sayNO goto no
e0f9d4a8 2123#define sayNO_ANYOF goto no_anyof
8ba1375e
MJD
2124#define sayYES_FINAL goto yes_final
2125#define sayYES_LOUD goto yes_loud
2126#define sayNO_FINAL goto no_final
2127#define sayNO_SILENT goto do_no
2128#define saySAME(x) if (x) goto yes; else goto no
2129
2130#define REPORT_CODE_OFF 24
2131
d6a28714
JH
2132/*
2133 - regmatch - main matching routine
2134 *
2135 * Conceptually the strategy is simple: check to see whether the current
2136 * node matches, call self recursively to see whether the rest matches,
2137 * and then act accordingly. In practice we make some effort to avoid
2138 * recursion, in particular by going through "ordinary" nodes (that don't
2139 * need to know whether the rest of the match failed) by a loop instead of
2140 * by recursion.
2141 */
2142/* [lwall] I've hoisted the register declarations to the outer block in order to
2143 * maybe save a little bit of pushing and popping on the stack. It also takes
2144 * advantage of machines that use a register save mask on subroutine entry.
2145 */
2146STATIC I32 /* 0 failure, 1 success */
2147S_regmatch(pTHX_ regnode *prog)
2148{
d6a28714
JH
2149 register regnode *scan; /* Current node. */
2150 regnode *next; /* Next node. */
2151 regnode *inner; /* Next node in internal branch. */
2152 register I32 nextchr; /* renamed nextchr - nextchar colides with
2153 function of same name */
2154 register I32 n; /* no or next */
b7953727
JH
2155 register I32 ln = 0; /* len or last */
2156 register char *s = Nullch; /* operand or save */
d6a28714 2157 register char *locinput = PL_reginput;
b7953727 2158 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2159 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2160 I32 unwind = 0;
b7953727 2161#if 0
02db2b7b 2162 I32 firstcp = PL_savestack_ix;
b7953727 2163#endif
53c4c00c 2164 register bool do_utf8 = PL_reg_match_utf8;
2a782b5b 2165#ifdef DEBUGGING
ce333219
JH
2166 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2167 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2168 SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2a782b5b 2169#endif
02db2b7b 2170
d6a28714
JH
2171#ifdef DEBUGGING
2172 PL_regindent++;
2173#endif
2174
2175 /* Note that nextchr is a byte even in UTF */
2176 nextchr = UCHARAT(locinput);
2177 scan = prog;
2178 while (scan != NULL) {
8ba1375e 2179
2a782b5b 2180 DEBUG_r( {
d6a28714
JH
2181 SV *prop = sv_newmortal();
2182 int docolor = *PL_colors[0];
2183 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2184 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2185 /* The part of the string before starttry has one color
2186 (pref0_len chars), between starttry and current
2187 position another one (pref_len - pref0_len chars),
2188 after the current position the third one.
2189 We assume that pref0_len <= pref_len, otherwise we
2190 decrease pref0_len. */
9041c2e3 2191 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2192 ? (5 + taill) - l : locinput - PL_bostr;
2193 int pref0_len;
d6a28714 2194
df1ffd02 2195 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2196 pref_len++;
2197 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2198 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2199 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2200 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2201 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2202 l--;
d6a28714
JH
2203 if (pref0_len < 0)
2204 pref0_len = 0;
2205 if (pref0_len > pref_len)
2206 pref0_len = pref_len;
2207 regprop(prop, scan);
2a782b5b
JH
2208 {
2209 char *s0 =
df1ffd02 2210 do_utf8 ?
2a782b5b 2211 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
c728cb41 2212 pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2213 locinput - pref_len;
df1ffd02
JH
2214 int len0 = do_utf8 ? strlen(s0) : pref0_len;
2215 char *s1 = do_utf8 ?
2a782b5b 2216 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
c728cb41 2217 pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2218 locinput - pref_len + pref0_len;
df1ffd02
JH
2219 int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2220 char *s2 = do_utf8 ?
2a782b5b 2221 pv_uni_display(dsv2, (U8*)locinput,
c728cb41 2222 PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2223 locinput;
df1ffd02 2224 int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2225 PerlIO_printf(Perl_debug_log,
2226 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2227 (IV)(locinput - PL_bostr),
2228 PL_colors[4],
2229 len0, s0,
2230 PL_colors[5],
2231 PL_colors[2],
2232 len1, s1,
2233 PL_colors[3],
2234 (docolor ? "" : "> <"),
2235 PL_colors[0],
2236 len2, s2,
2237 PL_colors[1],
2238 15 - l - pref_len + 1,
2239 "",
2240 (IV)(scan - PL_regprogram), PL_regindent*2, "",
2241 SvPVX(prop));
2242 }
2243 });
d6a28714
JH
2244
2245 next = scan + NEXT_OFF(scan);
2246 if (next == scan)
2247 next = NULL;
2248
2249 switch (OP(scan)) {
2250 case BOL:
12d33761
HS
2251 if (locinput == PL_bostr || (PL_multiline &&
2252 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2253 {
2254 /* regtill = regbol; */
b8c5462f
JH
2255 break;
2256 }
d6a28714
JH
2257 sayNO;
2258 case MBOL:
12d33761
HS
2259 if (locinput == PL_bostr ||
2260 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2261 {
b8c5462f
JH
2262 break;
2263 }
d6a28714
JH
2264 sayNO;
2265 case SBOL:
c2a73568 2266 if (locinput == PL_bostr)
b8c5462f 2267 break;
d6a28714
JH
2268 sayNO;
2269 case GPOS:
2270 if (locinput == PL_reg_ganch)
2271 break;
2272 sayNO;
2273 case EOL:
2274 if (PL_multiline)
2275 goto meol;
2276 else
2277 goto seol;
2278 case MEOL:
2279 meol:
2280 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2281 sayNO;
b8c5462f 2282 break;
d6a28714
JH
2283 case SEOL:
2284 seol:
2285 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2286 sayNO;
d6a28714 2287 if (PL_regeol - locinput > 1)
b8c5462f 2288 sayNO;
b8c5462f 2289 break;
d6a28714
JH
2290 case EOS:
2291 if (PL_regeol != locinput)
b8c5462f 2292 sayNO;
d6a28714 2293 break;
ffc61ed2 2294 case SANY:
d6a28714 2295 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2296 sayNO;
f33976b4
DB
2297 if (do_utf8) {
2298 locinput += PL_utf8skip[nextchr];
2299 if (locinput > PL_regeol)
2300 sayNO;
2301 nextchr = UCHARAT(locinput);
2302 }
2303 else
2304 nextchr = UCHARAT(++locinput);
2305 break;
2306 case CANY:
2307 if (!nextchr && locinput >= PL_regeol)
2308 sayNO;
b8c5462f 2309 nextchr = UCHARAT(++locinput);
a0d0e21e 2310 break;
ffc61ed2 2311 case REG_ANY:
1aa99e6b
IH
2312 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2313 sayNO;
2314 if (do_utf8) {
b8c5462f 2315 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2316 if (locinput > PL_regeol)
2317 sayNO;
a0ed51b3 2318 nextchr = UCHARAT(locinput);
a0ed51b3 2319 }
1aa99e6b
IH
2320 else
2321 nextchr = UCHARAT(++locinput);
a0ed51b3 2322 break;
d6a28714 2323 case EXACT:
cd439c50
IZ
2324 s = STRING(scan);
2325 ln = STR_LEN(scan);
1aa99e6b 2326 if (do_utf8 != (UTF!=0)) {
bc517b45 2327 /* The target and the pattern have differing utf8ness. */
1aa99e6b
IH
2328 char *l = locinput;
2329 char *e = s + ln;
bc517b45 2330 STRLEN ulen;
a72c7584 2331
5ff6fc6d
JH
2332 if (do_utf8) {
2333 /* The target is utf8, the pattern is not utf8. */
1aa99e6b
IH
2334 while (s < e) {
2335 if (l >= PL_regeol)
5ff6fc6d
JH
2336 sayNO;
2337 if (NATIVE_TO_UNI(*(U8*)s) !=
2c237f47 2338 utf8_to_uvuni((U8*)l, &ulen))
5ff6fc6d 2339 sayNO;
bc517b45 2340 l += ulen;
5ff6fc6d 2341 s ++;
1aa99e6b 2342 }
5ff6fc6d
JH
2343 }
2344 else {
2345 /* The target is not utf8, the pattern is utf8. */
1aa99e6b
IH
2346 while (s < e) {
2347 if (l >= PL_regeol)
2348 sayNO;
5ff6fc6d 2349 if (NATIVE_TO_UNI(*((U8*)l)) !=
2c237f47 2350 utf8_to_uvuni((U8*)s, &ulen))
1aa99e6b 2351 sayNO;
bc517b45 2352 s += ulen;
a72c7584 2353 l ++;
1aa99e6b 2354 }
5ff6fc6d 2355 }
1aa99e6b
IH
2356 locinput = l;
2357 nextchr = UCHARAT(locinput);
2358 break;
2359 }
bc517b45 2360 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2361 /* Inline the first character, for speed. */
2362 if (UCHARAT(s) != nextchr)
2363 sayNO;
2364 if (PL_regeol - locinput < ln)
2365 sayNO;
2366 if (ln > 1 && memNE(s, locinput, ln))
2367 sayNO;
2368 locinput += ln;
2369 nextchr = UCHARAT(locinput);
2370 break;
2371 case EXACTFL:
b8c5462f
JH
2372 PL_reg_flags |= RF_tainted;
2373 /* FALL THROUGH */
d6a28714 2374 case EXACTF:
cd439c50
IZ
2375 s = STRING(scan);
2376 ln = STR_LEN(scan);
d6a28714 2377
d07ddd77
JH
2378 if (do_utf8 || UTF) {
2379 /* Either target or the pattern are utf8. */
d6a28714 2380 char *l = locinput;
d07ddd77 2381 char *e = PL_regeol;
bc517b45 2382
1feea2c7
JH
2383 if (ibcmp_utf8(s, 0, ln, UTF,
2384 l, &e, 0, do_utf8)) {
5486206c
JH
2385 /* One more case for the sharp s:
2386 * pack("U0U*", 0xDF) =~ /ss/i,
2387 * the 0xC3 0x9F are the UTF-8
2388 * byte sequence for the U+00DF. */
2389 if (!(do_utf8 &&
2390 toLOWER(s[0]) == 's' &&
2391 ln >= 2 &&
2392 toLOWER(s[1]) == 's' &&
2393 (U8)l[0] == 0xC3 &&
2394 e - l >= 2 &&
2395 (U8)l[1] == 0x9F))
2396 sayNO;
2397 }
d07ddd77
JH
2398 locinput = e;
2399 nextchr = UCHARAT(locinput);
2400 break;
a0ed51b3 2401 }
d6a28714 2402
bc517b45
JH
2403 /* Neither the target and the pattern are utf8. */
2404
d6a28714
JH
2405 /* Inline the first character, for speed. */
2406 if (UCHARAT(s) != nextchr &&
2407 UCHARAT(s) != ((OP(scan) == EXACTF)
2408 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2409 sayNO;
d6a28714 2410 if (PL_regeol - locinput < ln)
b8c5462f 2411 sayNO;
d6a28714
JH
2412 if (ln > 1 && (OP(scan) == EXACTF
2413 ? ibcmp(s, locinput, ln)
2414 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2415 sayNO;
d6a28714
JH
2416 locinput += ln;
2417 nextchr = UCHARAT(locinput);
a0d0e21e 2418 break;
d6a28714 2419 case ANYOF:
ffc61ed2 2420 if (do_utf8) {
9e55ce06
JH
2421 STRLEN inclasslen = PL_regeol - locinput;
2422
2423 if (!reginclasslen(scan, (U8*)locinput, &inclasslen, do_utf8))
e0f9d4a8 2424 sayNO_ANYOF;
ffc61ed2
JH
2425 if (locinput >= PL_regeol)
2426 sayNO;
9e55ce06 2427 locinput += inclasslen;
b8c5462f 2428 nextchr = UCHARAT(locinput);
e0f9d4a8 2429 break;
ffc61ed2
JH
2430 }
2431 else {
2432 if (nextchr < 0)
2433 nextchr = UCHARAT(locinput);
2434 if (!reginclass(scan, (U8*)locinput, do_utf8))
e0f9d4a8 2435 sayNO_ANYOF;
ffc61ed2
JH
2436 if (!nextchr && locinput >= PL_regeol)
2437 sayNO;
2438 nextchr = UCHARAT(++locinput);
e0f9d4a8
JH
2439 break;
2440 }
2441 no_anyof:
2442 /* If we might have the case of the German sharp s
2443 * in a casefolding Unicode character class. */
2444
ebc501f0
JH
2445 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2446 locinput += SHARP_S_SKIP;
e0f9d4a8 2447 nextchr = UCHARAT(locinput);
ffc61ed2 2448 }
e0f9d4a8
JH
2449 else
2450 sayNO;
b8c5462f 2451 break;
d6a28714 2452 case ALNUML:
b8c5462f
JH
2453 PL_reg_flags |= RF_tainted;
2454 /* FALL THROUGH */
d6a28714 2455 case ALNUM:
b8c5462f 2456 if (!nextchr)
4633a7c4 2457 sayNO;
ffc61ed2 2458 if (do_utf8) {
ad24be35 2459 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2460 if (!(OP(scan) == ALNUM
3568d838 2461 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2462 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2463 {
2464 sayNO;
a0ed51b3 2465 }
b8c5462f 2466 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2467 nextchr = UCHARAT(locinput);
2468 break;
2469 }
ffc61ed2 2470 if (!(OP(scan) == ALNUM
d6a28714 2471 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2472 sayNO;
b8c5462f 2473 nextchr = UCHARAT(++locinput);
a0d0e21e 2474 break;
d6a28714 2475 case NALNUML:
b8c5462f
JH
2476 PL_reg_flags |= RF_tainted;
2477 /* FALL THROUGH */
d6a28714
JH
2478 case NALNUM:
2479 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2480 sayNO;
ffc61ed2 2481 if (do_utf8) {
8269fa76 2482 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2483 if (OP(scan) == NALNUM
3568d838 2484 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2485 : isALNUM_LC_utf8((U8*)locinput))
2486 {
b8c5462f 2487 sayNO;
d6a28714 2488 }
b8c5462f
JH
2489 locinput += PL_utf8skip[nextchr];
2490 nextchr = UCHARAT(locinput);
2491 break;
2492 }
ffc61ed2 2493 if (OP(scan) == NALNUM
d6a28714 2494 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2495 sayNO;
76e3520e 2496 nextchr = UCHARAT(++locinput);
a0d0e21e 2497 break;
d6a28714
JH
2498 case BOUNDL:
2499 case NBOUNDL:
3280af22 2500 PL_reg_flags |= RF_tainted;
bbce6d69 2501 /* FALL THROUGH */
d6a28714
JH
2502 case BOUND:
2503 case NBOUND:
2504 /* was last char in word? */
ffc61ed2 2505 if (do_utf8) {
12d33761
HS
2506 if (locinput == PL_bostr)
2507 ln = '\n';
ffc61ed2
JH
2508 else {
2509 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2510
2b9d42f0 2511 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2512 }
2513 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2514 ln = isALNUM_uni(ln);
8269fa76 2515 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2516 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2517 }
2518 else {
9041c2e3 2519 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2520 n = isALNUM_LC_utf8((U8*)locinput);
2521 }
a0ed51b3 2522 }
d6a28714 2523 else {
12d33761
HS
2524 ln = (locinput != PL_bostr) ?
2525 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2526 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2527 ln = isALNUM(ln);
2528 n = isALNUM(nextchr);
2529 }
2530 else {
2531 ln = isALNUM_LC(ln);
2532 n = isALNUM_LC(nextchr);
2533 }
d6a28714 2534 }
ffc61ed2
JH
2535 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2536 OP(scan) == BOUNDL))
2537 sayNO;
a0ed51b3 2538 break;
d6a28714 2539 case SPACEL:
3280af22 2540 PL_reg_flags |= RF_tainted;
bbce6d69 2541 /* FALL THROUGH */
d6a28714 2542 case SPACE:
9442cb0e 2543 if (!nextchr)
4633a7c4 2544 sayNO;
1aa99e6b 2545 if (do_utf8) {
fd400ab9 2546 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2547 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2548 if (!(OP(scan) == SPACE
3568d838 2549 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2550 : isSPACE_LC_utf8((U8*)locinput)))
2551 {
2552 sayNO;
2553 }
2554 locinput += PL_utf8skip[nextchr];
2555 nextchr = UCHARAT(locinput);
2556 break;
d6a28714 2557 }
ffc61ed2
JH
2558 if (!(OP(scan) == SPACE
2559 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2560 sayNO;
2561 nextchr = UCHARAT(++locinput);
2562 }
2563 else {
2564 if (!(OP(scan) == SPACE
2565 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2566 sayNO;
2567 nextchr = UCHARAT(++locinput);
a0ed51b3 2568 }
a0ed51b3 2569 break;
d6a28714 2570 case NSPACEL:
3280af22 2571 PL_reg_flags |= RF_tainted;
bbce6d69 2572 /* FALL THROUGH */
d6a28714 2573 case NSPACE:
9442cb0e 2574 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2575 sayNO;
1aa99e6b 2576 if (do_utf8) {
8269fa76 2577 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2578 if (OP(scan) == NSPACE
3568d838 2579 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2580 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2581 {
2582 sayNO;
2583 }
2584 locinput += PL_utf8skip[nextchr];
2585 nextchr = UCHARAT(locinput);
2586 break;
a0ed51b3 2587 }
ffc61ed2 2588 if (OP(scan) == NSPACE
d6a28714 2589 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2590 sayNO;
76e3520e 2591 nextchr = UCHARAT(++locinput);
a0d0e21e 2592 break;
d6a28714 2593 case DIGITL:
a0ed51b3
LW
2594 PL_reg_flags |= RF_tainted;
2595 /* FALL THROUGH */
d6a28714 2596 case DIGIT:
9442cb0e 2597 if (!nextchr)
a0ed51b3 2598 sayNO;
1aa99e6b 2599 if (do_utf8) {
8269fa76 2600 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2601 if (!(OP(scan) == DIGIT
3568d838 2602 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2603 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2604 {
a0ed51b3 2605 sayNO;
dfe13c55 2606 }
6f06b55f 2607 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2608 nextchr = UCHARAT(locinput);
2609 break;
2610 }
ffc61ed2 2611 if (!(OP(scan) == DIGIT
9442cb0e 2612 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2613 sayNO;
2614 nextchr = UCHARAT(++locinput);
2615 break;
d6a28714 2616 case NDIGITL:
b8c5462f
JH
2617 PL_reg_flags |= RF_tainted;
2618 /* FALL THROUGH */
d6a28714 2619 case NDIGIT:
9442cb0e 2620 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2621 sayNO;
1aa99e6b 2622 if (do_utf8) {
8269fa76 2623 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2624 if (OP(scan) == NDIGIT
3568d838 2625 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2626 : isDIGIT_LC_utf8((U8*)locinput))
2627 {
a0ed51b3 2628 sayNO;
9442cb0e 2629 }
6f06b55f 2630 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2631 nextchr = UCHARAT(locinput);
2632 break;
2633 }
ffc61ed2 2634 if (OP(scan) == NDIGIT
9442cb0e 2635 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2636 sayNO;
2637 nextchr = UCHARAT(++locinput);
2638 break;
2639 case CLUMP:
b7c83a7e 2640 if (locinput >= PL_regeol)
a0ed51b3 2641 sayNO;
b7c83a7e
JH
2642 if (do_utf8) {
2643 LOAD_UTF8_CHARCLASS(mark,"~");
2644 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2645 sayNO;
2646 locinput += PL_utf8skip[nextchr];
2647 while (locinput < PL_regeol &&
2648 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2649 locinput += UTF8SKIP(locinput);
2650 if (locinput > PL_regeol)
2651 sayNO;
eb08e2da
JH
2652 }
2653 else
2654 locinput++;
a0ed51b3
LW
2655 nextchr = UCHARAT(locinput);
2656 break;
c8756f30 2657 case REFFL:
3280af22 2658 PL_reg_flags |= RF_tainted;
c8756f30 2659 /* FALL THROUGH */
c277df42 2660 case REF:
c8756f30 2661 case REFF:
c277df42 2662 n = ARG(scan); /* which paren pair */
cf93c79d 2663 ln = PL_regstartp[n];
2c2d71f5 2664 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2665 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2666 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2667 if (ln == PL_regendp[n])
a0d0e21e 2668 break;
a0ed51b3 2669
cf93c79d 2670 s = PL_bostr + ln;
1aa99e6b 2671 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2672 char *l = locinput;
cf93c79d 2673 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2674 /*
2675 * Note that we can't do the "other character" lookup trick as
2676 * in the 8-bit case (no pun intended) because in Unicode we
2677 * have to map both upper and title case to lower case.
2678 */
2679 if (OP(scan) == REFF) {
a2a2844f 2680 STRLEN ulen1, ulen2;
e7ae6809
JH
2681 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
2682 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a0ed51b3
LW
2683 while (s < e) {
2684 if (l >= PL_regeol)
2685 sayNO;
a2a2844f
JH
2686 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2687 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 2688 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 2689 sayNO;
a2a2844f
JH
2690 s += ulen1;
2691 l += ulen2;
a0ed51b3
LW
2692 }
2693 }
2694 locinput = l;
2695 nextchr = UCHARAT(locinput);
2696 break;
2697 }
2698
a0d0e21e 2699 /* Inline the first character, for speed. */
76e3520e 2700 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2701 (OP(scan) == REF ||
2702 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2703 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2704 sayNO;
cf93c79d 2705 ln = PL_regendp[n] - ln;
3280af22 2706 if (locinput + ln > PL_regeol)
4633a7c4 2707 sayNO;
c8756f30
AK
2708 if (ln > 1 && (OP(scan) == REF
2709 ? memNE(s, locinput, ln)
2710 : (OP(scan) == REFF
2711 ? ibcmp(s, locinput, ln)
2712 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2713 sayNO;
a0d0e21e 2714 locinput += ln;
76e3520e 2715 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2716 break;
2717
2718 case NOTHING:
c277df42 2719 case TAIL:
a0d0e21e
LW
2720 break;
2721 case BACK:
2722 break;
c277df42
IZ
2723 case EVAL:
2724 {
2725 dSP;
533c011a 2726 OP_4tree *oop = PL_op;
3280af22
NIS
2727 COP *ocurcop = PL_curcop;
2728 SV **ocurpad = PL_curpad;
c277df42 2729 SV *ret;
9041c2e3 2730
c277df42 2731 n = ARG(scan);
533c011a 2732 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2733 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2734 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2735 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2736
8e5e9ebe
RGS
2737 {
2738 SV **before = SP;
2739 CALLRUNOPS(aTHX); /* Scalar context. */
2740 SPAGAIN;
2741 if (SP == before)
2742 ret = Nullsv; /* protect against empty (?{}) blocks. */
2743 else {
2744 ret = POPs;
2745 PUTBACK;
2746 }
2747 }
2748
0f5d15d6
IZ
2749 PL_op = oop;
2750 PL_curpad = ocurpad;
2751 PL_curcop = ocurcop;
c277df42 2752 if (logical) {
0f5d15d6
IZ
2753 if (logical == 2) { /* Postponed subexpression. */
2754 regexp *re;
22c35a8c 2755 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2756 re_cc_state state;
0f5d15d6
IZ
2757 CHECKPOINT cp, lastcp;
2758
2759 if(SvROK(ret) || SvRMAGICAL(ret)) {
2760 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2761
2762 if(SvMAGICAL(sv))
14befaf4 2763 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2764 }
2765 if (mg) {
2766 re = (regexp *)mg->mg_obj;
df0003d4 2767 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2768 }
2769 else {
2770 STRLEN len;
2771 char *t = SvPV(ret, len);
2772 PMOP pm;
2773 char *oprecomp = PL_regprecomp;
2774 I32 osize = PL_regsize;
2775 I32 onpar = PL_regnpar;
2776
5fcd1c1b 2777 Zero(&pm, 1, PMOP);
cea2e8a9 2778 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2779 if (!(SvFLAGS(ret)
0f5d15d6 2780 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2781 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2782 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2783 PL_regprecomp = oprecomp;
2784 PL_regsize = osize;
2785 PL_regnpar = onpar;
2786 }
2787 DEBUG_r(
9041c2e3 2788 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2789 "Entering embedded `%s%.60s%s%s'\n",
2790 PL_colors[0],
2791 re->precomp,
2792 PL_colors[1],
2793 (strlen(re->precomp) > 60 ? "..." : ""))
2794 );
2795 state.node = next;
2796 state.prev = PL_reg_call_cc;
2797 state.cc = PL_regcc;
2798 state.re = PL_reg_re;
2799
2ab05381 2800 PL_regcc = 0;
9041c2e3 2801
0f5d15d6 2802 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2803 REGCP_SET(lastcp);
0f5d15d6
IZ
2804 cache_re(re);
2805 state.ss = PL_savestack_ix;
2806 *PL_reglastparen = 0;
a01268b5 2807 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2808 PL_reg_call_cc = &state;
2809 PL_reginput = locinput;
2c2d71f5
JH
2810
2811 /* XXXX This is too dramatic a measure... */
2812 PL_reg_maxiter = 0;
2813
0f5d15d6 2814 if (regmatch(re->program + 1)) {
2c914db6
IZ
2815 /* Even though we succeeded, we need to restore
2816 global variables, since we may be wrapped inside
2817 SUSPEND, thus the match may be not finished yet. */
2818
2819 /* XXXX Do this only if SUSPENDed? */
2820 PL_reg_call_cc = state.prev;
2821 PL_regcc = state.cc;
2822 PL_reg_re = state.re;
2823 cache_re(PL_reg_re);
2824
2825 /* XXXX This is too dramatic a measure... */
2826 PL_reg_maxiter = 0;
2827
2828 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2829 ReREFCNT_dec(re);
2830 regcpblow(cp);
2831 sayYES;
2832 }
0f5d15d6 2833 ReREFCNT_dec(re);
02db2b7b 2834 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2835 regcppop();
2836 PL_reg_call_cc = state.prev;
2837 PL_regcc = state.cc;
2838 PL_reg_re = state.re;
d3790889 2839 cache_re(PL_reg_re);
2c2d71f5
JH
2840
2841 /* XXXX This is too dramatic a measure... */
2842 PL_reg_maxiter = 0;
2843
8e514ae6 2844 logical = 0;
0f5d15d6
IZ
2845 sayNO;
2846 }
c277df42 2847 sw = SvTRUE(ret);
0f5d15d6 2848 logical = 0;
a0ed51b3
LW
2849 }
2850 else
3280af22 2851 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2852 break;
2853 }
a0d0e21e 2854 case OPEN:
c277df42 2855 n = ARG(scan); /* which paren pair */
3280af22
NIS
2856 PL_reg_start_tmp[n] = locinput;
2857 if (n > PL_regsize)
2858 PL_regsize = n;
a0d0e21e
LW
2859 break;
2860 case CLOSE:
c277df42 2861 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2862 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2863 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2864 if (n > *PL_reglastparen)
2865 *PL_reglastparen = n;
a01268b5 2866 *PL_reglastcloseparen = n;
a0d0e21e 2867 break;
c277df42
IZ
2868 case GROUPP:
2869 n = ARG(scan); /* which paren pair */
cf93c79d 2870 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2871 break;
2872 case IFTHEN:
2c2d71f5 2873 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2874 if (sw)
2875 next = NEXTOPER(NEXTOPER(scan));
2876 else {
2877 next = scan + ARG(scan);
2878 if (OP(next) == IFTHEN) /* Fake one. */
2879 next = NEXTOPER(NEXTOPER(next));
2880 }
2881 break;
2882 case LOGICAL:
0f5d15d6 2883 logical = scan->flags;
c277df42 2884 break;
2ab05381
IZ
2885/*******************************************************************
2886 PL_regcc contains infoblock about the innermost (...)* loop, and
2887 a pointer to the next outer infoblock.
2888
2889 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2890
2891 1) After matching X, regnode for CURLYX is processed;
2892
9041c2e3 2893 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2894 regmatch() recursively with the starting point at WHILEM node;
2895
2896 3) Each hit of WHILEM node tries to match A and Z (in the order
2897 depending on the current iteration, min/max of {min,max} and
2898 greediness). The information about where are nodes for "A"
2899 and "Z" is read from the infoblock, as is info on how many times "A"
2900 was already matched, and greediness.
2901
2902 4) After A matches, the same WHILEM node is hit again.
2903
2904 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2905 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2906 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2907 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2908 of the external loop.
2909
2910 Currently present infoblocks form a tree with a stem formed by PL_curcc
2911 and whatever it mentions via ->next, and additional attached trees
2912 corresponding to temporarily unset infoblocks as in "5" above.
2913
9041c2e3 2914 In the following picture infoblocks for outer loop of
2ab05381
IZ
2915 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2916 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2917 infoblocks are drawn below the "reset" infoblock.
2918
2919 In fact in the picture below we do not show failed matches for Z and T
2920 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2921 more obvious *why* one needs to *temporary* unset infoblocks.]
2922
2923 Matched REx position InfoBlocks Comment
2924 (Y(A)*?Z)*?T x
2925 Y(A)*?Z)*?T x <- O
2926 Y (A)*?Z)*?T x <- O
2927 Y A)*?Z)*?T x <- O <- I
2928 YA )*?Z)*?T x <- O <- I
2929 YA A)*?Z)*?T x <- O <- I
2930 YAA )*?Z)*?T x <- O <- I
2931 YAA Z)*?T x <- O # Temporary unset I
2932 I
2933
2934 YAAZ Y(A)*?Z)*?T x <- O
2935 I
2936
2937 YAAZY (A)*?Z)*?T x <- O
2938 I
2939
2940 YAAZY A)*?Z)*?T x <- O <- I
2941 I
2942
2943 YAAZYA )*?Z)*?T x <- O <- I
2944 I
2945
2946 YAAZYA Z)*?T x <- O # Temporary unset I
2947 I,I
2948
2949 YAAZYAZ )*?T x <- O
2950 I,I
2951
2952 YAAZYAZ T x # Temporary unset O
2953 O
2954 I,I
2955
2956 YAAZYAZT x
2957 O
2958 I,I
2959 *******************************************************************/
a0d0e21e
LW
2960 case CURLYX: {
2961 CURCUR cc;
3280af22 2962 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2963 /* No need to save/restore up to this paren */
2964 I32 parenfloor = scan->flags;
c277df42
IZ
2965
2966 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2967 next += ARG(next);
3280af22
NIS
2968 cc.oldcc = PL_regcc;
2969 PL_regcc = &cc;
cb434fcc
IZ
2970 /* XXXX Probably it is better to teach regpush to support
2971 parenfloor > PL_regsize... */
2972 if (parenfloor > *PL_reglastparen)
2973 parenfloor = *PL_reglastparen; /* Pessimization... */
2974 cc.parenfloor = parenfloor;
a0d0e21e
LW
2975 cc.cur = -1;
2976 cc.min = ARG1(scan);
2977 cc.max = ARG2(scan);
c277df42 2978 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2979 cc.next = next;
2980 cc.minmod = minmod;
2981 cc.lastloc = 0;
3280af22 2982 PL_reginput = locinput;
a0d0e21e
LW
2983 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2984 regcpblow(cp);
3280af22 2985 PL_regcc = cc.oldcc;
4633a7c4 2986 saySAME(n);
a0d0e21e
LW
2987 }
2988 /* NOT REACHED */
2989 case WHILEM: {
2990 /*
2991 * This is really hard to understand, because after we match
2992 * what we're trying to match, we must make sure the rest of
2c2d71f5 2993 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2994 * to go back UP the parse tree by recursing ever deeper. And
2995 * if it fails, we have to reset our parent's current state
2996 * that we can try again after backing off.
2997 */
2998
c277df42 2999 CHECKPOINT cp, lastcp;
3280af22 3000 CURCUR* cc = PL_regcc;
c277df42
IZ
3001 char *lastloc = cc->lastloc; /* Detection of 0-len. */
3002
4633a7c4 3003 n = cc->cur + 1; /* how many we know we matched */
3280af22 3004 PL_reginput = locinput;
a0d0e21e 3005
c277df42 3006 DEBUG_r(
9041c2e3
NIS
3007 PerlIO_printf(Perl_debug_log,
3008 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 3009 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 3010 (long)n, (long)cc->min,
c277df42
IZ
3011 (long)cc->max, (long)cc)
3012 );
4633a7c4 3013
a0d0e21e
LW
3014 /* If degenerate scan matches "", assume scan done. */
3015
579cf2c3 3016 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 3017 PL_regcc = cc->oldcc;
2ab05381
IZ
3018 if (PL_regcc)
3019 ln = PL_regcc->cur;
c277df42 3020 DEBUG_r(
c3464db5
DD
3021 PerlIO_printf(Perl_debug_log,
3022 "%*s empty match detected, try continuation...\n",
3280af22 3023 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3024 );
a0d0e21e 3025 if (regmatch(cc->next))
4633a7c4 3026 sayYES;
2ab05381
IZ
3027 if (PL_regcc)
3028 PL_regcc->cur = ln;
3280af22 3029 PL_regcc = cc;
4633a7c4 3030 sayNO;
a0d0e21e
LW
3031 }
3032
3033 /* First just match a string of min scans. */
3034
3035 if (n < cc->min) {
3036 cc->cur = n;
3037 cc->lastloc = locinput;
4633a7c4
LW
3038 if (regmatch(cc->scan))
3039 sayYES;
3040 cc->cur = n - 1;
c277df42 3041 cc->lastloc = lastloc;
4633a7c4 3042 sayNO;
a0d0e21e
LW
3043 }
3044
2c2d71f5
JH
3045 if (scan->flags) {
3046 /* Check whether we already were at this position.
3047 Postpone detection until we know the match is not
3048 *that* much linear. */
3049 if (!PL_reg_maxiter) {
3050 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3051 PL_reg_leftiter = PL_reg_maxiter;
3052 }
3053 if (PL_reg_leftiter-- == 0) {
3054 I32 size = (PL_reg_maxiter + 7)/8;
3055 if (PL_reg_poscache) {
3056 if (PL_reg_poscache_size < size) {
3057 Renew(PL_reg_poscache, size, char);
3058 PL_reg_poscache_size = size;
3059 }
3060 Zero(PL_reg_poscache, size, char);
3061 }
3062 else {
3063 PL_reg_poscache_size = size;
3064 Newz(29, PL_reg_poscache, size, char);
3065 }
3066 DEBUG_r(
3067 PerlIO_printf(Perl_debug_log,
3068 "%sDetected a super-linear match, switching on caching%s...\n",
3069 PL_colors[4], PL_colors[5])
3070 );
3071 }
3072 if (PL_reg_leftiter < 0) {
3073 I32 o = locinput - PL_bostr, b;
3074
3075 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
3076 b = o % 8;
3077 o /= 8;
3078 if (PL_reg_poscache[o] & (1<<b)) {
3079 DEBUG_r(
3080 PerlIO_printf(Perl_debug_log,
3081 "%*s already tried at this position...\n",
3082 REPORT_CODE_OFF+PL_regindent*2, "")
3083 );
7821416a 3084 sayNO_SILENT;
2c2d71f5
JH
3085 }
3086 PL_reg_poscache[o] |= (1<<b);
3087 }
3088 }
3089
a0d0e21e
LW
3090 /* Prefer next over scan for minimal matching. */
3091
3092 if (cc->minmod) {
3280af22 3093 PL_regcc = cc->oldcc;
2ab05381
IZ
3094 if (PL_regcc)
3095 ln = PL_regcc->cur;
5f05dabc 3096 cp = regcppush(cc->parenfloor);
02db2b7b 3097 REGCP_SET(lastcp);
5f05dabc 3098 if (regmatch(cc->next)) {
c277df42 3099 regcpblow(cp);
4633a7c4 3100 sayYES; /* All done. */
5f05dabc 3101 }
02db2b7b 3102 REGCP_UNWIND(lastcp);
5f05dabc 3103 regcppop();
2ab05381
IZ
3104 if (PL_regcc)
3105 PL_regcc->cur = ln;
3280af22 3106 PL_regcc = cc;
a0d0e21e 3107
c277df42 3108 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 3109 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3110 && !(PL_reg_flags & RF_warned)) {
3111 PL_reg_flags |= RF_warned;
e476b1b5 3112 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
3113 "Complex regular subexpression recursion",
3114 REG_INFTY - 1);
c277df42 3115 }
4633a7c4 3116 sayNO;
c277df42 3117 }
a687059c 3118
c277df42 3119 DEBUG_r(
c3464db5
DD
3120 PerlIO_printf(Perl_debug_log,
3121 "%*s trying longer...\n",
3280af22 3122 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3123 );
a0d0e21e 3124 /* Try scanning more and see if it helps. */
3280af22 3125 PL_reginput = locinput;
a0d0e21e
LW
3126 cc->cur = n;
3127 cc->lastloc = locinput;
5f05dabc 3128 cp = regcppush(cc->parenfloor);
02db2b7b 3129 REGCP_SET(lastcp);
5f05dabc 3130 if (regmatch(cc->scan)) {
c277df42 3131 regcpblow(cp);
4633a7c4 3132 sayYES;
5f05dabc 3133 }
02db2b7b 3134 REGCP_UNWIND(lastcp);
5f05dabc 3135 regcppop();
4633a7c4 3136 cc->cur = n - 1;
c277df42 3137 cc->lastloc = lastloc;
4633a7c4 3138 sayNO;
a0d0e21e
LW
3139 }
3140
3141 /* Prefer scan over next for maximal matching. */
3142
3143 if (n < cc->max) { /* More greed allowed? */
5f05dabc 3144 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
3145 cc->cur = n;
3146 cc->lastloc = locinput;
02db2b7b 3147 REGCP_SET(lastcp);
5f05dabc 3148 if (regmatch(cc->scan)) {
c277df42 3149 regcpblow(cp);
4633a7c4 3150 sayYES;
5f05dabc 3151 }
02db2b7b 3152 REGCP_UNWIND(lastcp);
a0d0e21e 3153 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3154 PL_reginput = locinput;
c277df42 3155 DEBUG_r(
c3464db5
DD
3156 PerlIO_printf(Perl_debug_log,
3157 "%*s failed, try continuation...\n",
3280af22 3158 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3159 );
3160 }
9041c2e3 3161 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3162 && !(PL_reg_flags & RF_warned)) {
3280af22 3163 PL_reg_flags |= RF_warned;
e476b1b5 3164 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
3165 "Complex regular subexpression recursion",
3166 REG_INFTY - 1);
a0d0e21e
LW
3167 }
3168
3169 /* Failed deeper matches of scan, so see if this one works. */
3280af22 3170 PL_regcc = cc->oldcc;
2ab05381
IZ
3171 if (PL_regcc)
3172 ln = PL_regcc->cur;
a0d0e21e 3173 if (regmatch(cc->next))
4633a7c4 3174 sayYES;
2ab05381
IZ
3175 if (PL_regcc)
3176 PL_regcc->cur = ln;
3280af22 3177 PL_regcc = cc;
4633a7c4 3178 cc->cur = n - 1;
c277df42 3179 cc->lastloc = lastloc;
4633a7c4 3180 sayNO;
a0d0e21e
LW
3181 }
3182 /* NOT REACHED */
9041c2e3 3183 case BRANCHJ:
c277df42
IZ
3184 next = scan + ARG(scan);
3185 if (next == scan)
3186 next = NULL;
3187 inner = NEXTOPER(NEXTOPER(scan));
3188 goto do_branch;
9041c2e3 3189 case BRANCH:
c277df42
IZ
3190 inner = NEXTOPER(scan);
3191 do_branch:
3192 {
c277df42
IZ
3193 c1 = OP(scan);
3194 if (OP(next) != c1) /* No choice. */
3195 next = inner; /* Avoid recursion. */
a0d0e21e 3196 else {
02db2b7b
IZ
3197 I32 lastparen = *PL_reglastparen;
3198 I32 unwind1;
3199 re_unwind_branch_t *uw;
3200
3201 /* Put unwinding data on stack */
3202 unwind1 = SSNEWt(1,re_unwind_branch_t);
3203 uw = SSPTRt(unwind1,re_unwind_branch_t);
3204 uw->prev = unwind;
3205 unwind = unwind1;
3206 uw->type = ((c1 == BRANCH)
3207 ? RE_UNWIND_BRANCH
3208 : RE_UNWIND_BRANCHJ);
3209 uw->lastparen = lastparen;
3210 uw->next = next;
3211 uw->locinput = locinput;
3212 uw->nextchr = nextchr;
3213#ifdef DEBUGGING
3214 uw->regindent = ++PL_regindent;
3215#endif
c277df42 3216
02db2b7b
IZ
3217 REGCP_SET(uw->lastcp);
3218
3219 /* Now go into the first branch */
3220 next = inner;
a687059c 3221 }
a0d0e21e
LW
3222 }
3223 break;
3224 case MINMOD:
3225 minmod = 1;
3226 break;
c277df42
IZ
3227 case CURLYM:
3228 {
00db4c45 3229 I32 l = 0;
c277df42 3230 CHECKPOINT lastcp;
9041c2e3 3231
c277df42
IZ
3232 /* We suppose that the next guy does not need
3233 backtracking: in particular, it is of constant length,
3234 and has no parenths to influence future backrefs. */
3235 ln = ARG1(scan); /* min to match */
3236 n = ARG2(scan); /* max to match */
c277df42
IZ
3237 paren = scan->flags;
3238 if (paren) {
3280af22
NIS
3239 if (paren > PL_regsize)
3240 PL_regsize = paren;
3241 if (paren > *PL_reglastparen)
3242 *PL_reglastparen = paren;
c277df42 3243 }
dc45a647 3244 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3245 if (paren)
3246 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3247 PL_reginput = locinput;
c277df42
IZ
3248 if (minmod) {
3249 minmod = 0;
3250 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3251 sayNO;
f31a99c8
HS
3252 /* if we matched something zero-length we don't need to
3253 backtrack - capturing parens are already defined, so
3254 the caveat in the maximal case doesn't apply
3255
3256 XXXX if ln == 0, we can redo this check first time
3257 through the following loop
3258 */
3259 if (ln && l == 0)
3260 n = ln; /* don't backtrack */
3280af22 3261 locinput = PL_reginput;
cca55fe3 3262 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3263 regnode *text_node = next;
3264
cca55fe3 3265 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3266
cca55fe3 3267 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3268 else {
cca55fe3
JP
3269 if (PL_regkind[(U8)OP(text_node)] == REF) {
3270 I32 n, ln;
3271 n = ARG(text_node); /* which paren pair */
3272 ln = PL_regstartp[n];
3273 /* assume yes if we haven't seen CLOSEn */
3274 if (
3275 *PL_reglastparen < n ||
3276 ln == -1 ||
3277 ln == PL_regendp[n]
3278 ) {
3279 c1 = c2 = -1000;
3280 goto assume_ok_MM;
3281 }
3282 c1 = *(PL_bostr + ln);
3283 }
3284 else { c1 = (U8)*STRING(text_node); }
af5decee 3285 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3286 c2 = PL_fold[c1];
af5decee 3287 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3288 c2 = PL_fold_locale[c1];
3289 else
3290 c2 = c1;
3291 }
a0ed51b3
LW
3292 }
3293 else
c277df42 3294 c1 = c2 = -1000;
cca55fe3 3295 assume_ok_MM:
02db2b7b 3296 REGCP_SET(lastcp);
5f4b28b2 3297 /* This may be improved if l == 0. */
c277df42
IZ
3298 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3299 /* If it could work, try it. */
3300 if (c1 == -1000 ||
3280af22
NIS
3301 UCHARAT(PL_reginput) == c1 ||
3302 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3303 {
3304 if (paren) {
f31a99c8 3305 if (ln) {
cf93c79d
IZ
3306 PL_regstartp[paren] =
3307 HOPc(PL_reginput, -l) - PL_bostr;
3308 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3309 }
3310 else
cf93c79d 3311 PL_regendp[paren] = -1;
c277df42
IZ
3312 }
3313 if (regmatch(next))
3314 sayYES;
02db2b7b 3315 REGCP_UNWIND(lastcp);
c277df42
IZ
3316 }
3317 /* Couldn't or didn't -- move forward. */
3280af22 3318 PL_reginput = locinput;
c277df42
IZ
3319 if (regrepeat_hard(scan, 1, &l)) {
3320 ln++;
3280af22 3321 locinput = PL_reginput;
c277df42
IZ
3322 }
3323 else
3324 sayNO;
3325 }
a0ed51b3
LW
3326 }
3327 else {
c277df42 3328 n = regrepeat_hard(scan, n, &l);
f31a99c8
HS
3329 /* if we matched something zero-length we don't need to
3330 backtrack, unless the minimum count is zero and we
3331 are capturing the result - in that case the capture
3332 being defined or not may affect later execution
3333 */
3334 if (n != 0 && l == 0 && !(paren && ln == 0))
3335 ln = n; /* don't backtrack */
3280af22 3336 locinput = PL_reginput;
c277df42 3337 DEBUG_r(
5c0ca799 3338 PerlIO_printf(Perl_debug_log,
faccc32b 3339 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3340 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3341 (IV) n, (IV)l)
c277df42
IZ
3342 );
3343 if (n >= ln) {
cca55fe3 3344 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3345 regnode *text_node = next;
3346
cca55fe3 3347 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3348
cca55fe3 3349 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3350 else {
cca55fe3
JP
3351 if (PL_regkind[(U8)OP(text_node)] == REF) {
3352 I32 n, ln;
3353 n = ARG(text_node); /* which paren pair */
3354 ln = PL_regstartp[n];
3355 /* assume yes if we haven't seen CLOSEn */
3356 if (
3357 *PL_reglastparen < n ||
3358 ln == -1 ||
3359 ln == PL_regendp[n]
3360 ) {
3361 c1 = c2 = -1000;
3362 goto assume_ok_REG;
3363 }
3364 c1 = *(PL_bostr + ln);
3365 }
3366 else { c1 = (U8)*STRING(text_node); }
3367
af5decee 3368 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3369 c2 = PL_fold[c1];
af5decee 3370 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3371 c2 = PL_fold_locale[c1];
3372 else
3373 c2 = c1;
3374 }
a0ed51b3
LW
3375 }
3376 else
c277df42
IZ
3377 c1 = c2 = -1000;
3378 }
cca55fe3 3379 assume_ok_REG:
02db2b7b 3380 REGCP_SET(lastcp);
c277df42
IZ
3381 while (n >= ln) {
3382 /* If it could work, try it. */
3383 if (c1 == -1000 ||
3280af22
NIS
3384 UCHARAT(PL_reginput) == c1 ||
3385 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3386 {
3387 DEBUG_r(
c3464db5 3388 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3389 "%*s trying tail with n=%"IVdf"...\n",
3390 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3391 );
3392 if (paren) {
3393 if (n) {
cf93c79d
IZ
3394 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3395 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3396 }
a0ed51b3 3397 else
cf93c79d 3398 PL_regendp[paren] = -1;
c277df42 3399 }
a0ed51b3
LW
3400 if (regmatch(next))
3401 sayYES;
02db2b7b 3402 REGCP_UNWIND(lastcp);
a0ed51b3 3403 }
c277df42
IZ
3404 /* Couldn't or didn't -- back up. */
3405 n--;
dfe13c55 3406 locinput = HOPc(locinput, -l);
3280af22 3407 PL_reginput = locinput;
c277df42
IZ
3408 }
3409 }
3410 sayNO;
3411 break;
3412 }
3413 case CURLYN:
3414 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3415 if (paren > PL_regsize)
3416 PL_regsize = paren;
3417 if (paren > *PL_reglastparen)
3418 *PL_reglastparen = paren;
c277df42
IZ
3419 ln = ARG1(scan); /* min to match */
3420 n = ARG2(scan); /* max to match */
dc45a647 3421 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3422 goto repeat;
a0d0e21e 3423 case CURLY:
c277df42 3424 paren = 0;
a0d0e21e
LW
3425 ln = ARG1(scan); /* min to match */
3426 n = ARG2(scan); /* max to match */
dc45a647 3427 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3428 goto repeat;
3429 case STAR:
3430 ln = 0;
c277df42 3431 n = REG_INFTY;
a0d0e21e 3432 scan = NEXTOPER(scan);
c277df42 3433 paren = 0;
a0d0e21e
LW
3434 goto repeat;
3435 case PLUS:
c277df42
IZ
3436 ln = 1;
3437 n = REG_INFTY;
3438 scan = NEXTOPER(scan);
3439 paren = 0;
3440 repeat:
a0d0e21e
LW
3441 /*
3442 * Lookahead to avoid useless match attempts
3443 * when we know what character comes next.
3444 */
5f80c4cf
JP
3445
3446 /*
3447 * Used to only do .*x and .*?x, but now it allows
3448 * for )'s, ('s and (?{ ... })'s to be in the way
3449 * of the quantifier and the EXACT-like node. -- japhy
3450 */
3451
cca55fe3 3452 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3453 U8 *s;
3454 regnode *text_node = next;
3455
cca55fe3 3456 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3457
cca55fe3 3458 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3459 else {
cca55fe3
JP
3460 if (PL_regkind[(U8)OP(text_node)] == REF) {
3461 I32 n, ln;
3462 n = ARG(text_node); /* which paren pair */
3463 ln = PL_regstartp[n];
3464 /* assume yes if we haven't seen CLOSEn */
3465 if (
3466 *PL_reglastparen < n ||
3467 ln == -1 ||
3468 ln == PL_regendp[n]
3469 ) {
3470 c1 = c2 = -1000;
3471 goto assume_ok_easy;
3472 }
9246c65e 3473 s = (U8*)PL_bostr + ln;
cca55fe3
JP
3474 }
3475 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
3476
3477 if (!UTF) {
3478 c2 = c1 = *s;
f65d3ee7 3479 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3480 c2 = PL_fold[c1];
f65d3ee7 3481 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf 3482 c2 = PL_fold_locale[c1];
1aa99e6b 3483 }
5f80c4cf 3484 else { /* UTF */
f65d3ee7 3485 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 3486 STRLEN ulen1, ulen2;
e7ae6809
JH
3487 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
3488 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
3489
3490 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3491 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3492
3493 c1 = utf8_to_uvuni(tmpbuf1, 0);
3494 c2 = utf8_to_uvuni(tmpbuf2, 0);
5f80c4cf
JP
3495 }
3496 else {
3497 c2 = c1 = utf8_to_uvchr(s, NULL);
3498 }
1aa99e6b
IH
3499 }
3500 }
bbce6d69 3501 }
a0d0e21e 3502 else
bbce6d69 3503 c1 = c2 = -1000;
cca55fe3 3504 assume_ok_easy:
3280af22 3505 PL_reginput = locinput;
a0d0e21e 3506 if (minmod) {
c277df42 3507 CHECKPOINT lastcp;
a0d0e21e
LW
3508 minmod = 0;
3509 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3510 sayNO;
a0ed51b3 3511 locinput = PL_reginput;
02db2b7b 3512 REGCP_SET(lastcp);
0fe9bf95 3513 if (c1 != -1000) {
1aa99e6b 3514 char *e; /* Should not check after this */
0fe9bf95
IZ
3515 char *old = locinput;
3516
1aa99e6b 3517 if (n == REG_INFTY) {
0fe9bf95 3518 e = PL_regeol - 1;
1aa99e6b
IH
3519 if (do_utf8)
3520 while (UTF8_IS_CONTINUATION(*(U8*)e))
3521 e--;
3522 }
3523 else if (do_utf8) {
3524 int m = n - ln;
3525 for (e = locinput;
3526 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3527 e += UTF8SKIP(e);
3528 }
3529 else {
3530 e = locinput + n - ln;
3531 if (e >= PL_regeol)
3532 e = PL_regeol - 1;
3533 }
0fe9bf95 3534 while (1) {
1aa99e6b 3535 int count;
0fe9bf95 3536 /* Find place 'next' could work */
1aa99e6b
IH
3537 if (!do_utf8) {
3538 if (c1 == c2) {
a8e8ab15
JH
3539 while (locinput <= e &&
3540 UCHARAT(locinput) != c1)
1aa99e6b
IH
3541 locinput++;
3542 } else {
9041c2e3 3543 while (locinput <= e
a8e8ab15
JH
3544 && UCHARAT(locinput) != c1
3545 && UCHARAT(locinput) != c2)
1aa99e6b
IH
3546 locinput++;
3547 }
3548 count = locinput - old;
3549 }
3550 else {
3551 STRLEN len;
3552 if (c1 == c2) {
3553 for (count = 0;
3554 locinput <= e &&
9041c2e3 3555 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3556 count++)
3557 locinput += len;
3558
3559 } else {
3560 for (count = 0; locinput <= e; count++) {
9041c2e3 3561 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3562 if (c == c1 || c == c2)
3563 break;
9041c2e3 3564 locinput += len;
1aa99e6b
IH
3565 }
3566 }
0fe9bf95 3567 }
9041c2e3 3568 if (locinput > e)
0fe9bf95
IZ
3569 sayNO;
3570 /* PL_reginput == old now */
3571 if (locinput != old) {
3572 ln = 1; /* Did some */
1aa99e6b 3573 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3574 sayNO;
3575 }
3576 /* PL_reginput == locinput now */
29d1e993 3577 TRYPAREN(paren, ln, locinput);
0fe9bf95 3578 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3579 REGCP_UNWIND(lastcp);
0fe9bf95 3580 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3581 old = locinput;
3582 if (do_utf8)
3583 locinput += UTF8SKIP(locinput);
3584 else
3585 locinput++;
0fe9bf95
IZ
3586 }
3587 }
3588 else
c277df42 3589 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3590 UV c;
3591 if (c1 != -1000) {
3592 if (do_utf8)
9041c2e3 3593 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3594 else
9041c2e3 3595 c = UCHARAT(PL_reginput);
2390ecbc
PP
3596 /* If it could work, try it. */
3597 if (c == c1 || c == c2)
3598 {
3599 TRYPAREN(paren, n, PL_reginput);
3600 REGCP_UNWIND(lastcp);
3601 }
1aa99e6b 3602 }
a0d0e21e 3603 /* If it could work, try it. */
2390ecbc 3604 else if (c1 == -1000)
bbce6d69 3605 {
29d1e993 3606 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3607 REGCP_UNWIND(lastcp);
bbce6d69 3608 }
c277df42 3609 /* Couldn't or didn't -- move forward. */
a0ed51b3 3610 PL_reginput = locinput;
a0d0e21e
LW
3611 if (regrepeat(scan, 1)) {
3612 ln++;
a0ed51b3
LW
3613 locinput = PL_reginput;
3614 }
3615 else
4633a7c4 3616 sayNO;
a0d0e21e
LW
3617 }
3618 }
3619 else {
c277df42 3620 CHECKPOINT lastcp;
a0d0e21e 3621 n = regrepeat(scan, n);
a0ed51b3 3622 locinput = PL_reginput;
22c35a8c 3623 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
15272685
HS
3624 ((!PL_multiline && OP(next) != MEOL) ||
3625 OP(next) == SEOL || OP(next) == EOS))
3626 {
a0d0e21e 3627 ln = n; /* why back off? */
1aeab75a
GS
3628 /* ...because $ and \Z can match before *and* after
3629 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3630 We should back off by one in this case. */
3631 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3632 ln--;
3633 }
02db2b7b 3634 REGCP_SET(lastcp);
c277df42 3635 if (paren) {
8fa7f367 3636 UV c = 0;
c277df42 3637 while (n >= ln) {
1aa99e6b
IH
3638 if (c1 != -1000) {
3639 if (do_utf8)
9041c2e3 3640 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3641 else
9041c2e3 3642 c = UCHARAT(PL_reginput);
1aa99e6b 3643 }
c277df42 3644 /* If it could work, try it. */
1aa99e6b 3645 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3646 {
29d1e993 3647 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3648 REGCP_UNWIND(lastcp);
c277df42
IZ
3649 }
3650 /* Couldn't or didn't -- back up. */
3651 n--;
dfe13c55 3652 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3653 }
a0ed51b3
LW
3654 }
3655 else {
8fa7f367 3656 UV c = 0;
c277df42 3657 while (n >= ln) {
1aa99e6b
IH
3658 if (c1 != -1000) {
3659 if (do_utf8)
9041c2e3 3660 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3661 else
9041c2e3 3662 c = UCHARAT(PL_reginput);
1aa99e6b 3663 }
c277df42 3664 /* If it could work, try it. */
1aa99e6b 3665 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3666 {
29d1e993 3667 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3668 REGCP_UNWIND(lastcp);
c277df42
IZ
3669 }
3670 /* Couldn't or didn't -- back up. */
3671 n--;
dfe13c55 3672 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3673 }
a0d0e21e
LW
3674 }
3675 }
4633a7c4 3676 sayNO;
c277df42 3677 break;
a0d0e21e 3678 case END:
0f5d15d6
IZ
3679 if (PL_reg_call_cc) {
3680 re_cc_state *cur_call_cc = PL_reg_call_cc;
3681 CURCUR *cctmp = PL_regcc;
3682 regexp *re = PL_reg_re;
3683 CHECKPOINT cp, lastcp;
3684
3685 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3686 REGCP_SET(lastcp);
0f5d15d6
IZ
3687 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3688 the caller. */
3689 PL_reginput = locinput; /* Make position available to
3690 the callcc. */
3691 cache_re(PL_reg_call_cc->re);
3692 PL_regcc = PL_reg_call_cc->cc;
3693 PL_reg_call_cc = PL_reg_call_cc->prev;
3694 if (regmatch(cur_call_cc->node)) {
3695 PL_reg_call_cc = cur_call_cc;
3696 regcpblow(cp);
3697 sayYES;
3698 }
02db2b7b 3699 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3700 regcppop();
3701 PL_reg_call_cc = cur_call_cc;
3702 PL_regcc = cctmp;
3703 PL_reg_re = re;
3704 cache_re(re);
3705
3706 DEBUG_r(
3707 PerlIO_printf(Perl_debug_log,
3708 "%*s continuation failed...\n",
3709 REPORT_CODE_OFF+PL_regindent*2, "")
3710 );
7821416a 3711 sayNO_SILENT;
0f5d15d6 3712 }
7821416a
IZ
3713 if (locinput < PL_regtill) {
3714 DEBUG_r(PerlIO_printf(Perl_debug_log,
3715 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3716 PL_colors[4],
3717 (long)(locinput - PL_reg_starttry),
3718 (long)(PL_regtill - PL_reg_starttry),
3719 PL_colors[5]));
3720 sayNO_FINAL; /* Cannot match: too short. */
3721 }
3722 PL_reginput = locinput; /* put where regtry can find it */
3723 sayYES_FINAL; /* Success! */
7e5428c5 3724 case SUCCEED:
3280af22 3725 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3726 sayYES_LOUD; /* Success! */
c277df42
IZ
3727 case SUSPEND:
3728 n = 1;
9fe1d20c 3729 PL_reginput = locinput;
9041c2e3 3730 goto do_ifmatch;
a0d0e21e 3731 case UNLESSM:
c277df42 3732 n = 0;
a0ed51b3 3733 if (scan->flags) {
efb30f32
HS
3734 s = HOPBACKc(locinput, scan->flags);
3735 if (!s)
3736 goto say_yes;
3737 PL_reginput = s;
a0ed51b3
LW
3738 }
3739 else
3740 PL_reginput = locinput;
c277df42
IZ
3741 goto do_ifmatch;
3742 case IFMATCH:
3743 n = 1;
a0ed51b3 3744 if (scan->flags) {
efb30f32
HS
3745 s = HOPBACKc(locinput, scan->flags);
3746 if (!s)
3747 goto say_no;
3748 PL_reginput = s;
a0ed51b3
LW
3749 }
3750 else
3751 PL_reginput = locinput;
3752
c277df42 3753 do_ifmatch:
c277df42
IZ
3754 inner = NEXTOPER(NEXTOPER(scan));
3755 if (regmatch(inner) != n) {
3756 say_no:
3757 if (logical) {
3758 logical = 0;
3759 sw = 0;
3760 goto do_longjump;
a0ed51b3
LW
3761 }
3762 else
c277df42
IZ
3763 sayNO;
3764 }
3765 say_yes:
3766 if (logical) {
3767 logical = 0;
3768 sw = 1;
3769 }
fe44a5e8 3770 if (OP(scan) == SUSPEND) {
3280af22 3771 locinput = PL_reginput;
565764a8 3772 nextchr = UCHARAT(locinput);
fe44a5e8 3773 }
c277df42
IZ
3774 /* FALL THROUGH. */
3775 case LONGJMP:
3776 do_longjump:
3777 next = scan + ARG(scan);
3778 if (next == scan)
3779 next = NULL;
a0d0e21e
LW
3780 break;
3781 default:
b900a521 3782 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3783 PTR2UV(scan), OP(scan));
cea2e8a9 3784 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3785 }
02db2b7b 3786 reenter:
a0d0e21e
LW
3787 scan = next;
3788 }
a687059c 3789
a0d0e21e
LW
3790 /*
3791 * We get here only if there's trouble -- normally "case END" is
3792 * the terminating point.
3793 */
cea2e8a9 3794 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3795 /*NOTREACHED*/
4633a7c4
LW
3796 sayNO;
3797
7821416a
IZ
3798yes_loud:
3799 DEBUG_r(
3800 PerlIO_printf(Perl_debug_log,
3801 "%*s %scould match...%s\n",
3802 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3803 );
3804 goto yes;
3805yes_final:
3806 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3807 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3808yes:
3809#ifdef DEBUGGING
3280af22 3810 PL_regindent--;
4633a7c4 3811#endif
02db2b7b
IZ
3812
3813#if 0 /* Breaks $^R */
3814 if (unwind)
3815 regcpblow(firstcp);
3816#endif
4633a7c4
LW
3817 return 1;
3818
3819no:
7821416a
IZ
3820 DEBUG_r(
3821 PerlIO_printf(Perl_debug_log,
3822 "%*s %sfailed...%s\n",
3823 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3824 );
3825 goto do_no;
3826no_final:
3827do_no:
02db2b7b
IZ
3828 if (unwind) {
3829 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3830
3831 switch (uw->type) {
3832 case RE_UNWIND_BRANCH:
3833 case RE_UNWIND_BRANCHJ:
3834 {
3835 re_unwind_branch_t *uwb = &(uw->branch);
3836 I32 lastparen = uwb->lastparen;
9041c2e3 3837
02db2b7b
IZ
3838 REGCP_UNWIND(uwb->lastcp);
3839 for (n = *PL_reglastparen; n > lastparen; n--)
3840 PL_regendp[n] = -1;
3841 *PL_reglastparen = n;
3842 scan = next = uwb->next;
9041c2e3
NIS
3843 if ( !scan ||
3844 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
3845 ? BRANCH : BRANCHJ) ) { /* Failure */
3846 unwind = uwb->prev;
3847#ifdef DEBUGGING
3848 PL_regindent--;
3849#endif
3850 goto do_no;
3851 }
3852 /* Have more choice yet. Reuse the same uwb. */
3853 /*SUPPRESS 560*/
3854 if ((n = (uwb->type == RE_UNWIND_BRANCH
3855 ? NEXT_OFF(next) : ARG(next))))
3856 next += n;
3857 else
3858 next = NULL; /* XXXX Needn't unwinding in this case... */
3859 uwb->next = next;
3860 next = NEXTOPER(scan);
3861 if (uwb->type == RE_UNWIND_BRANCHJ)
3862 next = NEXTOPER(next);
3863 locinput = uwb->locinput;
3864 nextchr = uwb->nextchr;
3865#ifdef DEBUGGING
3866 PL_regindent = uwb->regindent;
3867#endif
3868
3869 goto reenter;
3870 }
3871 /* NOT REACHED */
3872 default:
3873 Perl_croak(aTHX_ "regexp unwind memory corruption");
3874 }
3875 /* NOT REACHED */
3876 }
4633a7c4 3877#ifdef DEBUGGING
3280af22 3878 PL_regindent--;
4633a7c4 3879#endif
a0d0e21e 3880 return 0;
a687059c
LW
3881}
3882
3883/*
3884 - regrepeat - repeatedly match something simple, report how many
3885 */
3886/*
3887 * [This routine now assumes that it will only match on things of length 1.
3888 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3889 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3890 */
76e3520e 3891STATIC I32
cea2e8a9 3892S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3893{
a0d0e21e 3894 register char *scan;
a0d0e21e 3895 register I32 c;
3280af22 3896 register char *loceol = PL_regeol;
a0ed51b3 3897 register I32 hardcount = 0;
53c4c00c 3898 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 3899
3280af22 3900 scan = PL_reginput;
c277df42 3901 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3902 loceol = scan + max;
a0d0e21e 3903 switch (OP(p)) {
22c35a8c 3904 case REG_ANY:
1aa99e6b 3905 if (do_utf8) {
ffc61ed2 3906 loceol = PL_regeol;
1aa99e6b 3907 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3908 scan += UTF8SKIP(scan);
3909 hardcount++;
3910 }
3911 } else {
3912 while (scan < loceol && *scan != '\n')
3913 scan++;
a0ed51b3
LW
3914 }
3915 break;
ffc61ed2 3916 case SANY:
def8e4ea
JH
3917 if (do_utf8) {
3918 loceol = PL_regeol;
a0804c9e 3919 while (scan < loceol && hardcount < max) {
def8e4ea
JH
3920 scan += UTF8SKIP(scan);
3921 hardcount++;
3922 }
3923 }
3924 else
3925 scan = loceol;
a0ed51b3 3926 break;
f33976b4
DB
3927 case CANY:
3928 scan = loceol;
3929 break;
bbce6d69 3930 case EXACT: /* length of string is 1 */
cd439c50 3931 c = (U8)*STRING(p);
bbce6d69 3932 while (scan < loceol && UCHARAT(scan) == c)
3933 scan++;
3934 break;
3935 case EXACTF: /* length of string is 1 */
cd439c50 3936 c = (U8)*STRING(p);
bbce6d69 3937 while (scan < loceol &&
22c35a8c 3938 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3939 scan++;
3940 break;
3941 case EXACTFL: /* length of string is 1 */
3280af22 3942 PL_reg_flags |= RF_tainted;
cd439c50 3943 c = (U8)*STRING(p);
bbce6d69 3944 while (scan < loceol &&
22c35a8c 3945 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3946 scan++;
3947 break;
3948 case ANYOF:
ffc61ed2
JH
3949 if (do_utf8) {
3950 loceol = PL_regeol;
1aa99e6b
IH
3951 while (hardcount < max && scan < loceol &&
3952 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3953 scan += UTF8SKIP(scan);
3954 hardcount++;
3955 }
3956 } else {
3957 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3958 scan++;
3959 }
a0d0e21e
LW
3960 break;
3961 case ALNUM:
1aa99e6b 3962 if (do_utf8) {
ffc61ed2 3963 loceol = PL_regeol;
8269fa76 3964 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3965 while (hardcount < max && scan < loceol &&
3568d838 3966 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3967 scan += UTF8SKIP(scan);
3968 hardcount++;
3969 }
3970 } else {
3971 while (scan < loceol && isALNUM(*scan))
3972 scan++;
a0ed51b3
LW
3973 }
3974 break;
bbce6d69 3975 case ALNUML:
3280af22 3976 PL_reg_flags |= RF_tainted;
1aa99e6b 3977 if (do_utf8) {
ffc61ed2 3978 loceol = PL_regeol;
1aa99e6b
IH
3979 while (hardcount < max && scan < loceol &&
3980 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3981 scan += UTF8SKIP(scan);
3982 hardcount++;
3983 }
3984 } else {
3985 while (scan < loceol && isALNUM_LC(*scan))
3986 scan++;
a0ed51b3
LW
3987 }
3988 break;
a0d0e21e 3989 case NALNUM:
1aa99e6b 3990 if (do_utf8) {
ffc61ed2 3991 loceol = PL_regeol;
8269fa76 3992 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3993 while (hardcount < max && scan < loceol &&
3568d838 3994 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3995 scan += UTF8SKIP(scan);
3996 hardcount++;
3997 }
3998 } else {
3999 while (scan < loceol && !isALNUM(*scan))
4000 scan++;
a0ed51b3
LW
4001 }
4002 break;
bbce6d69 4003 case NALNUML:
3280af22 4004 PL_reg_flags |= RF_tainted;
1aa99e6b 4005 if (do_utf8) {
ffc61ed2 4006 loceol = PL_regeol;
1aa99e6b
IH
4007 while (hardcount < max && scan < loceol &&
4008 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4009 scan += UTF8SKIP(scan);
4010 hardcount++;
4011 }
4012 } else {
4013 while (scan < loceol && !isALNUM_LC(*scan))
4014 scan++;
a0ed51b3
LW
4015 }
4016 break;
a0d0e21e 4017 case SPACE:
1aa99e6b 4018 if (do_utf8) {
ffc61ed2 4019 loceol = PL_regeol;
8269fa76 4020 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 4021 while (hardcount < max && scan < loceol &&
3568d838
JH
4022 (*scan == ' ' ||
4023 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4024 scan += UTF8SKIP(scan);
4025 hardcount++;
4026 }
4027 } else {
4028 while (scan < loceol && isSPACE(*scan))
4029 scan++;
a0ed51b3
LW
4030 }
4031 break;
bbce6d69 4032 case SPACEL:
3280af22 4033 PL_reg_flags |= RF_tainted;
1aa99e6b 4034 if (do_utf8) {
ffc61ed2 4035 loceol = PL_regeol;
1aa99e6b 4036 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4037 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4038 scan += UTF8SKIP(scan);
4039 hardcount++;
4040 }
4041 } else {
4042 while (scan < loceol && isSPACE_LC(*scan))
4043 scan++;
a0ed51b3
LW
4044 }
4045 break;
a0d0e21e 4046 case NSPACE:
1aa99e6b 4047 if (do_utf8) {
ffc61ed2 4048 loceol = PL_regeol;
8269fa76 4049 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 4050 while (hardcount < max && scan < loceol &&
3568d838
JH
4051 !(*scan == ' ' ||
4052 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4053 scan += UTF8SKIP(scan);
4054 hardcount++;
4055 }
4056 } else {
4057 while (scan < loceol && !isSPACE(*scan))
4058 scan++;
4059 break;
a0ed51b3 4060 }
bbce6d69 4061 case NSPACEL:
3280af22 4062 PL_reg_flags |= RF_tainted;
1aa99e6b 4063 if (do_utf8) {
ffc61ed2 4064 loceol = PL_regeol;
1aa99e6b 4065 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4066 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4067 scan += UTF8SKIP(scan);
4068 hardcount++;
4069 }
4070 } else {
4071 while (scan < loceol && !isSPACE_LC(*scan))
4072 scan++;
a0ed51b3
LW
4073 }
4074 break;
a0d0e21e 4075 case DIGIT:
1aa99e6b 4076 if (do_utf8) {
ffc61ed2 4077 loceol = PL_regeol;
8269fa76 4078 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 4079 while (hardcount < max && scan < loceol &&
3568d838 4080 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4081 scan += UTF8SKIP(scan);
4082 hardcount++;
4083 }
4084 } else {
4085 while (scan < loceol && isDIGIT(*scan))
4086 scan++;
a0ed51b3
LW
4087 }
4088 break;
a0d0e21e 4089 case NDIGIT:
1aa99e6b 4090 if (do_utf8) {
ffc61ed2 4091 loceol = PL_regeol;
8269fa76 4092 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 4093 while (hardcount < max && scan < loceol &&
3568d838 4094 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4095 scan += UTF8SKIP(scan);
4096 hardcount++;
4097 }
4098 } else {
4099 while (scan < loceol && !isDIGIT(*scan))
4100 scan++;
a0ed51b3
LW
4101 }
4102 break;
a0d0e21e
LW
4103 default: /* Called on something of 0 width. */
4104 break; /* So match right here or not at all. */
4105 }
a687059c 4106
a0ed51b3
LW
4107 if (hardcount)
4108 c = hardcount;
4109 else
4110 c = scan - PL_reginput;
3280af22 4111 PL_reginput = scan;
a687059c 4112
9041c2e3 4113 DEBUG_r(
c277df42
IZ
4114 {
4115 SV *prop = sv_newmortal();
4116
4117 regprop(prop, p);
9041c2e3
NIS
4118 PerlIO_printf(Perl_debug_log,
4119 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
7b0972df 4120 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42 4121 });
9041c2e3 4122
a0d0e21e 4123 return(c);
a687059c
LW
4124}
4125
4126/*
c277df42 4127 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 4128 *
c277df42
IZ
4129 * The repeater is supposed to have constant length.
4130 */
4131
76e3520e 4132STATIC I32
cea2e8a9 4133S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 4134{
b7953727 4135 register char *scan = Nullch;
c277df42 4136 register char *start;
3280af22 4137 register char *loceol = PL_regeol;
a0ed51b3 4138 I32 l = 0;
708e3b05 4139 I32 count = 0, res = 1;
a0ed51b3
LW
4140
4141 if (!max)
4142 return 0;
c277df42 4143
3280af22 4144 start = PL_reginput;
53c4c00c 4145 if (PL_reg_match_utf8) {
708e3b05 4146 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4147 if (!count++) {
4148 l = 0;
4149 while (start < PL_reginput) {
4150 l++;
4151 start += UTF8SKIP(start);
4152 }
4153 *lp = l;
4154 if (l == 0)
4155 return max;
4156 }
4157 if (count == max)
4158 return count;
4159 }
4160 }
4161 else {
708e3b05 4162 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4163 if (!count++) {
4164 *lp = l = PL_reginput - start;
4165 if (max != REG_INFTY && l*max < loceol - scan)
4166 loceol = scan + l*max;
4167 if (l == 0)
4168 return max;
c277df42
IZ
4169 }
4170 }
4171 }
708e3b05 4172 if (!res)
3280af22 4173 PL_reginput = scan;
9041c2e3 4174
a0ed51b3 4175 return count;
c277df42
IZ
4176}
4177
4178/*
ffc61ed2
JH
4179- regclass_swash - prepare the utf8 swash
4180*/
4181
4182SV *
9e55ce06 4183Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV **altsvp)
ffc61ed2 4184{
9e55ce06
JH
4185 SV *sw = NULL;
4186 SV *si = NULL;
4187 SV *alt = NULL;
ffc61ed2
JH
4188
4189 if (PL_regdata && PL_regdata->count) {
4190 U32 n = ARG(node);
4191
4192 if (PL_regdata->what[n] == 's') {
4193 SV *rv = (SV*)PL_regdata->data[n];
4194 AV *av = (AV*)SvRV((SV*)rv);
9e55ce06 4195 SV **a, **b;
9041c2e3 4196
9e55ce06
JH
4197 /* See the end of regcomp.c:S_reglass() for
4198 * documentation of these array elements. */
4199
4200 si = *av_fetch(av, 0, FALSE);
4201 a = av_fetch(av, 1, FALSE);
4202 b = av_fetch(av, 2, FALSE);
9041c2e3 4203
ffc61ed2
JH
4204 if (a)
4205 sw = *a;
4206 else if (si && doinit) {
4207 sw = swash_init("utf8", "", si, 1, 0);
4208 (void)av_store(av, 1, sw);
4209 }
9e55ce06
JH
4210 if (b)
4211 alt = *b;
ffc61ed2
JH
4212 }
4213 }
4214
9e55ce06
JH
4215 if (listsvp)
4216 *listsvp = si;
4217 if (altsvp)
4218 *altsvp = alt;
ffc61ed2
JH
4219
4220 return sw;
4221}
4222
4223/*
832705d4
JH
4224 - reginclasslen - determine if a character falls into a character class
4225
4226 The n is the ANYOF regnode, the p is the target string, lenp
4227 is pointer to the maximum length of how far to go in the p
4228 (if the lenp is zero, UTF8SKIP(p) is used),
4229 do_utf8 tells whether the target string is in UTF-8.
4230
bbce6d69 4231 */
4232
76e3520e 4233STATIC bool
9e55ce06 4234S_reginclasslen(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, register bool do_utf8)
bbce6d69 4235{
ffc61ed2 4236 char flags = ANYOF_FLAGS(n);
bbce6d69 4237 bool match = FALSE;
1aa99e6b 4238 UV c;
3568d838 4239 STRLEN len = 0;
9e55ce06 4240 STRLEN plen;
1aa99e6b 4241
3568d838 4242 c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
bbce6d69 4243
9e55ce06 4244 plen = lenp ? *lenp : UNISKIP(c);
ffc61ed2 4245 if (do_utf8 || (flags & ANYOF_UNICODE)) {
9e55ce06
JH
4246 if (lenp)
4247 *lenp = 0;
ffc61ed2 4248 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
4249 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4250 match = TRUE;
bbce6d69 4251 }
3568d838 4252 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 4253 match = TRUE;
ffc61ed2 4254 if (!match) {
9e55ce06
JH
4255 AV *av;
4256 SV *sw = regclass_swash(n, TRUE, 0, (SV**)&av);
ffc61ed2
JH
4257
4258 if (sw) {
3568d838 4259 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
4260 match = TRUE;
4261 else if (flags & ANYOF_FOLD) {
9e55ce06
JH
4262 U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
4263 STRLEN tmplen;
4264
4265 if (!match && lenp && av) {
4266 I32 i;
4267
4268 for (i = 0; i <= av_len(av); i++) {
4269 SV* sv = *av_fetch(av, i, FALSE);
4270 STRLEN len;
4271 char *s = SvPV(sv, len);
4272
4273 if (len <= plen && memEQ(s, p, len)) {
4274 *lenp = len;
4275 match = TRUE;
4276 break;
4277 }
4278 }
4279 }
4280 if (!match) {
4281 to_utf8_fold(p, tmpbuf, &tmplen);
4282 if (swash_fetch(sw, tmpbuf, do_utf8))
4283 match = TRUE;
4284 }
4285 if (!match) {
4286 to_utf8_upper(p, tmpbuf, &tmplen);
4287 if (swash_fetch(sw, tmpbuf, do_utf8))
4288 match = TRUE;
4289 }
ffc61ed2
JH
4290 }
4291 }
bbce6d69 4292 }
9e55ce06
JH
4293 if (match && lenp && *lenp == 0)
4294 *lenp = UNISKIP(c);
bbce6d69 4295 }
1aa99e6b 4296 if (!match && c < 256) {
ffc61ed2
JH
4297 if (ANYOF_BITMAP_TEST(n, c))
4298 match = TRUE;
4299 else if (flags & ANYOF_FOLD) {
3568d838 4300 I32 f;
a0ed51b3 4301
ffc61ed2
JH
4302 if (flags & ANYOF_LOCALE) {
4303 PL_reg_flags |= RF_tainted;
4304 f = PL_fold_locale[c];
4305 }
4306 else
4307 f = PL_fold[c];
4308 if (f != c && ANYOF_BITMAP_TEST(n, f))
4309 match = TRUE;
4310 }
4311
4312 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 4313 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
4314 if (
4315 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4316 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4317 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
4318 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4319 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4320 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4321 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4322 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4323 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4324 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4325 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
4326 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
4327 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4328 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4329 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4330 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4331 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
4332 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4333 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
4334 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4335 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4336 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4337 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
4338 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4339 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4340 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
4341 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
4342 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
4343 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
4344 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
4345 ) /* How's that for a conditional? */
4346 {
4347 match = TRUE;
4348 }
a0ed51b3 4349 }
a0ed51b3
LW
4350 }
4351
a0ed51b3
LW
4352 return (flags & ANYOF_INVERT) ? !match : match;
4353}
161b471a 4354
832705d4
JH
4355/*
4356 - reginclass - determine if a character falls into a character class
4357
4358 The n is the ANYOF regnode, the p is the target string, do_utf8 tells
4359 whether the target string is in UTF-8.
4360
4361 */
4362
9e55ce06
JH
4363STATIC bool
4364S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
4365{
4366 return S_reginclasslen(aTHX_ n, p, 0, do_utf8);
4367}
4368
dfe13c55 4369STATIC U8 *
cea2e8a9 4370S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 4371{
1aa99e6b
IH
4372 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4373}
4374
4375STATIC U8 *
4376S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 4377{
a0ed51b3 4378 if (off >= 0) {
1aa99e6b 4379 while (off-- && s < lim) {
ffc61ed2 4380 /* XXX could check well-formedness here */
a0ed51b3 4381 s += UTF8SKIP(s);
ffc61ed2 4382 }
a0ed51b3
LW
4383 }
4384 else {
4385 while (off++) {
1aa99e6b 4386 if (s > lim) {
a0ed51b3 4387 s--;
ffc61ed2 4388 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4389 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4390 s--;
ffc61ed2
JH
4391 }
4392 /* XXX could check well-formedness here */
a0ed51b3
LW
4393 }
4394 }
4395 }
4396 return s;
4397}
161b471a 4398
dfe13c55 4399STATIC U8 *
1aa99e6b 4400S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 4401{
1aa99e6b
IH
4402 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4403}
4404
4405STATIC U8 *
4406S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
4407{
4408 if (off >= 0) {
1aa99e6b 4409 while (off-- && s < lim) {
ffc61ed2 4410 /* XXX could check well-formedness here */
a0ed51b3 4411 s += UTF8SKIP(s);
ffc61ed2 4412 }
a0ed51b3
LW
4413 if (off >= 0)
4414 return 0;
4415 }
4416 else {
4417 while (off++) {
1aa99e6b 4418 if (s > lim) {
a0ed51b3 4419 s--;
ffc61ed2 4420 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4421 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4422 s--;
ffc61ed2
JH
4423 }
4424 /* XXX could check well-formedness here */
a0ed51b3
LW
4425 }
4426 else
4427 break;
4428 }
4429 if (off <= 0)
4430 return 0;
4431 }
4432 return s;
4433}
51371543 4434
51371543 4435static void
acfe0abc 4436restore_pos(pTHX_ void *arg)
51371543 4437{
51371543
GS
4438 if (PL_reg_eval_set) {
4439 if (PL_reg_oldsaved) {
4440 PL_reg_re->subbeg = PL_reg_oldsaved;
4441 PL_reg_re->sublen = PL_reg_oldsavedlen;
4442 RX_MATCH_COPIED_on(PL_reg_re);
4443 }
4444 PL_reg_magic->mg_len = PL_reg_oldpos;
4445 PL_reg_eval_set = 0;
4446 PL_curpm = PL_reg_oldcurpm;
4447 }
4448}