This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: Win32 status - progress !
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
a687059c
LW
83#include "regcomp.h"
84
c277df42
IZ
85#define RF_tainted 1 /* tainted information used? */
86#define RF_warned 2 /* warned about big count? */
ce862d02 87#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
88#define RF_utf8 8 /* String contains multibyte chars? */
89
90#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
91
92#define RS_init 1 /* eval environment created */
93#define RS_set 2 /* replsv value is set */
c277df42 94
a687059c
LW
95#ifndef STATIC
96#define STATIC static
97#endif
98
c277df42
IZ
99/*
100 * Forwards.
101 */
102
a0ed51b3 103#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 104#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 105
dfe13c55
GS
106#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
108#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
109#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
110#define HOPc(pos,off) ((char*)HOP(pos,off))
111#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 112
efb30f32 113#define HOPBACK(pos, off) ( \
e54858b0 114 (PL_reg_match_utf8) \
efb30f32
HS
115 ? reghopmaybe((U8*)pos, -off) \
116 : (pos - off >= PL_bostr) \
117 ? (U8*)(pos - off) \
118 : (U8*)NULL \
119)
120#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
121
1aa99e6b
IH
122#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
123#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
124#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
125#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
126#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
127#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
128
8269fa76 129#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 130
5f80c4cf 131/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
132#define JUMPABLE(rn) ( \
133 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
134 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
135 OP(rn) == PLUS || OP(rn) == MINMOD || \
136 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
137)
138
cca55fe3
JP
139#define HAS_TEXT(rn) ( \
140 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
141)
e2d8ce26 142
a84d97b6
HS
143/*
144 Search for mandatory following text node; for lookahead, the text must
145 follow but for lookbehind (rn->flags != 0) we skip to the next step.
146*/
cca55fe3 147#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 148 while (JUMPABLE(rn)) \
a84d97b6 149 if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 150 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
151 else if (OP(rn) == PLUS) \
152 rn = NEXTOPER(rn); \
a84d97b6
HS
153 else if (OP(rn) == IFMATCH) \
154 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
e2d8ce26 155 else rn += NEXT_OFF(rn); \
5f80c4cf 156} STMT_END
74750237 157
acfe0abc 158static void restore_pos(pTHX_ void *arg);
51371543 159
76e3520e 160STATIC CHECKPOINT
cea2e8a9 161S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 162{
3280af22 163 int retval = PL_savestack_ix;
b1ce53c5
JH
164#define REGCP_PAREN_ELEMS 4
165 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
166 int p;
167
e49a9654
IH
168 if (paren_elems_to_push < 0)
169 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
170
a01268b5 171#define REGCP_OTHER_ELEMS 6
b1ce53c5 172 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 173 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 174/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
175 SSPUSHINT(PL_regendp[p]);
176 SSPUSHINT(PL_regstartp[p]);
3280af22 177 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
178 SSPUSHINT(p);
179 }
b1ce53c5 180/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
181 SSPUSHINT(PL_regsize);
182 SSPUSHINT(*PL_reglastparen);
a01268b5 183 SSPUSHINT(*PL_reglastcloseparen);
3280af22 184 SSPUSHPTR(PL_reginput);
41123dfd
JH
185#define REGCP_FRAME_ELEMS 2
186/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
187 * are needed for the regexp context stack bookkeeping. */
188 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 189 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 190
a0d0e21e
LW
191 return retval;
192}
193
c277df42 194/* These are needed since we do not localize EVAL nodes: */
02db2b7b 195# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 196 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 197 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 198
02db2b7b 199# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 200 PerlIO_printf(Perl_debug_log, \
faccc32b 201 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 202 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 203
76e3520e 204STATIC char *
cea2e8a9 205S_regcppop(pTHX)
a0d0e21e 206{
b1ce53c5 207 I32 i;
a0d0e21e
LW
208 U32 paren = 0;
209 char *input;
cf93c79d 210 I32 tmps;
b1ce53c5
JH
211
212 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 213 i = SSPOPINT;
b1ce53c5
JH
214 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
215 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 216 input = (char *) SSPOPPTR;
a01268b5 217 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
218 *PL_reglastparen = SSPOPINT;
219 PL_regsize = SSPOPINT;
b1ce53c5
JH
220
221 /* Now restore the parentheses context. */
41123dfd
JH
222 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
223 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 224 paren = (U32)SSPOPINT;
3280af22 225 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
226 PL_regstartp[paren] = SSPOPINT;
227 tmps = SSPOPINT;
3280af22
NIS
228 if (paren <= *PL_reglastparen)
229 PL_regendp[paren] = tmps;
c277df42 230 DEBUG_r(
c3464db5 231 PerlIO_printf(Perl_debug_log,
b900a521 232 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 233 (UV)paren, (IV)PL_regstartp[paren],
b900a521 234 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 235 (IV)PL_regendp[paren],
3280af22 236 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 237 );
a0d0e21e 238 }
c277df42 239 DEBUG_r(
3280af22 240 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 241 PerlIO_printf(Perl_debug_log,
faccc32b
JH
242 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
243 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
244 }
245 );
daf18116 246#if 1
dafc8851
JH
247 /* It would seem that the similar code in regtry()
248 * already takes care of this, and in fact it is in
249 * a better location to since this code can #if 0-ed out
250 * but the code in regtry() is needed or otherwise tests
251 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
252 * (as of patchlevel 7877) will fail. Then again,
253 * this code seems to be necessary or otherwise
254 * building DynaLoader will fail:
255 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
256 * --jhi */
3280af22
NIS
257 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
258 if (paren > PL_regsize)
cf93c79d
IZ
259 PL_regstartp[paren] = -1;
260 PL_regendp[paren] = -1;
a0d0e21e 261 }
dafc8851 262#endif
a0d0e21e
LW
263 return input;
264}
265
0f5d15d6 266STATIC char *
cea2e8a9 267S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
268{
269 I32 tmp = PL_savestack_ix;
270
271 PL_savestack_ix = ss;
272 regcppop();
273 PL_savestack_ix = tmp;
942e002e 274 return Nullch;
0f5d15d6
IZ
275}
276
277typedef struct re_cc_state
278{
279 I32 ss;
280 regnode *node;
281 struct re_cc_state *prev;
282 CURCUR *cc;
283 regexp *re;
284} re_cc_state;
285
02db2b7b 286#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 287
29d1e993
HS
288#define TRYPAREN(paren, n, input) { \
289 if (paren) { \
290 if (n) { \
291 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
292 PL_regendp[paren] = input - PL_bostr; \
293 } \
294 else \
295 PL_regendp[paren] = -1; \
296 } \
297 if (regmatch(next)) \
298 sayYES; \
299 if (paren && n) \
300 PL_regendp[paren] = -1; \
301}
302
303
a687059c 304/*
e50aee73 305 * pregexec and friends
a687059c
LW
306 */
307
308/*
c277df42 309 - pregexec - match a regexp against a string
a687059c 310 */
c277df42 311I32
864dbfa3 312Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 313 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
314/* strend: pointer to null at end of string */
315/* strbeg: real beginning of string */
316/* minend: end of match must be >=minend after stringarg. */
317/* nosave: For optimizations. */
318{
319 return
9041c2e3 320 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
321 nosave ? 0 : REXEC_COPY_STR);
322}
0f5d15d6
IZ
323
324STATIC void
cea2e8a9 325S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
326{
327 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
328#ifdef DEBUGGING
329 PL_regprogram = prog->program;
330#endif
331 PL_regnpar = prog->nparens;
9041c2e3
NIS
332 PL_regdata = prog->data;
333 PL_reg_re = prog;
0f5d15d6 334}
22e551b9 335
9041c2e3 336/*
cad2e5aa
JH
337 * Need to implement the following flags for reg_anch:
338 *
339 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
340 * USE_INTUIT_ML
341 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
342 * INTUIT_AUTORITATIVE_ML
343 * INTUIT_ONCE_NOML - Intuit can match in one location only.
344 * INTUIT_ONCE_ML
345 *
346 * Another flag for this function: SECOND_TIME (so that float substrs
347 * with giant delta may be not rechecked).
348 */
349
350/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
351
2c2d71f5 352/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
353 Otherwise, only SvCUR(sv) is used to get strbeg. */
354
355/* XXXX We assume that strpos is strbeg unless sv. */
356
6eb5f6b9
JH
357/* XXXX Some places assume that there is a fixed substring.
358 An update may be needed if optimizer marks as "INTUITable"
359 RExen without fixed substrings. Similarly, it is assumed that
360 lengths of all the strings are no more than minlen, thus they
361 cannot come from lookahead.
362 (Or minlen should take into account lookahead.) */
363
2c2d71f5
JH
364/* A failure to find a constant substring means that there is no need to make
365 an expensive call to REx engine, thus we celebrate a failure. Similarly,
366 finding a substring too deep into the string means that less calls to
30944b6d
IZ
367 regtry() should be needed.
368
369 REx compiler's optimizer found 4 possible hints:
370 a) Anchored substring;
371 b) Fixed substring;
372 c) Whether we are anchored (beginning-of-line or \G);
373 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 374 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
375 string which does not contradict any of them.
376 */
2c2d71f5 377
6eb5f6b9
JH
378/* Most of decisions we do here should have been done at compile time.
379 The nodes of the REx which we used for the search should have been
380 deleted from the finite automaton. */
381
cad2e5aa
JH
382char *
383Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
384 char *strend, U32 flags, re_scream_pos_data *data)
385{
b7953727 386 register I32 start_shift = 0;
cad2e5aa 387 /* Should be nonnegative! */
b7953727 388 register I32 end_shift = 0;
2c2d71f5
JH
389 register char *s;
390 register SV *check;
a1933d95 391 char *strbeg;
cad2e5aa
JH
392 char *t;
393 I32 ml_anch;
6eb5f6b9 394 register char *other_last = Nullch; /* other substr checked before this */
b7953727 395 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
396#ifdef DEBUGGING
397 char *i_strpos = strpos;
ce333219 398 SV *dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 399#endif
cad2e5aa 400
b8d68ded
JH
401 if (prog->reganch & ROPT_UTF8) {
402 DEBUG_r(PerlIO_printf(Perl_debug_log,
403 "UTF-8 regex...\n"));
404 PL_reg_flags |= RF_utf8;
405 }
406
2a782b5b 407 DEBUG_r({
b8d68ded 408 char *s = PL_reg_match_utf8 ?
c728cb41
JH
409 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
410 strpos;
b8d68ded
JH
411 int len = PL_reg_match_utf8 ?
412 strlen(s) : strend - strpos;
2a782b5b
JH
413 if (!PL_colorset)
414 reginitcolors();
b8d68ded
JH
415 if (PL_reg_match_utf8)
416 DEBUG_r(PerlIO_printf(Perl_debug_log,
417 "UTF-8 target...\n"));
2a782b5b
JH
418 PerlIO_printf(Perl_debug_log,
419 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
420 PL_colors[4],PL_colors[5],PL_colors[0],
421 prog->precomp,
422 PL_colors[1],
423 (strlen(prog->precomp) > 60 ? "..." : ""),
424 PL_colors[0],
425 (int)(len > 60 ? 60 : len),
426 s, PL_colors[1],
427 (len > 60 ? "..." : "")
428 );
429 });
cad2e5aa 430
1aa99e6b 431 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
a72c7584
JH
432 DEBUG_r(PerlIO_printf(Perl_debug_log,
433 "String too short... [re_intuit_start]\n"));
cad2e5aa 434 goto fail;
2c2d71f5 435 }
a1933d95 436 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 437 PL_regeol = strend;
653099ff 438 check = prog->check_substr;
2c2d71f5 439 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
440 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
441 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 442 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 443
7e25d62c
JH
444 if (!ml_anch) {
445 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
446 | ROPT_IMPLICIT)) /* not a real BOL */
447 /* SvCUR is not set on references: SvRV and SvPVX overlap */
448 && sv && !SvROK(sv)
449 && (strpos != strbeg)) {
450 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
451 goto fail;
452 }
453 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 454 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 455 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
456 I32 slen;
457
1aa99e6b 458 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
459 if (SvTAIL(check)) {
460 slen = SvCUR(check); /* >= 1 */
cad2e5aa 461
9041c2e3 462 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
463 || (strend - s == slen && strend[-1] != '\n')) {
464 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
465 goto fail_finish;
cad2e5aa
JH
466 }
467 /* Now should match s[0..slen-2] */
468 slen--;
653099ff 469 if (slen && (*SvPVX(check) != *s
cad2e5aa 470 || (slen > 1
653099ff 471 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
472 report_neq:
473 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
474 goto fail_finish;
475 }
cad2e5aa 476 }
653099ff
GS
477 else if (*SvPVX(check) != *s
478 || ((slen = SvCUR(check)) > 1
479 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
480 goto report_neq;
481 goto success_at_start;
7e25d62c 482 }
cad2e5aa 483 }
2c2d71f5 484 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 485 s = strpos;
2c2d71f5 486 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 487 end_shift = prog->minlen - start_shift -
653099ff 488 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 489 if (!ml_anch) {
653099ff
GS
490 I32 end = prog->check_offset_max + CHR_SVLEN(check)
491 - (SvTAIL(check) != 0);
1aa99e6b 492 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
493
494 if (end_shift < eshift)
495 end_shift = eshift;
496 }
cad2e5aa 497 }
2c2d71f5 498 else { /* Can match at random position */
cad2e5aa
JH
499 ml_anch = 0;
500 s = strpos;
2c2d71f5
JH
501 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
502 /* Should be nonnegative! */
503 end_shift = prog->minlen - start_shift -
653099ff 504 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
505 }
506
2c2d71f5 507#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 508 if (end_shift < 0)
6bbae5e6 509 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
510#endif
511
2c2d71f5
JH
512 restart:
513 /* Find a possible match in the region s..strend by looking for
514 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 515 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
516 I32 p = -1; /* Internal iterator of scream. */
517 I32 *pp = data ? data->scream_pos : &p;
518
2c2d71f5
JH
519 if (PL_screamfirst[BmRARE(check)] >= 0
520 || ( BmRARE(check) == '\n'
521 && (BmPREVIOUS(check) == SvCUR(check) - 1)
522 && SvTAIL(check) ))
9041c2e3 523 s = screaminstr(sv, check,
2c2d71f5 524 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 525 else
2c2d71f5 526 goto fail_finish;
cad2e5aa
JH
527 if (data)
528 *data->scream_olds = s;
529 }
f33976b4 530 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
531 s = fbm_instr((U8*)(s + start_shift),
532 (U8*)(strend - end_shift),
533 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 534 else
1aa99e6b
IH
535 s = fbm_instr(HOP3(s, start_shift, strend),
536 HOP3(strend, -end_shift, strbeg),
2c2d71f5 537 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
538
539 /* Update the count-of-usability, remove useless subpatterns,
540 unshift s. */
2c2d71f5
JH
541
542 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
543 (s ? "Found" : "Did not find"),
544 ((check == prog->anchored_substr) ? "anchored" : "floating"),
545 PL_colors[0],
7b0972df
JH
546 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
547 SvPVX(check),
2c2d71f5
JH
548 PL_colors[1], (SvTAIL(check) ? "$" : ""),
549 (s ? " at offset " : "...\n") ) );
550
551 if (!s)
552 goto fail_finish;
553
6eb5f6b9
JH
554 check_at = s;
555
2c2d71f5 556 /* Finish the diagnostic message */
30944b6d 557 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
558
559 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
560 Start with the other substr.
561 XXXX no SCREAM optimization yet - and a very coarse implementation
562 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
563 *always* match. Probably should be marked during compile...
564 Probably it is right to do no SCREAM here...
565 */
566
567 if (prog->float_substr && prog->anchored_substr) {
30944b6d 568 /* Take into account the "other" substring. */
2c2d71f5
JH
569 /* XXXX May be hopelessly wrong for UTF... */
570 if (!other_last)
6eb5f6b9 571 other_last = strpos;
2c2d71f5 572 if (check == prog->float_substr) {
30944b6d
IZ
573 do_other_anchored:
574 {
1aa99e6b 575 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
576 char *s1 = s;
577
2c2d71f5
JH
578 t = s - prog->check_offset_max;
579 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
580 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 581 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 582 && t > strpos)))
30944b6d 583 /* EMPTY */;
2c2d71f5
JH
584 else
585 t = strpos;
1aa99e6b 586 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
587 if (t < other_last) /* These positions already checked */
588 t = other_last;
1aa99e6b 589 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
590 if (last < last1)
591 last1 = last;
592 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
593 /* On end-of-str: see comment below. */
594 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
595 HOP3(HOP3(last1, prog->anchored_offset, strend)
596 + SvCUR(prog->anchored_substr),
597 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
598 prog->anchored_substr,
599 PL_multiline ? FBMrf_MULTILINE : 0);
600 DEBUG_r(PerlIO_printf(Perl_debug_log,
601 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
602 (s ? "Found" : "Contradicts"),
603 PL_colors[0],
7b0972df
JH
604 (int)(SvCUR(prog->anchored_substr)
605 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
606 SvPVX(prog->anchored_substr),
607 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
608 if (!s) {
609 if (last1 >= last2) {
610 DEBUG_r(PerlIO_printf(Perl_debug_log,
611 ", giving up...\n"));
612 goto fail_finish;
613 }
614 DEBUG_r(PerlIO_printf(Perl_debug_log,
615 ", trying floating at offset %ld...\n",
1aa99e6b
IH
616 (long)(HOP3c(s1, 1, strend) - i_strpos)));
617 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
618 s = HOP3c(last, 1, strend);
2c2d71f5
JH
619 goto restart;
620 }
621 else {
622 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 623 (long)(s - i_strpos)));
1aa99e6b
IH
624 t = HOP3c(s, -prog->anchored_offset, strbeg);
625 other_last = HOP3c(s, 1, strend);
30944b6d 626 s = s1;
2c2d71f5
JH
627 if (t == strpos)
628 goto try_at_start;
2c2d71f5
JH
629 goto try_at_offset;
630 }
30944b6d 631 }
2c2d71f5
JH
632 }
633 else { /* Take into account the floating substring. */
634 char *last, *last1;
635 char *s1 = s;
636
1aa99e6b
IH
637 t = HOP3c(s, -start_shift, strbeg);
638 last1 = last =
639 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
640 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
641 last = HOP3c(t, prog->float_max_offset, strend);
642 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
643 if (s < other_last)
644 s = other_last;
2c2d71f5
JH
645 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
646 /* fbm_instr() takes into account exact value of end-of-str
647 if the check is SvTAIL(ed). Since false positives are OK,
648 and end-of-str is not later than strend we are OK. */
649 s = fbm_instr((unsigned char*)s,
650 (unsigned char*)last + SvCUR(prog->float_substr)
651 - (SvTAIL(prog->float_substr)!=0),
652 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
653 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
654 (s ? "Found" : "Contradicts"),
655 PL_colors[0],
7b0972df
JH
656 (int)(SvCUR(prog->float_substr)
657 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
658 SvPVX(prog->float_substr),
659 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
660 if (!s) {
661 if (last1 == last) {
662 DEBUG_r(PerlIO_printf(Perl_debug_log,
663 ", giving up...\n"));
664 goto fail_finish;
665 }
666 DEBUG_r(PerlIO_printf(Perl_debug_log,
667 ", trying anchored starting at offset %ld...\n",
30944b6d 668 (long)(s1 + 1 - i_strpos)));
803ff556 669 other_last = last;
1aa99e6b 670 s = HOP3c(t, 1, strend);
2c2d71f5
JH
671 goto restart;
672 }
673 else {
674 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 675 (long)(s - i_strpos)));
803ff556 676 other_last = s; /* Fix this later. --Hugo */
30944b6d 677 s = s1;
2c2d71f5
JH
678 if (t == strpos)
679 goto try_at_start;
2c2d71f5
JH
680 goto try_at_offset;
681 }
682 }
cad2e5aa 683 }
2c2d71f5
JH
684
685 t = s - prog->check_offset_max;
2c2d71f5
JH
686 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
687 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
688 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
689 && t > strpos))) {
2c2d71f5
JH
690 /* Fixed substring is found far enough so that the match
691 cannot start at strpos. */
692 try_at_offset:
cad2e5aa 693 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
694 /* Eventually fbm_*() should handle this, but often
695 anchored_offset is not 0, so this check will not be wasted. */
696 /* XXXX In the code below we prefer to look for "^" even in
697 presence of anchored substrings. And we search even
698 beyond the found float position. These pessimizations
699 are historical artefacts only. */
700 find_anchor:
2c2d71f5 701 while (t < strend - prog->minlen) {
cad2e5aa 702 if (*t == '\n') {
4ee3650e 703 if (t < check_at - prog->check_offset_min) {
30944b6d 704 if (prog->anchored_substr) {
4ee3650e
GS
705 /* Since we moved from the found position,
706 we definitely contradict the found anchored
30944b6d
IZ
707 substr. Due to the above check we do not
708 contradict "check" substr.
709 Thus we can arrive here only if check substr
710 is float. Redo checking for "other"=="fixed".
711 */
9041c2e3 712 strpos = t + 1;
30944b6d
IZ
713 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
714 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
715 goto do_other_anchored;
716 }
4ee3650e
GS
717 /* We don't contradict the found floating substring. */
718 /* XXXX Why not check for STCLASS? */
cad2e5aa 719 s = t + 1;
2c2d71f5 720 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 721 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
722 goto set_useful;
723 }
4ee3650e
GS
724 /* Position contradicts check-string */
725 /* XXXX probably better to look for check-string
726 than for "\n", so one should lower the limit for t? */
727 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 728 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 729 other_last = strpos = s = t + 1;
cad2e5aa
JH
730 goto restart;
731 }
732 t++;
733 }
2c2d71f5
JH
734 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
735 PL_colors[0],PL_colors[1]));
736 goto fail_finish;
cad2e5aa 737 }
f5952150
GS
738 else {
739 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
740 PL_colors[0],PL_colors[1]));
741 }
cad2e5aa
JH
742 s = t;
743 set_useful:
2c2d71f5 744 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
745 }
746 else {
f5952150 747 /* The found string does not prohibit matching at strpos,
2c2d71f5 748 - no optimization of calling REx engine can be performed,
f5952150
GS
749 unless it was an MBOL and we are not after MBOL,
750 or a future STCLASS check will fail this. */
2c2d71f5
JH
751 try_at_start:
752 /* Even in this situation we may use MBOL flag if strpos is offset
753 wrt the start of the string. */
05b4157f 754 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 755 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
756 /* May be due to an implicit anchor of m{.*foo} */
757 && !(prog->reganch & ROPT_IMPLICIT))
758 {
cad2e5aa
JH
759 t = strpos;
760 goto find_anchor;
761 }
30944b6d 762 DEBUG_r( if (ml_anch)
f5952150
GS
763 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
764 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 765 );
2c2d71f5 766 success_at_start:
30944b6d 767 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 768 && prog->check_substr /* Could be deleted already */
cad2e5aa 769 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
770 && prog->check_substr == prog->float_substr)
771 {
cad2e5aa 772 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 773 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
774 SvREFCNT_dec(prog->check_substr);
775 prog->check_substr = Nullsv; /* disable */
776 prog->float_substr = Nullsv; /* clear */
5e39e1e5 777 check = Nullsv; /* abort */
cad2e5aa 778 s = strpos;
3cf5c195
IZ
779 /* XXXX This is a remnant of the old implementation. It
780 looks wasteful, since now INTUIT can use many
6eb5f6b9 781 other heuristics. */
cad2e5aa
JH
782 prog->reganch &= ~RE_USE_INTUIT;
783 }
784 else
785 s = strpos;
786 }
787
6eb5f6b9
JH
788 /* Last resort... */
789 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
790 if (prog->regstclass) {
791 /* minlen == 0 is possible if regstclass is \b or \B,
792 and the fixed substr is ''$.
793 Since minlen is already taken into account, s+1 is before strend;
794 accidentally, minlen >= 1 guaranties no false positives at s + 1
795 even for \b or \B. But (minlen? 1 : 0) below assumes that
796 regstclass does not come from lookahead... */
797 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
798 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 799 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 800 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 801 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 802 : 1);
6eb5f6b9 803 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
804 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
805 : (prog->float_substr
806 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
807 cl_l, strend)
808 : strend);
a1933d95 809 char *startpos = strbeg;
6eb5f6b9
JH
810
811 t = s;
76384e4a 812 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 813 PL_regdata = prog->data;
76384e4a
GS
814 PL_bostr = startpos;
815 }
f33976b4 816 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
6eb5f6b9
JH
817 if (!s) {
818#ifdef DEBUGGING
b7953727 819 char *what = 0;
6eb5f6b9
JH
820#endif
821 if (endpos == strend) {
822 DEBUG_r( PerlIO_printf(Perl_debug_log,
823 "Could not match STCLASS...\n") );
824 goto fail;
825 }
66e933ab
GS
826 DEBUG_r( PerlIO_printf(Perl_debug_log,
827 "This position contradicts STCLASS...\n") );
653099ff
GS
828 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
829 goto fail;
6eb5f6b9
JH
830 /* Contradict one of substrings */
831 if (prog->anchored_substr) {
6eb5f6b9
JH
832 if (prog->anchored_substr == check) {
833 DEBUG_r( what = "anchored" );
834 hop_and_restart:
1aa99e6b 835 s = HOP3c(t, 1, strend);
66e933ab
GS
836 if (s + start_shift + end_shift > strend) {
837 /* XXXX Should be taken into account earlier? */
838 DEBUG_r( PerlIO_printf(Perl_debug_log,
839 "Could not match STCLASS...\n") );
840 goto fail;
841 }
5e39e1e5
HS
842 if (!check)
843 goto giveup;
6eb5f6b9 844 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 845 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
846 what, (long)(s + start_shift - i_strpos)) );
847 goto restart;
848 }
66e933ab 849 /* Have both, check_string is floating */
6eb5f6b9
JH
850 if (t + start_shift >= check_at) /* Contradicts floating=check */
851 goto retry_floating_check;
852 /* Recheck anchored substring, but not floating... */
9041c2e3 853 s = check_at;
5e39e1e5
HS
854 if (!check)
855 goto giveup;
6eb5f6b9 856 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 857 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
858 (long)(other_last - i_strpos)) );
859 goto do_other_anchored;
860 }
60e71179
GS
861 /* Another way we could have checked stclass at the
862 current position only: */
863 if (ml_anch) {
864 s = t = t + 1;
5e39e1e5
HS
865 if (!check)
866 goto giveup;
60e71179 867 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
868 "Looking for /%s^%s/m starting at offset %ld...\n",
869 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 870 goto try_at_offset;
66e933ab 871 }
60e71179
GS
872 if (!prog->float_substr) /* Could have been deleted */
873 goto fail;
6eb5f6b9
JH
874 /* Check is floating subtring. */
875 retry_floating_check:
876 t = check_at - start_shift;
877 DEBUG_r( what = "floating" );
878 goto hop_and_restart;
879 }
b7953727
JH
880 if (t != s) {
881 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 882 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
883 (long)(t - i_strpos), (long)(s - i_strpos))
884 );
885 }
886 else {
887 DEBUG_r(PerlIO_printf(Perl_debug_log,
888 "Does not contradict STCLASS...\n");
889 );
890 }
6eb5f6b9 891 }
5e39e1e5
HS
892 giveup:
893 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
894 PL_colors[4], (check ? "Guessed" : "Giving up"),
895 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 896 return s;
2c2d71f5
JH
897
898 fail_finish: /* Substring not found */
66e933ab
GS
899 if (prog->check_substr) /* could be removed already */
900 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 901 fail:
2c2d71f5 902 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
903 PL_colors[4],PL_colors[5]));
904 return Nullch;
905}
9661b544 906
6eb5f6b9 907/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
908STATIC char *
909S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 910{
6eb5f6b9
JH
911 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
912 char *m;
d8093b23
G
913 STRLEN ln;
914 unsigned int c1;
915 unsigned int c2;
6eb5f6b9
JH
916 char *e;
917 register I32 tmp = 1; /* Scratch variable? */
53c4c00c 918 register bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 919
6eb5f6b9
JH
920 /* We know what class it must start with. */
921 switch (OP(c)) {
6eb5f6b9 922 case ANYOF:
a0ed51b3 923 while (s < strend) {
e0f9d4a8
JH
924 STRLEN skip = do_utf8 ? UTF8SKIP(s) : 1;
925
926 if (reginclass(c, (U8*)s, do_utf8) ||
927 (ANYOF_UNICODE_FOLD_SHARP_S(c, s, strend) &&
928 (skip = 2))) {
6eb5f6b9
JH
929 if (tmp && (norun || regtry(prog, s)))
930 goto got_it;
931 else
932 tmp = doevery;
a0ed51b3 933 }
e0f9d4a8
JH
934 else
935 tmp = 1;
936 s += skip;
a0d0e21e 937 }
6eb5f6b9 938 break;
f33976b4
DB
939 case CANY:
940 while (s < strend) {
941 if (tmp && (norun || regtry(prog, s)))
942 goto got_it;
943 else
944 tmp = doevery;
945 s++;
946 }
947 break;
6eb5f6b9
JH
948 case EXACTF:
949 m = STRING(c);
950 ln = STR_LEN(c);
1aa99e6b 951 if (UTF) {
a2a2844f 952 STRLEN ulen1, ulen2;
e7ae6809
JH
953 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
954 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
955
956 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
957 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
958
959 c1 = utf8_to_uvuni(tmpbuf1, 0);
960 c2 = utf8_to_uvuni(tmpbuf2, 0);
1aa99e6b
IH
961 }
962 else {
963 c1 = *(U8*)m;
964 c2 = PL_fold[c1];
965 }
6eb5f6b9
JH
966 goto do_exactf;
967 case EXACTFL:
968 m = STRING(c);
969 ln = STR_LEN(c);
d8093b23 970 c1 = *(U8*)m;
6eb5f6b9
JH
971 c2 = PL_fold_locale[c1];
972 do_exactf:
93c50829 973 e = do_utf8 ? s + ln : strend - ln;
b3c9acc1 974
6eb5f6b9
JH
975 if (norun && e < s)
976 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 977
60a8b682
JH
978 /* The idea in the EXACTF* cases is to first find the
979 * first character of the EXACTF* node and then, if
980 * necessary, case-insensitively compare the full
981 * text of the node. The c1 and c2 are the first
982 * characters (though in Unicode it gets a bit
983 * more complicated because there are more cases
984 * than just upper and lower: one is really supposed
985 * to use the so-called folding case for case-insensitive
986 * matching (called "loose matching" in Unicode). */
987
1aa99e6b 988 if (do_utf8) {
575cac57
JH
989 UV c, f;
990 U8 tmpbuf [UTF8_MAXLEN+1];
991 U8 foldbuf[UTF8_MAXLEN_FOLD+1];
992 STRLEN len, foldlen;
d7f013c8 993
09091399 994 if (c1 == c2) {
1aa99e6b 995 while (s <= e) {
575cac57 996 c = utf8_to_uvchr((U8*)s, &len);
80aecb99
JH
997 if ( c == c1
998 && (ln == len ||
66423254
JH
999 ibcmp_utf8(s, (char **)0, 0, do_utf8,
1000 m, (char **)0, ln, UTF))
55da9344 1001 && (norun || regtry(prog, s)) )
1aa99e6b 1002 goto got_it;
80aecb99
JH
1003 else {
1004 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1005 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1006 if ( f != c
1007 && (f == c1 || f == c2)
1008 && (ln == foldlen ||
66423254
JH
1009 !ibcmp_utf8((char *) foldbuf,
1010 (char **)0, foldlen, do_utf8,
d07ddd77 1011 m,
66423254 1012 (char **)0, ln, UTF))
80aecb99
JH
1013 && (norun || regtry(prog, s)) )
1014 goto got_it;
1015 }
1aa99e6b
IH
1016 s += len;
1017 }
09091399
JH
1018 }
1019 else {
1aa99e6b 1020 while (s <= e) {
575cac57 1021 c = utf8_to_uvchr((U8*)s, &len);
80aecb99 1022
60a8b682 1023 /* Handle some of the three Greek sigmas cases.
8c01da3c
JH
1024 * Note that not all the possible combinations
1025 * are handled here: some of them are handled
1026 * by the standard folding rules, and some of
1027 * them (the character class or ANYOF cases)
1028 * are handled during compiletime in
1029 * regexec.c:S_regclass(). */
880bd946
JH
1030 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1031 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1032 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99
JH
1033
1034 if ( (c == c1 || c == c2)
1035 && (ln == len ||
66423254
JH
1036 ibcmp_utf8(s, (char **)0, 0, do_utf8,
1037 m, (char **)0, ln, UTF))
55da9344 1038 && (norun || regtry(prog, s)) )
1aa99e6b 1039 goto got_it;
80aecb99
JH
1040 else {
1041 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1042 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1043 if ( f != c
1044 && (f == c1 || f == c2)
1045 && (ln == foldlen ||
1046 !ibcmp_utf8((char *)foldbuf,
66423254 1047 (char **)0, foldlen, do_utf8,
d07ddd77 1048 m,
66423254 1049 (char **)0, ln, UTF))
80aecb99
JH
1050 && (norun || regtry(prog, s)) )
1051 goto got_it;
1052 }
1aa99e6b
IH
1053 s += len;
1054 }
09091399 1055 }
1aa99e6b
IH
1056 }
1057 else {
1058 if (c1 == c2)
1059 while (s <= e) {
1060 if ( *(U8*)s == c1
1061 && (ln == 1 || !(OP(c) == EXACTF
1062 ? ibcmp(s, m, ln)
1063 : ibcmp_locale(s, m, ln)))
1064 && (norun || regtry(prog, s)) )
1065 goto got_it;
1066 s++;
1067 }
1068 else
1069 while (s <= e) {
1070 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1071 && (ln == 1 || !(OP(c) == EXACTF
1072 ? ibcmp(s, m, ln)
1073 : ibcmp_locale(s, m, ln)))
1074 && (norun || regtry(prog, s)) )
1075 goto got_it;
1076 s++;
1077 }
b3c9acc1
IZ
1078 }
1079 break;
bbce6d69 1080 case BOUNDL:
3280af22 1081 PL_reg_flags |= RF_tainted;
bbce6d69 1082 /* FALL THROUGH */
a0d0e21e 1083 case BOUND:
ffc61ed2 1084 if (do_utf8) {
12d33761 1085 if (s == PL_bostr)
ffc61ed2
JH
1086 tmp = '\n';
1087 else {
1aa99e6b 1088 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1089
0064a8a9
JH
1090 if (s > (char*)r)
1091 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1092 }
1093 tmp = ((OP(c) == BOUND ?
9041c2e3 1094 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1095 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1096 while (s < strend) {
1097 if (tmp == !(OP(c) == BOUND ?
3568d838 1098 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1099 isALNUM_LC_utf8((U8*)s)))
1100 {
1101 tmp = !tmp;
1102 if ((norun || regtry(prog, s)))
1103 goto got_it;
1104 }
1105 s += UTF8SKIP(s);
a687059c 1106 }
a0d0e21e 1107 }
667bb95a 1108 else {
12d33761 1109 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1110 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1111 while (s < strend) {
1112 if (tmp ==
1113 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1114 tmp = !tmp;
1115 if ((norun || regtry(prog, s)))
1116 goto got_it;
1117 }
1118 s++;
a0ed51b3 1119 }
a0ed51b3 1120 }
6eb5f6b9 1121 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1122 goto got_it;
1123 break;
bbce6d69 1124 case NBOUNDL:
3280af22 1125 PL_reg_flags |= RF_tainted;
bbce6d69 1126 /* FALL THROUGH */
a0d0e21e 1127 case NBOUND:
ffc61ed2 1128 if (do_utf8) {
12d33761 1129 if (s == PL_bostr)
ffc61ed2
JH
1130 tmp = '\n';
1131 else {
1aa99e6b 1132 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1133
0064a8a9
JH
1134 if (s > (char*)r)
1135 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1136 }
1137 tmp = ((OP(c) == NBOUND ?
9041c2e3 1138 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1139 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1140 while (s < strend) {
1141 if (tmp == !(OP(c) == NBOUND ?
3568d838 1142 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1143 isALNUM_LC_utf8((U8*)s)))
1144 tmp = !tmp;
1145 else if ((norun || regtry(prog, s)))
1146 goto got_it;
1147 s += UTF8SKIP(s);
1148 }
a0d0e21e 1149 }
667bb95a 1150 else {
12d33761 1151 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1152 tmp = ((OP(c) == NBOUND ?
1153 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1154 while (s < strend) {
1155 if (tmp ==
1156 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1157 tmp = !tmp;
1158 else if ((norun || regtry(prog, s)))
1159 goto got_it;
1160 s++;
1161 }
a0ed51b3 1162 }
6eb5f6b9 1163 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1164 goto got_it;
1165 break;
a0d0e21e 1166 case ALNUM:
ffc61ed2 1167 if (do_utf8) {
8269fa76 1168 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1169 while (s < strend) {
3568d838 1170 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1171 if (tmp && (norun || regtry(prog, s)))
1172 goto got_it;
1173 else
1174 tmp = doevery;
1175 }
bbce6d69 1176 else
ffc61ed2
JH
1177 tmp = 1;
1178 s += UTF8SKIP(s);
bbce6d69 1179 }
bbce6d69 1180 }
ffc61ed2
JH
1181 else {
1182 while (s < strend) {
1183 if (isALNUM(*s)) {
1184 if (tmp && (norun || regtry(prog, s)))
1185 goto got_it;
1186 else
1187 tmp = doevery;
1188 }
a0ed51b3 1189 else
ffc61ed2
JH
1190 tmp = 1;
1191 s++;
a0ed51b3 1192 }
a0ed51b3
LW
1193 }
1194 break;
bbce6d69 1195 case ALNUML:
3280af22 1196 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1197 if (do_utf8) {
1198 while (s < strend) {
1199 if (isALNUM_LC_utf8((U8*)s)) {
1200 if (tmp && (norun || regtry(prog, s)))
1201 goto got_it;
1202 else
1203 tmp = doevery;
1204 }
a687059c 1205 else
ffc61ed2
JH
1206 tmp = 1;
1207 s += UTF8SKIP(s);
a0d0e21e 1208 }
a0d0e21e 1209 }
ffc61ed2
JH
1210 else {
1211 while (s < strend) {
1212 if (isALNUM_LC(*s)) {
1213 if (tmp && (norun || regtry(prog, s)))
1214 goto got_it;
1215 else
1216 tmp = doevery;
1217 }
a0ed51b3 1218 else
ffc61ed2
JH
1219 tmp = 1;
1220 s++;
a0ed51b3 1221 }
a0ed51b3
LW
1222 }
1223 break;
a0d0e21e 1224 case NALNUM:
ffc61ed2 1225 if (do_utf8) {
8269fa76 1226 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1227 while (s < strend) {
3568d838 1228 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1229 if (tmp && (norun || regtry(prog, s)))
1230 goto got_it;
1231 else
1232 tmp = doevery;
1233 }
bbce6d69 1234 else
ffc61ed2
JH
1235 tmp = 1;
1236 s += UTF8SKIP(s);
bbce6d69 1237 }
bbce6d69 1238 }
ffc61ed2
JH
1239 else {
1240 while (s < strend) {
1241 if (!isALNUM(*s)) {
1242 if (tmp && (norun || regtry(prog, s)))
1243 goto got_it;
1244 else
1245 tmp = doevery;
1246 }
a0ed51b3 1247 else
ffc61ed2
JH
1248 tmp = 1;
1249 s++;
a0ed51b3 1250 }
a0ed51b3
LW
1251 }
1252 break;
bbce6d69 1253 case NALNUML:
3280af22 1254 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1255 if (do_utf8) {
1256 while (s < strend) {
1257 if (!isALNUM_LC_utf8((U8*)s)) {
1258 if (tmp && (norun || regtry(prog, s)))
1259 goto got_it;
1260 else
1261 tmp = doevery;
1262 }
a687059c 1263 else
ffc61ed2
JH
1264 tmp = 1;
1265 s += UTF8SKIP(s);
a687059c 1266 }
a0d0e21e 1267 }
ffc61ed2
JH
1268 else {
1269 while (s < strend) {
1270 if (!isALNUM_LC(*s)) {
1271 if (tmp && (norun || regtry(prog, s)))
1272 goto got_it;
1273 else
1274 tmp = doevery;
1275 }
a0ed51b3 1276 else
ffc61ed2
JH
1277 tmp = 1;
1278 s++;
a0ed51b3 1279 }
a0ed51b3
LW
1280 }
1281 break;
a0d0e21e 1282 case SPACE:
ffc61ed2 1283 if (do_utf8) {
8269fa76 1284 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1285 while (s < strend) {
3568d838 1286 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1287 if (tmp && (norun || regtry(prog, s)))
1288 goto got_it;
1289 else
1290 tmp = doevery;
1291 }
a0d0e21e 1292 else
ffc61ed2
JH
1293 tmp = 1;
1294 s += UTF8SKIP(s);
2304df62 1295 }
a0d0e21e 1296 }
ffc61ed2
JH
1297 else {
1298 while (s < strend) {
1299 if (isSPACE(*s)) {
1300 if (tmp && (norun || regtry(prog, s)))
1301 goto got_it;
1302 else
1303 tmp = doevery;
1304 }
a0ed51b3 1305 else
ffc61ed2
JH
1306 tmp = 1;
1307 s++;
a0ed51b3 1308 }
a0ed51b3
LW
1309 }
1310 break;
bbce6d69 1311 case SPACEL:
3280af22 1312 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1313 if (do_utf8) {
1314 while (s < strend) {
1315 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1316 if (tmp && (norun || regtry(prog, s)))
1317 goto got_it;
1318 else
1319 tmp = doevery;
1320 }
bbce6d69 1321 else
ffc61ed2
JH
1322 tmp = 1;
1323 s += UTF8SKIP(s);
bbce6d69 1324 }
bbce6d69 1325 }
ffc61ed2
JH
1326 else {
1327 while (s < strend) {
1328 if (isSPACE_LC(*s)) {
1329 if (tmp && (norun || regtry(prog, s)))
1330 goto got_it;
1331 else
1332 tmp = doevery;
1333 }
a0ed51b3 1334 else
ffc61ed2
JH
1335 tmp = 1;
1336 s++;
a0ed51b3 1337 }
a0ed51b3
LW
1338 }
1339 break;
a0d0e21e 1340 case NSPACE:
ffc61ed2 1341 if (do_utf8) {
8269fa76 1342 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1343 while (s < strend) {
3568d838 1344 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1345 if (tmp && (norun || regtry(prog, s)))
1346 goto got_it;
1347 else
1348 tmp = doevery;
1349 }
a0d0e21e 1350 else
ffc61ed2
JH
1351 tmp = 1;
1352 s += UTF8SKIP(s);
a687059c 1353 }
a0d0e21e 1354 }
ffc61ed2
JH
1355 else {
1356 while (s < strend) {
1357 if (!isSPACE(*s)) {
1358 if (tmp && (norun || regtry(prog, s)))
1359 goto got_it;
1360 else
1361 tmp = doevery;
1362 }
a0ed51b3 1363 else
ffc61ed2
JH
1364 tmp = 1;
1365 s++;
a0ed51b3 1366 }
a0ed51b3
LW
1367 }
1368 break;
bbce6d69 1369 case NSPACEL:
3280af22 1370 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1371 if (do_utf8) {
1372 while (s < strend) {
1373 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1374 if (tmp && (norun || regtry(prog, s)))
1375 goto got_it;
1376 else
1377 tmp = doevery;
1378 }
bbce6d69 1379 else
ffc61ed2
JH
1380 tmp = 1;
1381 s += UTF8SKIP(s);
bbce6d69 1382 }
bbce6d69 1383 }
ffc61ed2
JH
1384 else {
1385 while (s < strend) {
1386 if (!isSPACE_LC(*s)) {
1387 if (tmp && (norun || regtry(prog, s)))
1388 goto got_it;
1389 else
1390 tmp = doevery;
1391 }
a0ed51b3 1392 else
ffc61ed2
JH
1393 tmp = 1;
1394 s++;
a0ed51b3 1395 }
a0ed51b3
LW
1396 }
1397 break;
a0d0e21e 1398 case DIGIT:
ffc61ed2 1399 if (do_utf8) {
8269fa76 1400 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1401 while (s < strend) {
3568d838 1402 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1403 if (tmp && (norun || regtry(prog, s)))
1404 goto got_it;
1405 else
1406 tmp = doevery;
1407 }
a0d0e21e 1408 else
ffc61ed2
JH
1409 tmp = 1;
1410 s += UTF8SKIP(s);
2b69d0c2 1411 }
a0d0e21e 1412 }
ffc61ed2
JH
1413 else {
1414 while (s < strend) {
1415 if (isDIGIT(*s)) {
1416 if (tmp && (norun || regtry(prog, s)))
1417 goto got_it;
1418 else
1419 tmp = doevery;
1420 }
a0ed51b3 1421 else
ffc61ed2
JH
1422 tmp = 1;
1423 s++;
a0ed51b3 1424 }
a0ed51b3
LW
1425 }
1426 break;
b8c5462f
JH
1427 case DIGITL:
1428 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1429 if (do_utf8) {
1430 while (s < strend) {
1431 if (isDIGIT_LC_utf8((U8*)s)) {
1432 if (tmp && (norun || regtry(prog, s)))
1433 goto got_it;
1434 else
1435 tmp = doevery;
1436 }
b8c5462f 1437 else
ffc61ed2
JH
1438 tmp = 1;
1439 s += UTF8SKIP(s);
b8c5462f 1440 }
b8c5462f 1441 }
ffc61ed2
JH
1442 else {
1443 while (s < strend) {
1444 if (isDIGIT_LC(*s)) {
1445 if (tmp && (norun || regtry(prog, s)))
1446 goto got_it;
1447 else
1448 tmp = doevery;
1449 }
b8c5462f 1450 else
ffc61ed2
JH
1451 tmp = 1;
1452 s++;
b8c5462f 1453 }
b8c5462f
JH
1454 }
1455 break;
a0d0e21e 1456 case NDIGIT:
ffc61ed2 1457 if (do_utf8) {
8269fa76 1458 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1459 while (s < strend) {
3568d838 1460 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1461 if (tmp && (norun || regtry(prog, s)))
1462 goto got_it;
1463 else
1464 tmp = doevery;
1465 }
a0d0e21e 1466 else
ffc61ed2
JH
1467 tmp = 1;
1468 s += UTF8SKIP(s);
a687059c 1469 }
a0d0e21e 1470 }
ffc61ed2
JH
1471 else {
1472 while (s < strend) {
1473 if (!isDIGIT(*s)) {
1474 if (tmp && (norun || regtry(prog, s)))
1475 goto got_it;
1476 else
1477 tmp = doevery;
1478 }
a0ed51b3 1479 else
ffc61ed2
JH
1480 tmp = 1;
1481 s++;
a0ed51b3 1482 }
a0ed51b3
LW
1483 }
1484 break;
b8c5462f
JH
1485 case NDIGITL:
1486 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1487 if (do_utf8) {
1488 while (s < strend) {
1489 if (!isDIGIT_LC_utf8((U8*)s)) {
1490 if (tmp && (norun || regtry(prog, s)))
1491 goto got_it;
1492 else
1493 tmp = doevery;
1494 }
b8c5462f 1495 else
ffc61ed2
JH
1496 tmp = 1;
1497 s += UTF8SKIP(s);
b8c5462f 1498 }
a0ed51b3 1499 }
ffc61ed2
JH
1500 else {
1501 while (s < strend) {
1502 if (!isDIGIT_LC(*s)) {
1503 if (tmp && (norun || regtry(prog, s)))
1504 goto got_it;
1505 else
1506 tmp = doevery;
1507 }
cf93c79d 1508 else
ffc61ed2
JH
1509 tmp = 1;
1510 s++;
b8c5462f 1511 }
b8c5462f
JH
1512 }
1513 break;
b3c9acc1 1514 default:
3c3eec57
GS
1515 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1516 break;
d6a28714 1517 }
6eb5f6b9
JH
1518 return 0;
1519 got_it:
1520 return s;
1521}
1522
1523/*
1524 - regexec_flags - match a regexp against a string
1525 */
1526I32
1527Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1528 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1529/* strend: pointer to null at end of string */
1530/* strbeg: real beginning of string */
1531/* minend: end of match must be >=minend after stringarg. */
1532/* data: May be used for some additional optimizations. */
1533/* nosave: For optimizations. */
1534{
6eb5f6b9
JH
1535 register char *s;
1536 register regnode *c;
1537 register char *startpos = stringarg;
6eb5f6b9
JH
1538 I32 minlen; /* must match at least this many chars */
1539 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1540 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1541 constant substr. */ /* CC */
1542 I32 end_shift = 0; /* Same for the end. */ /* CC */
1543 I32 scream_pos = -1; /* Internal iterator of scream. */
1544 char *scream_olds;
1545 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1546 bool do_utf8 = DO_UTF8(sv);
2a782b5b 1547#ifdef DEBUGGING
9e55ce06
JH
1548 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
1549 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2a782b5b 1550#endif
6eb5f6b9
JH
1551
1552 PL_regcc = 0;
1553
1554 cache_re(prog);
1555#ifdef DEBUGGING
aea4f609 1556 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1557#endif
1558
1559 /* Be paranoid... */
1560 if (prog == NULL || startpos == NULL) {
1561 Perl_croak(aTHX_ "NULL regexp parameter");
1562 return 0;
1563 }
1564
1565 minlen = prog->minlen;
61a36c01 1566 if (strend - startpos < minlen) {
a72c7584
JH
1567 DEBUG_r(PerlIO_printf(Perl_debug_log,
1568 "String too short [regexec_flags]...\n"));
1569 goto phooey;
1aa99e6b 1570 }
6eb5f6b9 1571
6eb5f6b9
JH
1572 /* Check validity of program. */
1573 if (UCHARAT(prog->program) != REG_MAGIC) {
1574 Perl_croak(aTHX_ "corrupted regexp program");
1575 }
1576
1577 PL_reg_flags = 0;
1578 PL_reg_eval_set = 0;
1579 PL_reg_maxiter = 0;
1580
1581 if (prog->reganch & ROPT_UTF8)
1582 PL_reg_flags |= RF_utf8;
1583
1584 /* Mark beginning of line for ^ and lookbehind. */
1585 PL_regbol = startpos;
1586 PL_bostr = strbeg;
1587 PL_reg_sv = sv;
1588
1589 /* Mark end of line for $ (and such) */
1590 PL_regeol = strend;
1591
1592 /* see how far we have to get to not match where we matched before */
1593 PL_regtill = startpos+minend;
1594
1595 /* We start without call_cc context. */
1596 PL_reg_call_cc = 0;
1597
1598 /* If there is a "must appear" string, look for it. */
1599 s = startpos;
1600
1601 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1602 MAGIC *mg;
1603
1604 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1605 PL_reg_ganch = startpos;
1606 else if (sv && SvTYPE(sv) >= SVt_PVMG
1607 && SvMAGIC(sv)
14befaf4
DM
1608 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1609 && mg->mg_len >= 0) {
6eb5f6b9
JH
1610 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1611 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1612 if (s > PL_reg_ganch)
6eb5f6b9
JH
1613 goto phooey;
1614 s = PL_reg_ganch;
1615 }
1616 }
1617 else /* pos() not defined */
1618 PL_reg_ganch = strbeg;
1619 }
1620
699c3c34
JH
1621 if (do_utf8 == (UTF!=0) &&
1622 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1623 re_scream_pos_data d;
1624
1625 d.scream_olds = &scream_olds;
1626 d.scream_pos = &scream_pos;
1627 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7
JH
1628 if (!s) {
1629 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1630 goto phooey; /* not present */
3fa9c3d7 1631 }
6eb5f6b9
JH
1632 }
1633
2a782b5b 1634 DEBUG_r({
9e55ce06
JH
1635 char *s0 = UTF ?
1636 pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
c728cb41 1637 UNI_DISPLAY_REGEX) :
9e55ce06
JH
1638 prog->precomp;
1639 int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1640 char *s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
c728cb41 1641 UNI_DISPLAY_REGEX) : startpos;
9e55ce06 1642 int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
2a782b5b
JH
1643 if (!PL_colorset)
1644 reginitcolors();
1645 PerlIO_printf(Perl_debug_log,
9e55ce06 1646 "%sMatching REx%s `%s%*.*s%s%s' against `%s%.*s%s%s'\n",
2a782b5b 1647 PL_colors[4],PL_colors[5],PL_colors[0],
9e55ce06 1648 len0, len0, s0,
2a782b5b 1649 PL_colors[1],
9e55ce06 1650 len0 > 60 ? "..." : "",
2a782b5b 1651 PL_colors[0],
9e55ce06
JH
1652 (int)(len1 > 60 ? 60 : len1),
1653 s1, PL_colors[1],
1654 (len1 > 60 ? "..." : "")
2a782b5b
JH
1655 );
1656 });
6eb5f6b9
JH
1657
1658 /* Simplest case: anchored match need be tried only once. */
1659 /* [unless only anchor is BOL and multiline is set] */
1660 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1661 if (s == startpos && regtry(prog, startpos))
1662 goto got_it;
1663 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1664 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1665 {
1666 char *end;
1667
1668 if (minlen)
1669 dontbother = minlen - 1;
1aa99e6b 1670 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1671 /* for multiline we only have to try after newlines */
1672 if (prog->check_substr) {
1673 if (s == startpos)
1674 goto after_try;
1675 while (1) {
1676 if (regtry(prog, s))
1677 goto got_it;
1678 after_try:
1679 if (s >= end)
1680 goto phooey;
1681 if (prog->reganch & RE_USE_INTUIT) {
1682 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1683 if (!s)
1684 goto phooey;
1685 }
1686 else
1687 s++;
1688 }
1689 } else {
1690 if (s > startpos)
1691 s--;
1692 while (s < end) {
1693 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1694 if (regtry(prog, s))
1695 goto got_it;
1696 }
1697 }
1698 }
1699 }
1700 goto phooey;
1701 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1702 if (regtry(prog, PL_reg_ganch))
1703 goto got_it;
1704 goto phooey;
1705 }
1706
1707 /* Messy cases: unanchored match. */
9041c2e3 1708 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1709 /* we have /x+whatever/ */
1710 /* it must be a one character string (XXXX Except UTF?) */
1711 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1712#ifdef DEBUGGING
1713 int did_match = 0;
1714#endif
1715
1aa99e6b 1716 if (do_utf8) {
6eb5f6b9
JH
1717 while (s < strend) {
1718 if (*s == ch) {
bf93d4cc 1719 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1720 if (regtry(prog, s)) goto got_it;
1721 s += UTF8SKIP(s);
1722 while (s < strend && *s == ch)
1723 s += UTF8SKIP(s);
1724 }
1725 s += UTF8SKIP(s);
1726 }
1727 }
1728 else {
1729 while (s < strend) {
1730 if (*s == ch) {
bf93d4cc 1731 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1732 if (regtry(prog, s)) goto got_it;
1733 s++;
1734 while (s < strend && *s == ch)
1735 s++;
1736 }
1737 s++;
1738 }
1739 }
b7953727 1740 DEBUG_r(if (!did_match)
bf93d4cc 1741 PerlIO_printf(Perl_debug_log,
b7953727
JH
1742 "Did not find anchored character...\n")
1743 );
6eb5f6b9
JH
1744 }
1745 /*SUPPRESS 560*/
1aa99e6b
IH
1746 else if (do_utf8 == (UTF!=0) &&
1747 (prog->anchored_substr != Nullsv
9041c2e3 1748 || (prog->float_substr != Nullsv
1aa99e6b 1749 && prog->float_max_offset < strend - s))) {
9041c2e3 1750 SV *must = prog->anchored_substr
6eb5f6b9 1751 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1752 I32 back_max =
6eb5f6b9 1753 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1754 I32 back_min =
6eb5f6b9 1755 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1756 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1757 -(I32)(CHR_SVLEN(must)
1aa99e6b 1758 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1759 char *last1; /* Last position checked before */
bf93d4cc
GS
1760#ifdef DEBUGGING
1761 int did_match = 0;
1762#endif
6eb5f6b9
JH
1763
1764 if (s > PL_bostr)
1765 last1 = HOPc(s, -1);
1766 else
1767 last1 = s - 1; /* bogus */
1768
1769 /* XXXX check_substr already used to find `s', can optimize if
1770 check_substr==must. */
1771 scream_pos = -1;
1772 dontbother = end_shift;
1773 strend = HOPc(strend, -dontbother);
1774 while ( (s <= last) &&
9041c2e3 1775 ((flags & REXEC_SCREAM)
1aa99e6b 1776 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1777 end_shift, &scream_pos, 0))
1aa99e6b 1778 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1779 (unsigned char*)strend, must,
6eb5f6b9 1780 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1781 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1782 if (HOPc(s, -back_max) > last1) {
1783 last1 = HOPc(s, -back_min);
1784 s = HOPc(s, -back_max);
1785 }
1786 else {
1787 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1788
1789 last1 = HOPc(s, -back_min);
1790 s = t;
1791 }
1aa99e6b 1792 if (do_utf8) {
6eb5f6b9
JH
1793 while (s <= last1) {
1794 if (regtry(prog, s))
1795 goto got_it;
1796 s += UTF8SKIP(s);
1797 }
1798 }
1799 else {
1800 while (s <= last1) {
1801 if (regtry(prog, s))
1802 goto got_it;
1803 s++;
1804 }
1805 }
1806 }
b7953727
JH
1807 DEBUG_r(if (!did_match)
1808 PerlIO_printf(Perl_debug_log,
1809 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1810 ((must == prog->anchored_substr)
1811 ? "anchored" : "floating"),
1812 PL_colors[0],
1813 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1814 SvPVX(must),
b7953727
JH
1815 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1816 );
6eb5f6b9
JH
1817 goto phooey;
1818 }
155aba94 1819 else if ((c = prog->regstclass)) {
66e933ab
GS
1820 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1821 /* don't bother with what can't match */
6eb5f6b9 1822 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1823 DEBUG_r({
1824 SV *prop = sv_newmortal();
9e55ce06
JH
1825 char *s0;
1826 char *s1;
1827 int len0;
1828 int len1;
1829
ffc61ed2 1830 regprop(prop, c);
9e55ce06
JH
1831 s0 = UTF ?
1832 pv_uni_display(dsv0, (U8*)SvPVX(prop), SvCUR(prop), 60,
c728cb41 1833 UNI_DISPLAY_REGEX) :
9e55ce06
JH
1834 SvPVX(prop);
1835 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1836 s1 = UTF ?
c728cb41 1837 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
9e55ce06
JH
1838 len1 = UTF ? SvCUR(dsv1) : strend - s;
1839 PerlIO_printf(Perl_debug_log,
1840 "Matching stclass `%*.*s' against `%*.*s'\n",
1841 len0, len0, s0,
1842 len1, len1, s1);
ffc61ed2 1843 });
6eb5f6b9
JH
1844 if (find_byclass(prog, c, s, strend, startpos, 0))
1845 goto got_it;
bf93d4cc 1846 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1847 }
1848 else {
1849 dontbother = 0;
1850 if (prog->float_substr != Nullsv) { /* Trim the end. */
1851 char *last;
d6a28714
JH
1852
1853 if (flags & REXEC_SCREAM) {
1854 last = screaminstr(sv, prog->float_substr, s - strbeg,
1855 end_shift, &scream_pos, 1); /* last one */
1856 if (!last)
ffc61ed2 1857 last = scream_olds; /* Only one occurrence. */
b8c5462f 1858 }
d6a28714
JH
1859 else {
1860 STRLEN len;
1861 char *little = SvPV(prog->float_substr, len);
1862
1863 if (SvTAIL(prog->float_substr)) {
1864 if (memEQ(strend - len + 1, little, len - 1))
1865 last = strend - len + 1;
1866 else if (!PL_multiline)
9041c2e3 1867 last = memEQ(strend - len, little, len)
d6a28714 1868 ? strend - len : Nullch;
b8c5462f 1869 else
d6a28714
JH
1870 goto find_last;
1871 } else {
1872 find_last:
9041c2e3 1873 if (len)
d6a28714 1874 last = rninstr(s, strend, little, little + len);
b8c5462f 1875 else
d6a28714 1876 last = strend; /* matching `$' */
b8c5462f 1877 }
b8c5462f 1878 }
bf93d4cc
GS
1879 if (last == NULL) {
1880 DEBUG_r(PerlIO_printf(Perl_debug_log,
1881 "%sCan't trim the tail, match fails (should not happen)%s\n",
1882 PL_colors[4],PL_colors[5]));
1883 goto phooey; /* Should not happen! */
1884 }
d6a28714
JH
1885 dontbother = strend - last + prog->float_min_offset;
1886 }
1887 if (minlen && (dontbother < minlen))
1888 dontbother = minlen - 1;
1889 strend -= dontbother; /* this one's always in bytes! */
1890 /* We don't know much -- general case. */
1aa99e6b 1891 if (do_utf8) {
d6a28714
JH
1892 for (;;) {
1893 if (regtry(prog, s))
1894 goto got_it;
1895 if (s >= strend)
1896 break;
b8c5462f 1897 s += UTF8SKIP(s);
d6a28714
JH
1898 };
1899 }
1900 else {
1901 do {
1902 if (regtry(prog, s))
1903 goto got_it;
1904 } while (s++ < strend);
1905 }
1906 }
1907
1908 /* Failure. */
1909 goto phooey;
1910
1911got_it:
1912 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1913
1914 if (PL_reg_eval_set) {
1915 /* Preserve the current value of $^R */
1916 if (oreplsv != GvSV(PL_replgv))
1917 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1918 restored, the value remains
1919 the same. */
acfe0abc 1920 restore_pos(aTHX_ 0);
d6a28714
JH
1921 }
1922
1923 /* make sure $`, $&, $', and $digit will work later */
1924 if ( !(flags & REXEC_NOT_FIRST) ) {
1925 if (RX_MATCH_COPIED(prog)) {
1926 Safefree(prog->subbeg);
1927 RX_MATCH_COPIED_off(prog);
1928 }
1929 if (flags & REXEC_COPY_STR) {
1930 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1931
1932 s = savepvn(strbeg, i);
1933 prog->subbeg = s;
1934 prog->sublen = i;
1935 RX_MATCH_COPIED_on(prog);
1936 }
1937 else {
1938 prog->subbeg = strbeg;
1939 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1940 }
1941 }
9041c2e3 1942
d6a28714
JH
1943 return 1;
1944
1945phooey:
bf93d4cc
GS
1946 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1947 PL_colors[4],PL_colors[5]));
d6a28714 1948 if (PL_reg_eval_set)
acfe0abc 1949 restore_pos(aTHX_ 0);
d6a28714
JH
1950 return 0;
1951}
1952
1953/*
1954 - regtry - try match at specific point
1955 */
1956STATIC I32 /* 0 failure, 1 success */
1957S_regtry(pTHX_ regexp *prog, char *startpos)
1958{
d6a28714
JH
1959 register I32 i;
1960 register I32 *sp;
1961 register I32 *ep;
1962 CHECKPOINT lastcp;
1963
02db2b7b
IZ
1964#ifdef DEBUGGING
1965 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1966#endif
d6a28714
JH
1967 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1968 MAGIC *mg;
1969
1970 PL_reg_eval_set = RS_init;
1971 DEBUG_r(DEBUG_s(
b900a521
JH
1972 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1973 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1974 ));
e8347627 1975 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1976 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1977 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1978 SAVETMPS;
1979 /* Apparently this is not needed, judging by wantarray. */
e8347627 1980 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1981 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1982
1983 if (PL_reg_sv) {
1984 /* Make $_ available to executed code. */
1985 if (PL_reg_sv != DEFSV) {
4d1ff10f 1986 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
d6a28714
JH
1987 SAVESPTR(DEFSV);
1988 DEFSV = PL_reg_sv;
b8c5462f 1989 }
d6a28714 1990
9041c2e3 1991 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1992 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1993 /* prepare for quick setting of pos */
14befaf4
DM
1994 sv_magic(PL_reg_sv, (SV*)0,
1995 PERL_MAGIC_regex_global, Nullch, 0);
1996 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1997 mg->mg_len = -1;
b8c5462f 1998 }
d6a28714
JH
1999 PL_reg_magic = mg;
2000 PL_reg_oldpos = mg->mg_len;
c76ac1ee 2001 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 2002 }
09687e5a 2003 if (!PL_reg_curpm) {
0f79a09d 2004 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
2005#ifdef USE_ITHREADS
2006 {
2007 SV* repointer = newSViv(0);
577e12cc 2008 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2009 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2010 av_push(PL_regex_padav,repointer);
2011 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2012 PL_regex_pad = AvARRAY(PL_regex_padav);
2013 }
2014#endif
2015 }
aaa362c4 2016 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2017 PL_reg_oldcurpm = PL_curpm;
2018 PL_curpm = PL_reg_curpm;
2019 if (RX_MATCH_COPIED(prog)) {
2020 /* Here is a serious problem: we cannot rewrite subbeg,
2021 since it may be needed if this match fails. Thus
2022 $` inside (?{}) could fail... */
2023 PL_reg_oldsaved = prog->subbeg;
2024 PL_reg_oldsavedlen = prog->sublen;
2025 RX_MATCH_COPIED_off(prog);
2026 }
2027 else
2028 PL_reg_oldsaved = Nullch;
2029 prog->subbeg = PL_bostr;
2030 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2031 }
2032 prog->startp[0] = startpos - PL_bostr;
2033 PL_reginput = startpos;
2034 PL_regstartp = prog->startp;
2035 PL_regendp = prog->endp;
2036 PL_reglastparen = &prog->lastparen;
a01268b5 2037 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
2038 prog->lastparen = 0;
2039 PL_regsize = 0;
2040 DEBUG_r(PL_reg_starttry = startpos);
2041 if (PL_reg_start_tmpl <= prog->nparens) {
2042 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2043 if(PL_reg_start_tmp)
2044 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2045 else
2046 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2047 }
2048
128e8167
JH
2049#ifdef DEBUGGING
2050 sv_setpvn(PERL_DEBUG_PAD(0), "", 0);
2051 sv_setpvn(PERL_DEBUG_PAD(1), "", 0);
2052 sv_setpvn(PERL_DEBUG_PAD(2), "", 0);
2053#endif
2054
d6a28714
JH
2055 /* XXXX What this code is doing here?!!! There should be no need
2056 to do this again and again, PL_reglastparen should take care of
3dd2943c 2057 this! --ilya*/
dafc8851
JH
2058
2059 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2060 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2061 * PL_reglastparen), is not needed at all by the test suite
2062 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2063 * enough, for building DynaLoader, or otherwise this
2064 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2065 * will happen. Meanwhile, this code *is* needed for the
2066 * above-mentioned test suite tests to succeed. The common theme
2067 * on those tests seems to be returning null fields from matches.
2068 * --jhi */
dafc8851 2069#if 1
d6a28714
JH
2070 sp = prog->startp;
2071 ep = prog->endp;
2072 if (prog->nparens) {
09e8ae3b 2073 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
2074 *++sp = -1;
2075 *++ep = -1;
2076 }
2077 }
dafc8851 2078#endif
02db2b7b 2079 REGCP_SET(lastcp);
d6a28714
JH
2080 if (regmatch(prog->program + 1)) {
2081 prog->endp[0] = PL_reginput - PL_bostr;
2082 return 1;
2083 }
02db2b7b 2084 REGCP_UNWIND(lastcp);
d6a28714
JH
2085 return 0;
2086}
2087
02db2b7b
IZ
2088#define RE_UNWIND_BRANCH 1
2089#define RE_UNWIND_BRANCHJ 2
2090
2091union re_unwind_t;
2092
2093typedef struct { /* XX: makes sense to enlarge it... */
2094 I32 type;
2095 I32 prev;
2096 CHECKPOINT lastcp;
2097} re_unwind_generic_t;
2098
2099typedef struct {
2100 I32 type;
2101 I32 prev;
2102 CHECKPOINT lastcp;
2103 I32 lastparen;
2104 regnode *next;
2105 char *locinput;
2106 I32 nextchr;
2107#ifdef DEBUGGING
2108 int regindent;
2109#endif
2110} re_unwind_branch_t;
2111
2112typedef union re_unwind_t {
2113 I32 type;
2114 re_unwind_generic_t generic;
2115 re_unwind_branch_t branch;
2116} re_unwind_t;
2117
8ba1375e
MJD
2118#define sayYES goto yes
2119#define sayNO goto no
e0f9d4a8 2120#define sayNO_ANYOF goto no_anyof
8ba1375e
MJD
2121#define sayYES_FINAL goto yes_final
2122#define sayYES_LOUD goto yes_loud
2123#define sayNO_FINAL goto no_final
2124#define sayNO_SILENT goto do_no
2125#define saySAME(x) if (x) goto yes; else goto no
2126
2127#define REPORT_CODE_OFF 24
2128
d6a28714
JH
2129/*
2130 - regmatch - main matching routine
2131 *
2132 * Conceptually the strategy is simple: check to see whether the current
2133 * node matches, call self recursively to see whether the rest matches,
2134 * and then act accordingly. In practice we make some effort to avoid
2135 * recursion, in particular by going through "ordinary" nodes (that don't
2136 * need to know whether the rest of the match failed) by a loop instead of
2137 * by recursion.
2138 */
2139/* [lwall] I've hoisted the register declarations to the outer block in order to
2140 * maybe save a little bit of pushing and popping on the stack. It also takes
2141 * advantage of machines that use a register save mask on subroutine entry.
2142 */
2143STATIC I32 /* 0 failure, 1 success */
2144S_regmatch(pTHX_ regnode *prog)
2145{
d6a28714
JH
2146 register regnode *scan; /* Current node. */
2147 regnode *next; /* Next node. */
2148 regnode *inner; /* Next node in internal branch. */
2149 register I32 nextchr; /* renamed nextchr - nextchar colides with
2150 function of same name */
2151 register I32 n; /* no or next */
b7953727
JH
2152 register I32 ln = 0; /* len or last */
2153 register char *s = Nullch; /* operand or save */
d6a28714 2154 register char *locinput = PL_reginput;
b7953727 2155 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2156 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2157 I32 unwind = 0;
b7953727 2158#if 0
02db2b7b 2159 I32 firstcp = PL_savestack_ix;
b7953727 2160#endif
53c4c00c 2161 register bool do_utf8 = PL_reg_match_utf8;
2a782b5b 2162#ifdef DEBUGGING
ce333219
JH
2163 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2164 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2165 SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2a782b5b 2166#endif
02db2b7b 2167
d6a28714
JH
2168#ifdef DEBUGGING
2169 PL_regindent++;
2170#endif
2171
2172 /* Note that nextchr is a byte even in UTF */
2173 nextchr = UCHARAT(locinput);
2174 scan = prog;
2175 while (scan != NULL) {
8ba1375e 2176
2a782b5b 2177 DEBUG_r( {
d6a28714
JH
2178 SV *prop = sv_newmortal();
2179 int docolor = *PL_colors[0];
2180 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2181 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2182 /* The part of the string before starttry has one color
2183 (pref0_len chars), between starttry and current
2184 position another one (pref_len - pref0_len chars),
2185 after the current position the third one.
2186 We assume that pref0_len <= pref_len, otherwise we
2187 decrease pref0_len. */
9041c2e3 2188 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2189 ? (5 + taill) - l : locinput - PL_bostr;
2190 int pref0_len;
d6a28714 2191
df1ffd02 2192 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2193 pref_len++;
2194 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2195 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2196 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2197 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2198 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2199 l--;
d6a28714
JH
2200 if (pref0_len < 0)
2201 pref0_len = 0;
2202 if (pref0_len > pref_len)
2203 pref0_len = pref_len;
2204 regprop(prop, scan);
2a782b5b
JH
2205 {
2206 char *s0 =
df1ffd02 2207 do_utf8 ?
2a782b5b 2208 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
c728cb41 2209 pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2210 locinput - pref_len;
df1ffd02
JH
2211 int len0 = do_utf8 ? strlen(s0) : pref0_len;
2212 char *s1 = do_utf8 ?
2a782b5b 2213 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
c728cb41 2214 pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2215 locinput - pref_len + pref0_len;
df1ffd02
JH
2216 int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2217 char *s2 = do_utf8 ?
2a782b5b 2218 pv_uni_display(dsv2, (U8*)locinput,
c728cb41 2219 PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2220 locinput;
df1ffd02 2221 int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2222 PerlIO_printf(Perl_debug_log,
2223 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2224 (IV)(locinput - PL_bostr),
2225 PL_colors[4],
2226 len0, s0,
2227 PL_colors[5],
2228 PL_colors[2],
2229 len1, s1,
2230 PL_colors[3],
2231 (docolor ? "" : "> <"),
2232 PL_colors[0],
2233 len2, s2,
2234 PL_colors[1],
2235 15 - l - pref_len + 1,
2236 "",
2237 (IV)(scan - PL_regprogram), PL_regindent*2, "",
2238 SvPVX(prop));
2239 }
2240 });
d6a28714
JH
2241
2242 next = scan + NEXT_OFF(scan);
2243 if (next == scan)
2244 next = NULL;
2245
2246 switch (OP(scan)) {
2247 case BOL:
12d33761
HS
2248 if (locinput == PL_bostr || (PL_multiline &&
2249 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2250 {
2251 /* regtill = regbol; */
b8c5462f
JH
2252 break;
2253 }
d6a28714
JH
2254 sayNO;
2255 case MBOL:
12d33761
HS
2256 if (locinput == PL_bostr ||
2257 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2258 {
b8c5462f
JH
2259 break;
2260 }
d6a28714
JH
2261 sayNO;
2262 case SBOL:
c2a73568 2263 if (locinput == PL_bostr)
b8c5462f 2264 break;
d6a28714
JH
2265 sayNO;
2266 case GPOS:
2267 if (locinput == PL_reg_ganch)
2268 break;
2269 sayNO;
2270 case EOL:
2271 if (PL_multiline)
2272 goto meol;
2273 else
2274 goto seol;
2275 case MEOL:
2276 meol:
2277 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2278 sayNO;
b8c5462f 2279 break;
d6a28714
JH
2280 case SEOL:
2281 seol:
2282 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2283 sayNO;
d6a28714 2284 if (PL_regeol - locinput > 1)
b8c5462f 2285 sayNO;
b8c5462f 2286 break;
d6a28714
JH
2287 case EOS:
2288 if (PL_regeol != locinput)
b8c5462f 2289 sayNO;
d6a28714 2290 break;
ffc61ed2 2291 case SANY:
d6a28714 2292 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2293 sayNO;
f33976b4
DB
2294 if (do_utf8) {
2295 locinput += PL_utf8skip[nextchr];
2296 if (locinput > PL_regeol)
2297 sayNO;
2298 nextchr = UCHARAT(locinput);
2299 }
2300 else
2301 nextchr = UCHARAT(++locinput);
2302 break;
2303 case CANY:
2304 if (!nextchr && locinput >= PL_regeol)
2305 sayNO;
b8c5462f 2306 nextchr = UCHARAT(++locinput);
a0d0e21e 2307 break;
ffc61ed2 2308 case REG_ANY:
1aa99e6b
IH
2309 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2310 sayNO;
2311 if (do_utf8) {
b8c5462f 2312 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2313 if (locinput > PL_regeol)
2314 sayNO;
a0ed51b3 2315 nextchr = UCHARAT(locinput);
a0ed51b3 2316 }
1aa99e6b
IH
2317 else
2318 nextchr = UCHARAT(++locinput);
a0ed51b3 2319 break;
d6a28714 2320 case EXACT:
cd439c50
IZ
2321 s = STRING(scan);
2322 ln = STR_LEN(scan);
1aa99e6b 2323 if (do_utf8 != (UTF!=0)) {
bc517b45 2324 /* The target and the pattern have differing utf8ness. */
1aa99e6b
IH
2325 char *l = locinput;
2326 char *e = s + ln;
bc517b45 2327 STRLEN ulen;
a72c7584 2328
5ff6fc6d
JH
2329 if (do_utf8) {
2330 /* The target is utf8, the pattern is not utf8. */
1aa99e6b
IH
2331 while (s < e) {
2332 if (l >= PL_regeol)
5ff6fc6d
JH
2333 sayNO;
2334 if (NATIVE_TO_UNI(*(U8*)s) !=
bc517b45 2335 utf8_to_uvchr((U8*)l, &ulen))
5ff6fc6d 2336 sayNO;
bc517b45 2337 l += ulen;
5ff6fc6d 2338 s ++;
1aa99e6b 2339 }
5ff6fc6d
JH
2340 }
2341 else {
2342 /* The target is not utf8, the pattern is utf8. */
1aa99e6b
IH
2343 while (s < e) {
2344 if (l >= PL_regeol)
2345 sayNO;
5ff6fc6d 2346 if (NATIVE_TO_UNI(*((U8*)l)) !=
bc517b45 2347 utf8_to_uvchr((U8*)s, &ulen))
1aa99e6b 2348 sayNO;
bc517b45 2349 s += ulen;
a72c7584 2350 l ++;
1aa99e6b 2351 }
5ff6fc6d 2352 }
1aa99e6b
IH
2353 locinput = l;
2354 nextchr = UCHARAT(locinput);
2355 break;
2356 }
bc517b45 2357 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2358 /* Inline the first character, for speed. */
2359 if (UCHARAT(s) != nextchr)
2360 sayNO;
2361 if (PL_regeol - locinput < ln)
2362 sayNO;
2363 if (ln > 1 && memNE(s, locinput, ln))
2364 sayNO;
2365 locinput += ln;
2366 nextchr = UCHARAT(locinput);
2367 break;
2368 case EXACTFL:
b8c5462f
JH
2369 PL_reg_flags |= RF_tainted;
2370 /* FALL THROUGH */
d6a28714 2371 case EXACTF:
cd439c50
IZ
2372 s = STRING(scan);
2373 ln = STR_LEN(scan);
d6a28714 2374
d07ddd77
JH
2375 if (do_utf8 || UTF) {
2376 /* Either target or the pattern are utf8. */
d6a28714 2377 char *l = locinput;
d07ddd77 2378 char *e = PL_regeol;
bc517b45 2379
d07ddd77 2380 if (ibcmp_utf8(s, 0, ln, do_utf8,
5486206c
JH
2381 l, &e, 0, UTF)) {
2382 /* One more case for the sharp s:
2383 * pack("U0U*", 0xDF) =~ /ss/i,
2384 * the 0xC3 0x9F are the UTF-8
2385 * byte sequence for the U+00DF. */
2386 if (!(do_utf8 &&
2387 toLOWER(s[0]) == 's' &&
2388 ln >= 2 &&
2389 toLOWER(s[1]) == 's' &&
2390 (U8)l[0] == 0xC3 &&
2391 e - l >= 2 &&
2392 (U8)l[1] == 0x9F))
2393 sayNO;
2394 }
d07ddd77
JH
2395 locinput = e;
2396 nextchr = UCHARAT(locinput);
2397 break;
a0ed51b3 2398 }
d6a28714 2399
bc517b45
JH
2400 /* Neither the target and the pattern are utf8. */
2401
d6a28714
JH
2402 /* Inline the first character, for speed. */
2403 if (UCHARAT(s) != nextchr &&
2404 UCHARAT(s) != ((OP(scan) == EXACTF)
2405 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2406 sayNO;
d6a28714 2407 if (PL_regeol - locinput < ln)
b8c5462f 2408 sayNO;
d6a28714
JH
2409 if (ln > 1 && (OP(scan) == EXACTF
2410 ? ibcmp(s, locinput, ln)
2411 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2412 sayNO;
d6a28714
JH
2413 locinput += ln;
2414 nextchr = UCHARAT(locinput);
a0d0e21e 2415 break;
d6a28714 2416 case ANYOF:
ffc61ed2 2417 if (do_utf8) {
9e55ce06
JH
2418 STRLEN inclasslen = PL_regeol - locinput;
2419
2420 if (!reginclasslen(scan, (U8*)locinput, &inclasslen, do_utf8))
e0f9d4a8 2421 sayNO_ANYOF;
ffc61ed2
JH
2422 if (locinput >= PL_regeol)
2423 sayNO;
9e55ce06 2424 locinput += inclasslen;
b8c5462f 2425 nextchr = UCHARAT(locinput);
e0f9d4a8 2426 break;
ffc61ed2
JH
2427 }
2428 else {
2429 if (nextchr < 0)
2430 nextchr = UCHARAT(locinput);
2431 if (!reginclass(scan, (U8*)locinput, do_utf8))
e0f9d4a8 2432 sayNO_ANYOF;
ffc61ed2
JH
2433 if (!nextchr && locinput >= PL_regeol)
2434 sayNO;
2435 nextchr = UCHARAT(++locinput);
e0f9d4a8
JH
2436 break;
2437 }
2438 no_anyof:
2439 /* If we might have the case of the German sharp s
2440 * in a casefolding Unicode character class. */
2441
2442 if (ANYOF_UNICODE_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2443 locinput += 2;
2444 nextchr = UCHARAT(locinput);
ffc61ed2 2445 }
e0f9d4a8
JH
2446 else
2447 sayNO;
b8c5462f 2448 break;
d6a28714 2449 case ALNUML:
b8c5462f
JH
2450 PL_reg_flags |= RF_tainted;
2451 /* FALL THROUGH */
d6a28714 2452 case ALNUM:
b8c5462f 2453 if (!nextchr)
4633a7c4 2454 sayNO;
ffc61ed2 2455 if (do_utf8) {
ad24be35 2456 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2457 if (!(OP(scan) == ALNUM
3568d838 2458 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2459 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2460 {
2461 sayNO;
a0ed51b3 2462 }
b8c5462f 2463 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2464 nextchr = UCHARAT(locinput);
2465 break;
2466 }
ffc61ed2 2467 if (!(OP(scan) == ALNUM
d6a28714 2468 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2469 sayNO;
b8c5462f 2470 nextchr = UCHARAT(++locinput);
a0d0e21e 2471 break;
d6a28714 2472 case NALNUML:
b8c5462f
JH
2473 PL_reg_flags |= RF_tainted;
2474 /* FALL THROUGH */
d6a28714
JH
2475 case NALNUM:
2476 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2477 sayNO;
ffc61ed2 2478 if (do_utf8) {
8269fa76 2479 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2480 if (OP(scan) == NALNUM
3568d838 2481 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2482 : isALNUM_LC_utf8((U8*)locinput))
2483 {
b8c5462f 2484 sayNO;
d6a28714 2485 }
b8c5462f
JH
2486 locinput += PL_utf8skip[nextchr];
2487 nextchr = UCHARAT(locinput);
2488 break;
2489 }
ffc61ed2 2490 if (OP(scan) == NALNUM
d6a28714 2491 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2492 sayNO;
76e3520e 2493 nextchr = UCHARAT(++locinput);
a0d0e21e 2494 break;
d6a28714
JH
2495 case BOUNDL:
2496 case NBOUNDL:
3280af22 2497 PL_reg_flags |= RF_tainted;
bbce6d69 2498 /* FALL THROUGH */
d6a28714
JH
2499 case BOUND:
2500 case NBOUND:
2501 /* was last char in word? */
ffc61ed2 2502 if (do_utf8) {
12d33761
HS
2503 if (locinput == PL_bostr)
2504 ln = '\n';
ffc61ed2
JH
2505 else {
2506 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2507
2b9d42f0 2508 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2509 }
2510 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2511 ln = isALNUM_uni(ln);
8269fa76 2512 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2513 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2514 }
2515 else {
9041c2e3 2516 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2517 n = isALNUM_LC_utf8((U8*)locinput);
2518 }
a0ed51b3 2519 }
d6a28714 2520 else {
12d33761
HS
2521 ln = (locinput != PL_bostr) ?
2522 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2523 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2524 ln = isALNUM(ln);
2525 n = isALNUM(nextchr);
2526 }
2527 else {
2528 ln = isALNUM_LC(ln);
2529 n = isALNUM_LC(nextchr);
2530 }
d6a28714 2531 }
ffc61ed2
JH
2532 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2533 OP(scan) == BOUNDL))
2534 sayNO;
a0ed51b3 2535 break;
d6a28714 2536 case SPACEL:
3280af22 2537 PL_reg_flags |= RF_tainted;
bbce6d69 2538 /* FALL THROUGH */
d6a28714 2539 case SPACE:
9442cb0e 2540 if (!nextchr)
4633a7c4 2541 sayNO;
1aa99e6b 2542 if (do_utf8) {
fd400ab9 2543 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2544 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2545 if (!(OP(scan) == SPACE
3568d838 2546 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2547 : isSPACE_LC_utf8((U8*)locinput)))
2548 {
2549 sayNO;
2550 }
2551 locinput += PL_utf8skip[nextchr];
2552 nextchr = UCHARAT(locinput);
2553 break;
d6a28714 2554 }
ffc61ed2
JH
2555 if (!(OP(scan) == SPACE
2556 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2557 sayNO;
2558 nextchr = UCHARAT(++locinput);
2559 }
2560 else {
2561 if (!(OP(scan) == SPACE
2562 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2563 sayNO;
2564 nextchr = UCHARAT(++locinput);
a0ed51b3 2565 }
a0ed51b3 2566 break;
d6a28714 2567 case NSPACEL:
3280af22 2568 PL_reg_flags |= RF_tainted;
bbce6d69 2569 /* FALL THROUGH */
d6a28714 2570 case NSPACE:
9442cb0e 2571 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2572 sayNO;
1aa99e6b 2573 if (do_utf8) {
8269fa76 2574 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2575 if (OP(scan) == NSPACE
3568d838 2576 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2577 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2578 {
2579 sayNO;
2580 }
2581 locinput += PL_utf8skip[nextchr];
2582 nextchr = UCHARAT(locinput);
2583 break;
a0ed51b3 2584 }
ffc61ed2 2585 if (OP(scan) == NSPACE
d6a28714 2586 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2587 sayNO;
76e3520e 2588 nextchr = UCHARAT(++locinput);
a0d0e21e 2589 break;
d6a28714 2590 case DIGITL:
a0ed51b3
LW
2591 PL_reg_flags |= RF_tainted;
2592 /* FALL THROUGH */
d6a28714 2593 case DIGIT:
9442cb0e 2594 if (!nextchr)
a0ed51b3 2595 sayNO;
1aa99e6b 2596 if (do_utf8) {
8269fa76 2597 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2598 if (!(OP(scan) == DIGIT
3568d838 2599 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2600 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2601 {
a0ed51b3 2602 sayNO;
dfe13c55 2603 }
6f06b55f 2604 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2605 nextchr = UCHARAT(locinput);
2606 break;
2607 }
ffc61ed2 2608 if (!(OP(scan) == DIGIT
9442cb0e 2609 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2610 sayNO;
2611 nextchr = UCHARAT(++locinput);
2612 break;
d6a28714 2613 case NDIGITL:
b8c5462f
JH
2614 PL_reg_flags |= RF_tainted;
2615 /* FALL THROUGH */
d6a28714 2616 case NDIGIT:
9442cb0e 2617 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2618 sayNO;
1aa99e6b 2619 if (do_utf8) {
8269fa76 2620 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2621 if (OP(scan) == NDIGIT
3568d838 2622 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2623 : isDIGIT_LC_utf8((U8*)locinput))
2624 {
a0ed51b3 2625 sayNO;
9442cb0e 2626 }
6f06b55f 2627 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2628 nextchr = UCHARAT(locinput);
2629 break;
2630 }
ffc61ed2 2631 if (OP(scan) == NDIGIT
9442cb0e 2632 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2633 sayNO;
2634 nextchr = UCHARAT(++locinput);
2635 break;
2636 case CLUMP:
b7c83a7e 2637 if (locinput >= PL_regeol)
a0ed51b3 2638 sayNO;
b7c83a7e
JH
2639 if (do_utf8) {
2640 LOAD_UTF8_CHARCLASS(mark,"~");
2641 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2642 sayNO;
2643 locinput += PL_utf8skip[nextchr];
2644 while (locinput < PL_regeol &&
2645 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2646 locinput += UTF8SKIP(locinput);
2647 if (locinput > PL_regeol)
2648 sayNO;
eb08e2da
JH
2649 }
2650 else
2651 locinput++;
a0ed51b3
LW
2652 nextchr = UCHARAT(locinput);
2653 break;
c8756f30 2654 case REFFL:
3280af22 2655 PL_reg_flags |= RF_tainted;
c8756f30 2656 /* FALL THROUGH */
c277df42 2657 case REF:
c8756f30 2658 case REFF:
c277df42 2659 n = ARG(scan); /* which paren pair */
cf93c79d 2660 ln = PL_regstartp[n];
2c2d71f5 2661 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2662 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2663 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2664 if (ln == PL_regendp[n])
a0d0e21e 2665 break;
a0ed51b3 2666
cf93c79d 2667 s = PL_bostr + ln;
1aa99e6b 2668 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2669 char *l = locinput;
cf93c79d 2670 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2671 /*
2672 * Note that we can't do the "other character" lookup trick as
2673 * in the 8-bit case (no pun intended) because in Unicode we
2674 * have to map both upper and title case to lower case.
2675 */
2676 if (OP(scan) == REFF) {
a2a2844f 2677 STRLEN ulen1, ulen2;
e7ae6809
JH
2678 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
2679 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a0ed51b3
LW
2680 while (s < e) {
2681 if (l >= PL_regeol)
2682 sayNO;
a2a2844f
JH
2683 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2684 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 2685 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 2686 sayNO;
a2a2844f
JH
2687 s += ulen1;
2688 l += ulen2;
a0ed51b3
LW
2689 }
2690 }
2691 locinput = l;
2692 nextchr = UCHARAT(locinput);
2693 break;
2694 }
2695
a0d0e21e 2696 /* Inline the first character, for speed. */
76e3520e 2697 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2698 (OP(scan) == REF ||
2699 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2700 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2701 sayNO;
cf93c79d 2702 ln = PL_regendp[n] - ln;
3280af22 2703 if (locinput + ln > PL_regeol)
4633a7c4 2704 sayNO;
c8756f30
AK
2705 if (ln > 1 && (OP(scan) == REF
2706 ? memNE(s, locinput, ln)
2707 : (OP(scan) == REFF
2708 ? ibcmp(s, locinput, ln)
2709 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2710 sayNO;
a0d0e21e 2711 locinput += ln;
76e3520e 2712 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2713 break;
2714
2715 case NOTHING:
c277df42 2716 case TAIL:
a0d0e21e
LW
2717 break;
2718 case BACK:
2719 break;
c277df42
IZ
2720 case EVAL:
2721 {
2722 dSP;
533c011a 2723 OP_4tree *oop = PL_op;
3280af22
NIS
2724 COP *ocurcop = PL_curcop;
2725 SV **ocurpad = PL_curpad;
c277df42 2726 SV *ret;
9041c2e3 2727
c277df42 2728 n = ARG(scan);
533c011a 2729 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2730 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2731 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2732 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2733
8e5e9ebe
RGS
2734 {
2735 SV **before = SP;
2736 CALLRUNOPS(aTHX); /* Scalar context. */
2737 SPAGAIN;
2738 if (SP == before)
2739 ret = Nullsv; /* protect against empty (?{}) blocks. */
2740 else {
2741 ret = POPs;
2742 PUTBACK;
2743 }
2744 }
2745
0f5d15d6
IZ
2746 PL_op = oop;
2747 PL_curpad = ocurpad;
2748 PL_curcop = ocurcop;
c277df42 2749 if (logical) {
0f5d15d6
IZ
2750 if (logical == 2) { /* Postponed subexpression. */
2751 regexp *re;
22c35a8c 2752 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2753 re_cc_state state;
0f5d15d6
IZ
2754 CHECKPOINT cp, lastcp;
2755
2756 if(SvROK(ret) || SvRMAGICAL(ret)) {
2757 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2758
2759 if(SvMAGICAL(sv))
14befaf4 2760 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2761 }
2762 if (mg) {
2763 re = (regexp *)mg->mg_obj;
df0003d4 2764 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2765 }
2766 else {
2767 STRLEN len;
2768 char *t = SvPV(ret, len);
2769 PMOP pm;
2770 char *oprecomp = PL_regprecomp;
2771 I32 osize = PL_regsize;
2772 I32 onpar = PL_regnpar;
2773
5fcd1c1b 2774 Zero(&pm, 1, PMOP);
cea2e8a9 2775 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2776 if (!(SvFLAGS(ret)
0f5d15d6 2777 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2778 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2779 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2780 PL_regprecomp = oprecomp;
2781 PL_regsize = osize;
2782 PL_regnpar = onpar;
2783 }
2784 DEBUG_r(
9041c2e3 2785 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2786 "Entering embedded `%s%.60s%s%s'\n",
2787 PL_colors[0],
2788 re->precomp,
2789 PL_colors[1],
2790 (strlen(re->precomp) > 60 ? "..." : ""))
2791 );
2792 state.node = next;
2793 state.prev = PL_reg_call_cc;
2794 state.cc = PL_regcc;
2795 state.re = PL_reg_re;
2796
2ab05381 2797 PL_regcc = 0;
9041c2e3 2798
0f5d15d6 2799 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2800 REGCP_SET(lastcp);
0f5d15d6
IZ
2801 cache_re(re);
2802 state.ss = PL_savestack_ix;
2803 *PL_reglastparen = 0;
a01268b5 2804 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2805 PL_reg_call_cc = &state;
2806 PL_reginput = locinput;
2c2d71f5
JH
2807
2808 /* XXXX This is too dramatic a measure... */
2809 PL_reg_maxiter = 0;
2810
0f5d15d6 2811 if (regmatch(re->program + 1)) {
2c914db6
IZ
2812 /* Even though we succeeded, we need to restore
2813 global variables, since we may be wrapped inside
2814 SUSPEND, thus the match may be not finished yet. */
2815
2816 /* XXXX Do this only if SUSPENDed? */
2817 PL_reg_call_cc = state.prev;
2818 PL_regcc = state.cc;
2819 PL_reg_re = state.re;
2820 cache_re(PL_reg_re);
2821
2822 /* XXXX This is too dramatic a measure... */
2823 PL_reg_maxiter = 0;
2824
2825 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2826 ReREFCNT_dec(re);
2827 regcpblow(cp);
2828 sayYES;
2829 }
0f5d15d6 2830 ReREFCNT_dec(re);
02db2b7b 2831 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2832 regcppop();
2833 PL_reg_call_cc = state.prev;
2834 PL_regcc = state.cc;
2835 PL_reg_re = state.re;
d3790889 2836 cache_re(PL_reg_re);
2c2d71f5
JH
2837
2838 /* XXXX This is too dramatic a measure... */
2839 PL_reg_maxiter = 0;
2840
8e514ae6 2841 logical = 0;
0f5d15d6
IZ
2842 sayNO;
2843 }
c277df42 2844 sw = SvTRUE(ret);
0f5d15d6 2845 logical = 0;
a0ed51b3
LW
2846 }
2847 else
3280af22 2848 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2849 break;
2850 }
a0d0e21e 2851 case OPEN:
c277df42 2852 n = ARG(scan); /* which paren pair */
3280af22
NIS
2853 PL_reg_start_tmp[n] = locinput;
2854 if (n > PL_regsize)
2855 PL_regsize = n;
a0d0e21e
LW
2856 break;
2857 case CLOSE:
c277df42 2858 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2859 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2860 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2861 if (n > *PL_reglastparen)
2862 *PL_reglastparen = n;
a01268b5 2863 *PL_reglastcloseparen = n;
a0d0e21e 2864 break;
c277df42
IZ
2865 case GROUPP:
2866 n = ARG(scan); /* which paren pair */
cf93c79d 2867 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2868 break;
2869 case IFTHEN:
2c2d71f5 2870 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2871 if (sw)
2872 next = NEXTOPER(NEXTOPER(scan));
2873 else {
2874 next = scan + ARG(scan);
2875 if (OP(next) == IFTHEN) /* Fake one. */
2876 next = NEXTOPER(NEXTOPER(next));
2877 }
2878 break;
2879 case LOGICAL:
0f5d15d6 2880 logical = scan->flags;
c277df42 2881 break;
2ab05381
IZ
2882/*******************************************************************
2883 PL_regcc contains infoblock about the innermost (...)* loop, and
2884 a pointer to the next outer infoblock.
2885
2886 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2887
2888 1) After matching X, regnode for CURLYX is processed;
2889
9041c2e3 2890 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2891 regmatch() recursively with the starting point at WHILEM node;
2892
2893 3) Each hit of WHILEM node tries to match A and Z (in the order
2894 depending on the current iteration, min/max of {min,max} and
2895 greediness). The information about where are nodes for "A"
2896 and "Z" is read from the infoblock, as is info on how many times "A"
2897 was already matched, and greediness.
2898
2899 4) After A matches, the same WHILEM node is hit again.
2900
2901 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2902 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2903 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2904 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2905 of the external loop.
2906
2907 Currently present infoblocks form a tree with a stem formed by PL_curcc
2908 and whatever it mentions via ->next, and additional attached trees
2909 corresponding to temporarily unset infoblocks as in "5" above.
2910
9041c2e3 2911 In the following picture infoblocks for outer loop of
2ab05381
IZ
2912 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2913 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2914 infoblocks are drawn below the "reset" infoblock.
2915
2916 In fact in the picture below we do not show failed matches for Z and T
2917 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2918 more obvious *why* one needs to *temporary* unset infoblocks.]
2919
2920 Matched REx position InfoBlocks Comment
2921 (Y(A)*?Z)*?T x
2922 Y(A)*?Z)*?T x <- O
2923 Y (A)*?Z)*?T x <- O
2924 Y A)*?Z)*?T x <- O <- I
2925 YA )*?Z)*?T x <- O <- I
2926 YA A)*?Z)*?T x <- O <- I
2927 YAA )*?Z)*?T x <- O <- I
2928 YAA Z)*?T x <- O # Temporary unset I
2929 I
2930
2931 YAAZ Y(A)*?Z)*?T x <- O
2932 I
2933
2934 YAAZY (A)*?Z)*?T x <- O
2935 I
2936
2937 YAAZY A)*?Z)*?T x <- O <- I
2938 I
2939
2940 YAAZYA )*?Z)*?T x <- O <- I
2941 I
2942
2943 YAAZYA Z)*?T x <- O # Temporary unset I
2944 I,I
2945
2946 YAAZYAZ )*?T x <- O
2947 I,I
2948
2949 YAAZYAZ T x # Temporary unset O
2950 O
2951 I,I
2952
2953 YAAZYAZT x
2954 O
2955 I,I
2956 *******************************************************************/
a0d0e21e
LW
2957 case CURLYX: {
2958 CURCUR cc;
3280af22 2959 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2960 /* No need to save/restore up to this paren */
2961 I32 parenfloor = scan->flags;
c277df42
IZ
2962
2963 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2964 next += ARG(next);
3280af22
NIS
2965 cc.oldcc = PL_regcc;
2966 PL_regcc = &cc;
cb434fcc
IZ
2967 /* XXXX Probably it is better to teach regpush to support
2968 parenfloor > PL_regsize... */
2969 if (parenfloor > *PL_reglastparen)
2970 parenfloor = *PL_reglastparen; /* Pessimization... */
2971 cc.parenfloor = parenfloor;
a0d0e21e
LW
2972 cc.cur = -1;
2973 cc.min = ARG1(scan);
2974 cc.max = ARG2(scan);
c277df42 2975 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2976 cc.next = next;
2977 cc.minmod = minmod;
2978 cc.lastloc = 0;
3280af22 2979 PL_reginput = locinput;
a0d0e21e
LW
2980 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2981 regcpblow(cp);
3280af22 2982 PL_regcc = cc.oldcc;
4633a7c4 2983 saySAME(n);
a0d0e21e
LW
2984 }
2985 /* NOT REACHED */
2986 case WHILEM: {
2987 /*
2988 * This is really hard to understand, because after we match
2989 * what we're trying to match, we must make sure the rest of
2c2d71f5 2990 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2991 * to go back UP the parse tree by recursing ever deeper. And
2992 * if it fails, we have to reset our parent's current state
2993 * that we can try again after backing off.
2994 */
2995
c277df42 2996 CHECKPOINT cp, lastcp;
3280af22 2997 CURCUR* cc = PL_regcc;
c277df42
IZ
2998 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2999
4633a7c4 3000 n = cc->cur + 1; /* how many we know we matched */
3280af22 3001 PL_reginput = locinput;
a0d0e21e 3002
c277df42 3003 DEBUG_r(
9041c2e3
NIS
3004 PerlIO_printf(Perl_debug_log,
3005 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 3006 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 3007 (long)n, (long)cc->min,
c277df42
IZ
3008 (long)cc->max, (long)cc)
3009 );
4633a7c4 3010
a0d0e21e
LW
3011 /* If degenerate scan matches "", assume scan done. */
3012
579cf2c3 3013 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 3014 PL_regcc = cc->oldcc;
2ab05381
IZ
3015 if (PL_regcc)
3016 ln = PL_regcc->cur;
c277df42 3017 DEBUG_r(
c3464db5
DD
3018 PerlIO_printf(Perl_debug_log,
3019 "%*s empty match detected, try continuation...\n",
3280af22 3020 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3021 );
a0d0e21e 3022 if (regmatch(cc->next))
4633a7c4 3023 sayYES;
2ab05381
IZ
3024 if (PL_regcc)
3025 PL_regcc->cur = ln;
3280af22 3026 PL_regcc = cc;
4633a7c4 3027 sayNO;
a0d0e21e
LW
3028 }
3029
3030 /* First just match a string of min scans. */
3031
3032 if (n < cc->min) {
3033 cc->cur = n;
3034 cc->lastloc = locinput;
4633a7c4
LW
3035 if (regmatch(cc->scan))
3036 sayYES;
3037 cc->cur = n - 1;
c277df42 3038 cc->lastloc = lastloc;
4633a7c4 3039 sayNO;
a0d0e21e
LW
3040 }
3041
2c2d71f5
JH
3042 if (scan->flags) {
3043 /* Check whether we already were at this position.
3044 Postpone detection until we know the match is not
3045 *that* much linear. */
3046 if (!PL_reg_maxiter) {
3047 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3048 PL_reg_leftiter = PL_reg_maxiter;
3049 }
3050 if (PL_reg_leftiter-- == 0) {
3051 I32 size = (PL_reg_maxiter + 7)/8;
3052 if (PL_reg_poscache) {
3053 if (PL_reg_poscache_size < size) {
3054 Renew(PL_reg_poscache, size, char);
3055 PL_reg_poscache_size = size;
3056 }
3057 Zero(PL_reg_poscache, size, char);
3058 }
3059 else {
3060 PL_reg_poscache_size = size;
3061 Newz(29, PL_reg_poscache, size, char);
3062 }
3063 DEBUG_r(
3064 PerlIO_printf(Perl_debug_log,
3065 "%sDetected a super-linear match, switching on caching%s...\n",
3066 PL_colors[4], PL_colors[5])
3067 );
3068 }
3069 if (PL_reg_leftiter < 0) {
3070 I32 o = locinput - PL_bostr, b;
3071
3072 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
3073 b = o % 8;
3074 o /= 8;
3075 if (PL_reg_poscache[o] & (1<<b)) {
3076 DEBUG_r(
3077 PerlIO_printf(Perl_debug_log,
3078 "%*s already tried at this position...\n",
3079 REPORT_CODE_OFF+PL_regindent*2, "")
3080 );
7821416a 3081 sayNO_SILENT;
2c2d71f5
JH
3082 }
3083 PL_reg_poscache[o] |= (1<<b);
3084 }
3085 }
3086
a0d0e21e
LW
3087 /* Prefer next over scan for minimal matching. */
3088
3089 if (cc->minmod) {
3280af22 3090 PL_regcc = cc->oldcc;
2ab05381
IZ
3091 if (PL_regcc)
3092 ln = PL_regcc->cur;
5f05dabc 3093 cp = regcppush(cc->parenfloor);
02db2b7b 3094 REGCP_SET(lastcp);
5f05dabc 3095 if (regmatch(cc->next)) {
c277df42 3096 regcpblow(cp);
4633a7c4 3097 sayYES; /* All done. */
5f05dabc 3098 }
02db2b7b 3099 REGCP_UNWIND(lastcp);
5f05dabc 3100 regcppop();
2ab05381
IZ
3101 if (PL_regcc)
3102 PL_regcc->cur = ln;
3280af22 3103 PL_regcc = cc;
a0d0e21e 3104
c277df42 3105 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 3106 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3107 && !(PL_reg_flags & RF_warned)) {
3108 PL_reg_flags |= RF_warned;
e476b1b5 3109 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
3110 "Complex regular subexpression recursion",
3111 REG_INFTY - 1);
c277df42 3112 }
4633a7c4 3113 sayNO;
c277df42 3114 }
a687059c 3115
c277df42 3116 DEBUG_r(
c3464db5
DD
3117 PerlIO_printf(Perl_debug_log,
3118 "%*s trying longer...\n",
3280af22 3119 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3120 );
a0d0e21e 3121 /* Try scanning more and see if it helps. */
3280af22 3122 PL_reginput = locinput;
a0d0e21e
LW
3123 cc->cur = n;
3124 cc->lastloc = locinput;
5f05dabc 3125 cp = regcppush(cc->parenfloor);
02db2b7b 3126 REGCP_SET(lastcp);
5f05dabc 3127 if (regmatch(cc->scan)) {
c277df42 3128 regcpblow(cp);
4633a7c4 3129 sayYES;
5f05dabc 3130 }
02db2b7b 3131 REGCP_UNWIND(lastcp);
5f05dabc 3132 regcppop();
4633a7c4 3133 cc->cur = n - 1;
c277df42 3134 cc->lastloc = lastloc;
4633a7c4 3135 sayNO;
a0d0e21e
LW
3136 }
3137
3138 /* Prefer scan over next for maximal matching. */
3139
3140 if (n < cc->max) { /* More greed allowed? */
5f05dabc 3141 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
3142 cc->cur = n;
3143 cc->lastloc = locinput;
02db2b7b 3144 REGCP_SET(lastcp);
5f05dabc 3145 if (regmatch(cc->scan)) {
c277df42 3146 regcpblow(cp);
4633a7c4 3147 sayYES;
5f05dabc 3148 }
02db2b7b 3149 REGCP_UNWIND(lastcp);
a0d0e21e 3150 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3151 PL_reginput = locinput;
c277df42 3152 DEBUG_r(
c3464db5
DD
3153 PerlIO_printf(Perl_debug_log,
3154 "%*s failed, try continuation...\n",
3280af22 3155 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3156 );
3157 }
9041c2e3 3158 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3159 && !(PL_reg_flags & RF_warned)) {
3280af22 3160 PL_reg_flags |= RF_warned;
e476b1b5 3161 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
3162 "Complex regular subexpression recursion",
3163 REG_INFTY - 1);
a0d0e21e
LW
3164 }
3165
3166 /* Failed deeper matches of scan, so see if this one works. */
3280af22 3167 PL_regcc = cc->oldcc;
2ab05381
IZ
3168 if (PL_regcc)
3169 ln = PL_regcc->cur;
a0d0e21e 3170 if (regmatch(cc->next))
4633a7c4 3171 sayYES;
2ab05381
IZ
3172 if (PL_regcc)
3173 PL_regcc->cur = ln;
3280af22 3174 PL_regcc = cc;
4633a7c4 3175 cc->cur = n - 1;
c277df42 3176 cc->lastloc = lastloc;
4633a7c4 3177 sayNO;
a0d0e21e
LW
3178 }
3179 /* NOT REACHED */
9041c2e3 3180 case BRANCHJ:
c277df42
IZ
3181 next = scan + ARG(scan);
3182 if (next == scan)
3183 next = NULL;
3184 inner = NEXTOPER(NEXTOPER(scan));
3185 goto do_branch;
9041c2e3 3186 case BRANCH:
c277df42
IZ
3187 inner = NEXTOPER(scan);
3188 do_branch:
3189 {
c277df42
IZ
3190 c1 = OP(scan);
3191 if (OP(next) != c1) /* No choice. */
3192 next = inner; /* Avoid recursion. */
a0d0e21e 3193 else {
02db2b7b
IZ
3194 I32 lastparen = *PL_reglastparen;
3195 I32 unwind1;
3196 re_unwind_branch_t *uw;
3197
3198 /* Put unwinding data on stack */
3199 unwind1 = SSNEWt(1,re_unwind_branch_t);
3200 uw = SSPTRt(unwind1,re_unwind_branch_t);
3201 uw->prev = unwind;
3202 unwind = unwind1;
3203 uw->type = ((c1 == BRANCH)
3204 ? RE_UNWIND_BRANCH
3205 : RE_UNWIND_BRANCHJ);
3206 uw->lastparen = lastparen;
3207 uw->next = next;
3208 uw->locinput = locinput;
3209 uw->nextchr = nextchr;
3210#ifdef DEBUGGING
3211 uw->regindent = ++PL_regindent;
3212#endif
c277df42 3213
02db2b7b
IZ
3214 REGCP_SET(uw->lastcp);
3215
3216 /* Now go into the first branch */
3217 next = inner;
a687059c 3218 }
a0d0e21e
LW
3219 }
3220 break;
3221 case MINMOD:
3222 minmod = 1;
3223 break;
c277df42
IZ
3224 case CURLYM:
3225 {
00db4c45 3226 I32 l = 0;
c277df42 3227 CHECKPOINT lastcp;
9041c2e3 3228
c277df42
IZ
3229 /* We suppose that the next guy does not need
3230 backtracking: in particular, it is of constant length,
3231 and has no parenths to influence future backrefs. */
3232 ln = ARG1(scan); /* min to match */
3233 n = ARG2(scan); /* max to match */
c277df42
IZ
3234 paren = scan->flags;
3235 if (paren) {
3280af22
NIS
3236 if (paren > PL_regsize)
3237 PL_regsize = paren;
3238 if (paren > *PL_reglastparen)
3239 *PL_reglastparen = paren;
c277df42 3240 }
dc45a647 3241 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3242 if (paren)
3243 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3244 PL_reginput = locinput;
c277df42
IZ
3245 if (minmod) {
3246 minmod = 0;
3247 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3248 sayNO;
f31a99c8
HS
3249 /* if we matched something zero-length we don't need to
3250 backtrack - capturing parens are already defined, so
3251 the caveat in the maximal case doesn't apply
3252
3253 XXXX if ln == 0, we can redo this check first time
3254 through the following loop
3255 */
3256 if (ln && l == 0)
3257 n = ln; /* don't backtrack */
3280af22 3258 locinput = PL_reginput;
cca55fe3 3259 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3260 regnode *text_node = next;
3261
cca55fe3 3262 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3263
cca55fe3 3264 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3265 else {
cca55fe3
JP
3266 if (PL_regkind[(U8)OP(text_node)] == REF) {
3267 I32 n, ln;
3268 n = ARG(text_node); /* which paren pair */
3269 ln = PL_regstartp[n];
3270 /* assume yes if we haven't seen CLOSEn */
3271 if (
3272 *PL_reglastparen < n ||
3273 ln == -1 ||
3274 ln == PL_regendp[n]
3275 ) {
3276 c1 = c2 = -1000;
3277 goto assume_ok_MM;
3278 }
3279 c1 = *(PL_bostr + ln);
3280 }
3281 else { c1 = (U8)*STRING(text_node); }
af5decee 3282 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3283 c2 = PL_fold[c1];
af5decee 3284 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3285 c2 = PL_fold_locale[c1];
3286 else
3287 c2 = c1;
3288 }
a0ed51b3
LW
3289 }
3290 else
c277df42 3291 c1 = c2 = -1000;
cca55fe3 3292 assume_ok_MM:
02db2b7b 3293 REGCP_SET(lastcp);
5f4b28b2 3294 /* This may be improved if l == 0. */
c277df42
IZ
3295 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3296 /* If it could work, try it. */
3297 if (c1 == -1000 ||
3280af22
NIS
3298 UCHARAT(PL_reginput) == c1 ||
3299 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3300 {
3301 if (paren) {
f31a99c8 3302 if (ln) {
cf93c79d
IZ
3303 PL_regstartp[paren] =
3304 HOPc(PL_reginput, -l) - PL_bostr;
3305 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3306 }
3307 else
cf93c79d 3308 PL_regendp[paren] = -1;
c277df42
IZ
3309 }
3310 if (regmatch(next))
3311 sayYES;
02db2b7b 3312 REGCP_UNWIND(lastcp);
c277df42
IZ
3313 }
3314 /* Couldn't or didn't -- move forward. */
3280af22 3315 PL_reginput = locinput;
c277df42
IZ
3316 if (regrepeat_hard(scan, 1, &l)) {
3317 ln++;
3280af22 3318 locinput = PL_reginput;
c277df42
IZ
3319 }
3320 else
3321 sayNO;
3322 }
a0ed51b3
LW
3323 }
3324 else {
c277df42 3325 n = regrepeat_hard(scan, n, &l);
f31a99c8
HS
3326 /* if we matched something zero-length we don't need to
3327 backtrack, unless the minimum count is zero and we
3328 are capturing the result - in that case the capture
3329 being defined or not may affect later execution
3330 */
3331 if (n != 0 && l == 0 && !(paren && ln == 0))
3332 ln = n; /* don't backtrack */
3280af22 3333 locinput = PL_reginput;
c277df42 3334 DEBUG_r(
5c0ca799 3335 PerlIO_printf(Perl_debug_log,
faccc32b 3336 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3337 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3338 (IV) n, (IV)l)
c277df42
IZ
3339 );
3340 if (n >= ln) {
cca55fe3 3341 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3342 regnode *text_node = next;
3343
cca55fe3 3344 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3345
cca55fe3 3346 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3347 else {
cca55fe3
JP
3348 if (PL_regkind[(U8)OP(text_node)] == REF) {
3349 I32 n, ln;
3350 n = ARG(text_node); /* which paren pair */
3351 ln = PL_regstartp[n];
3352 /* assume yes if we haven't seen CLOSEn */
3353 if (
3354 *PL_reglastparen < n ||
3355 ln == -1 ||
3356 ln == PL_regendp[n]
3357 ) {
3358 c1 = c2 = -1000;
3359 goto assume_ok_REG;
3360 }
3361 c1 = *(PL_bostr + ln);
3362 }
3363 else { c1 = (U8)*STRING(text_node); }
3364
af5decee 3365 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3366 c2 = PL_fold[c1];
af5decee 3367 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3368 c2 = PL_fold_locale[c1];
3369 else
3370 c2 = c1;
3371 }
a0ed51b3
LW
3372 }
3373 else
c277df42
IZ
3374 c1 = c2 = -1000;
3375 }
cca55fe3 3376 assume_ok_REG:
02db2b7b 3377 REGCP_SET(lastcp);
c277df42
IZ
3378 while (n >= ln) {
3379 /* If it could work, try it. */
3380 if (c1 == -1000 ||
3280af22
NIS
3381 UCHARAT(PL_reginput) == c1 ||
3382 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3383 {
3384 DEBUG_r(
c3464db5 3385 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3386 "%*s trying tail with n=%"IVdf"...\n",
3387 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3388 );
3389 if (paren) {
3390 if (n) {
cf93c79d
IZ
3391 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3392 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3393 }
a0ed51b3 3394 else
cf93c79d 3395 PL_regendp[paren] = -1;
c277df42 3396 }
a0ed51b3
LW
3397 if (regmatch(next))
3398 sayYES;
02db2b7b 3399 REGCP_UNWIND(lastcp);
a0ed51b3 3400 }
c277df42
IZ
3401 /* Couldn't or didn't -- back up. */
3402 n--;
dfe13c55 3403 locinput = HOPc(locinput, -l);
3280af22 3404 PL_reginput = locinput;
c277df42
IZ
3405 }
3406 }
3407 sayNO;
3408 break;
3409 }
3410 case CURLYN:
3411 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3412 if (paren > PL_regsize)
3413 PL_regsize = paren;
3414 if (paren > *PL_reglastparen)
3415 *PL_reglastparen = paren;
c277df42
IZ
3416 ln = ARG1(scan); /* min to match */
3417 n = ARG2(scan); /* max to match */
dc45a647 3418 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3419 goto repeat;
a0d0e21e 3420 case CURLY:
c277df42 3421 paren = 0;
a0d0e21e
LW
3422 ln = ARG1(scan); /* min to match */
3423 n = ARG2(scan); /* max to match */
dc45a647 3424 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3425 goto repeat;
3426 case STAR:
3427 ln = 0;
c277df42 3428 n = REG_INFTY;
a0d0e21e 3429 scan = NEXTOPER(scan);
c277df42 3430 paren = 0;
a0d0e21e
LW
3431 goto repeat;
3432 case PLUS:
c277df42
IZ
3433 ln = 1;
3434 n = REG_INFTY;
3435 scan = NEXTOPER(scan);
3436 paren = 0;
3437 repeat:
a0d0e21e
LW
3438 /*
3439 * Lookahead to avoid useless match attempts
3440 * when we know what character comes next.
3441 */
5f80c4cf
JP
3442
3443 /*
3444 * Used to only do .*x and .*?x, but now it allows
3445 * for )'s, ('s and (?{ ... })'s to be in the way
3446 * of the quantifier and the EXACT-like node. -- japhy
3447 */
3448
cca55fe3 3449 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3450 U8 *s;
3451 regnode *text_node = next;
3452
cca55fe3 3453 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3454
cca55fe3 3455 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3456 else {
cca55fe3
JP
3457 if (PL_regkind[(U8)OP(text_node)] == REF) {
3458 I32 n, ln;
3459 n = ARG(text_node); /* which paren pair */
3460 ln = PL_regstartp[n];
3461 /* assume yes if we haven't seen CLOSEn */
3462 if (
3463 *PL_reglastparen < n ||
3464 ln == -1 ||
3465 ln == PL_regendp[n]
3466 ) {
3467 c1 = c2 = -1000;
3468 goto assume_ok_easy;
3469 }
9246c65e 3470 s = (U8*)PL_bostr + ln;
cca55fe3
JP
3471 }
3472 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
3473
3474 if (!UTF) {
3475 c2 = c1 = *s;
f65d3ee7 3476 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3477 c2 = PL_fold[c1];
f65d3ee7 3478 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf 3479 c2 = PL_fold_locale[c1];
1aa99e6b 3480 }
5f80c4cf 3481 else { /* UTF */
f65d3ee7 3482 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 3483 STRLEN ulen1, ulen2;
e7ae6809
JH
3484 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
3485 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
3486
3487 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3488 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3489
3490 c1 = utf8_to_uvuni(tmpbuf1, 0);
3491 c2 = utf8_to_uvuni(tmpbuf2, 0);
5f80c4cf
JP
3492 }
3493 else {
3494 c2 = c1 = utf8_to_uvchr(s, NULL);
3495 }
1aa99e6b
IH
3496 }
3497 }
bbce6d69 3498 }
a0d0e21e 3499 else
bbce6d69 3500 c1 = c2 = -1000;
cca55fe3 3501 assume_ok_easy:
3280af22 3502 PL_reginput = locinput;
a0d0e21e 3503 if (minmod) {
c277df42 3504 CHECKPOINT lastcp;
a0d0e21e
LW
3505 minmod = 0;
3506 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3507 sayNO;
a0ed51b3 3508 locinput = PL_reginput;
02db2b7b 3509 REGCP_SET(lastcp);
0fe9bf95 3510 if (c1 != -1000) {
1aa99e6b 3511 char *e; /* Should not check after this */
0fe9bf95
IZ
3512 char *old = locinput;
3513
1aa99e6b 3514 if (n == REG_INFTY) {
0fe9bf95 3515 e = PL_regeol - 1;
1aa99e6b
IH
3516 if (do_utf8)
3517 while (UTF8_IS_CONTINUATION(*(U8*)e))
3518 e--;
3519 }
3520 else if (do_utf8) {
3521 int m = n - ln;
3522 for (e = locinput;
3523 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3524 e += UTF8SKIP(e);
3525 }
3526 else {
3527 e = locinput + n - ln;
3528 if (e >= PL_regeol)
3529 e = PL_regeol - 1;
3530 }
0fe9bf95 3531 while (1) {
1aa99e6b 3532 int count;
0fe9bf95 3533 /* Find place 'next' could work */
1aa99e6b
IH
3534 if (!do_utf8) {
3535 if (c1 == c2) {
a8e8ab15
JH
3536 while (locinput <= e &&
3537 UCHARAT(locinput) != c1)
1aa99e6b
IH
3538 locinput++;
3539 } else {
9041c2e3 3540 while (locinput <= e
a8e8ab15
JH
3541 && UCHARAT(locinput) != c1
3542 && UCHARAT(locinput) != c2)
1aa99e6b
IH
3543 locinput++;
3544 }
3545 count = locinput - old;
3546 }
3547 else {
3548 STRLEN len;
3549 if (c1 == c2) {
3550 for (count = 0;
3551 locinput <= e &&
9041c2e3 3552 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3553 count++)
3554 locinput += len;
3555
3556 } else {
3557 for (count = 0; locinput <= e; count++) {
9041c2e3 3558 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3559 if (c == c1 || c == c2)
3560 break;
9041c2e3 3561 locinput += len;
1aa99e6b
IH
3562 }
3563 }
0fe9bf95 3564 }
9041c2e3 3565 if (locinput > e)
0fe9bf95
IZ
3566 sayNO;
3567 /* PL_reginput == old now */
3568 if (locinput != old) {
3569 ln = 1; /* Did some */
1aa99e6b 3570 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3571 sayNO;
3572 }
3573 /* PL_reginput == locinput now */
29d1e993 3574 TRYPAREN(paren, ln, locinput);
0fe9bf95 3575 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3576 REGCP_UNWIND(lastcp);
0fe9bf95 3577 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3578 old = locinput;
3579 if (do_utf8)
3580 locinput += UTF8SKIP(locinput);
3581 else
3582 locinput++;
0fe9bf95
IZ
3583 }
3584 }
3585 else
c277df42 3586 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3587 UV c;
3588 if (c1 != -1000) {
3589 if (do_utf8)
9041c2e3 3590 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3591 else
9041c2e3 3592 c = UCHARAT(PL_reginput);
2390ecbc
PP
3593 /* If it could work, try it. */
3594 if (c == c1 || c == c2)
3595 {
3596 TRYPAREN(paren, n, PL_reginput);
3597 REGCP_UNWIND(lastcp);
3598 }
1aa99e6b 3599 }
a0d0e21e 3600 /* If it could work, try it. */
2390ecbc 3601 else if (c1 == -1000)
bbce6d69 3602 {
29d1e993 3603 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3604 REGCP_UNWIND(lastcp);
bbce6d69 3605 }
c277df42 3606 /* Couldn't or didn't -- move forward. */
a0ed51b3 3607 PL_reginput = locinput;
a0d0e21e
LW
3608 if (regrepeat(scan, 1)) {
3609 ln++;
a0ed51b3
LW
3610 locinput = PL_reginput;
3611 }
3612 else
4633a7c4 3613 sayNO;
a0d0e21e
LW
3614 }
3615 }
3616 else {
c277df42 3617 CHECKPOINT lastcp;
a0d0e21e 3618 n = regrepeat(scan, n);
a0ed51b3 3619 locinput = PL_reginput;
22c35a8c 3620 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
15272685
HS
3621 ((!PL_multiline && OP(next) != MEOL) ||
3622 OP(next) == SEOL || OP(next) == EOS))
3623 {
a0d0e21e 3624 ln = n; /* why back off? */
1aeab75a
GS
3625 /* ...because $ and \Z can match before *and* after
3626 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3627 We should back off by one in this case. */
3628 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3629 ln--;
3630 }
02db2b7b 3631 REGCP_SET(lastcp);
c277df42 3632 if (paren) {
8fa7f367 3633 UV c = 0;
c277df42 3634 while (n >= ln) {
1aa99e6b
IH
3635 if (c1 != -1000) {
3636 if (do_utf8)
9041c2e3 3637 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3638 else
9041c2e3 3639 c = UCHARAT(PL_reginput);
1aa99e6b 3640 }
c277df42 3641 /* If it could work, try it. */
1aa99e6b 3642 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3643 {
29d1e993 3644 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3645 REGCP_UNWIND(lastcp);
c277df42
IZ
3646 }
3647 /* Couldn't or didn't -- back up. */
3648 n--;
dfe13c55 3649 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3650 }
a0ed51b3
LW
3651 }
3652 else {
8fa7f367 3653 UV c = 0;
c277df42 3654 while (n >= ln) {
1aa99e6b
IH
3655 if (c1 != -1000) {
3656 if (do_utf8)
9041c2e3 3657 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3658 else
9041c2e3 3659 c = UCHARAT(PL_reginput);
1aa99e6b 3660 }
c277df42 3661 /* If it could work, try it. */
1aa99e6b 3662 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3663 {
29d1e993 3664 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3665 REGCP_UNWIND(lastcp);
c277df42
IZ
3666 }
3667 /* Couldn't or didn't -- back up. */
3668 n--;
dfe13c55 3669 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3670 }
a0d0e21e
LW
3671 }
3672 }
4633a7c4 3673 sayNO;
c277df42 3674 break;
a0d0e21e 3675 case END:
0f5d15d6
IZ
3676 if (PL_reg_call_cc) {
3677 re_cc_state *cur_call_cc = PL_reg_call_cc;
3678 CURCUR *cctmp = PL_regcc;
3679 regexp *re = PL_reg_re;
3680 CHECKPOINT cp, lastcp;
3681
3682 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3683 REGCP_SET(lastcp);
0f5d15d6
IZ
3684 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3685 the caller. */
3686 PL_reginput = locinput; /* Make position available to
3687 the callcc. */
3688 cache_re(PL_reg_call_cc->re);
3689 PL_regcc = PL_reg_call_cc->cc;
3690 PL_reg_call_cc = PL_reg_call_cc->prev;
3691 if (regmatch(cur_call_cc->node)) {
3692 PL_reg_call_cc = cur_call_cc;
3693 regcpblow(cp);
3694 sayYES;
3695 }
02db2b7b 3696 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3697 regcppop();
3698 PL_reg_call_cc = cur_call_cc;
3699 PL_regcc = cctmp;
3700 PL_reg_re = re;
3701 cache_re(re);
3702
3703 DEBUG_r(
3704 PerlIO_printf(Perl_debug_log,
3705 "%*s continuation failed...\n",
3706 REPORT_CODE_OFF+PL_regindent*2, "")
3707 );
7821416a 3708 sayNO_SILENT;
0f5d15d6 3709 }
7821416a
IZ
3710 if (locinput < PL_regtill) {
3711 DEBUG_r(PerlIO_printf(Perl_debug_log,
3712 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3713 PL_colors[4],
3714 (long)(locinput - PL_reg_starttry),
3715 (long)(PL_regtill - PL_reg_starttry),
3716 PL_colors[5]));
3717 sayNO_FINAL; /* Cannot match: too short. */
3718 }
3719 PL_reginput = locinput; /* put where regtry can find it */
3720 sayYES_FINAL; /* Success! */
7e5428c5 3721 case SUCCEED:
3280af22 3722 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3723 sayYES_LOUD; /* Success! */
c277df42
IZ
3724 case SUSPEND:
3725 n = 1;
9fe1d20c 3726 PL_reginput = locinput;
9041c2e3 3727 goto do_ifmatch;
a0d0e21e 3728 case UNLESSM:
c277df42 3729 n = 0;
a0ed51b3 3730 if (scan->flags) {
efb30f32
HS
3731 s = HOPBACKc(locinput, scan->flags);
3732 if (!s)
3733 goto say_yes;
3734 PL_reginput = s;
a0ed51b3
LW
3735 }
3736 else
3737 PL_reginput = locinput;
c277df42
IZ
3738 goto do_ifmatch;
3739 case IFMATCH:
3740 n = 1;
a0ed51b3 3741 if (scan->flags) {
efb30f32
HS
3742 s = HOPBACKc(locinput, scan->flags);
3743 if (!s)
3744 goto say_no;
3745 PL_reginput = s;
a0ed51b3
LW
3746 }
3747 else
3748 PL_reginput = locinput;
3749
c277df42 3750 do_ifmatch:
c277df42
IZ
3751 inner = NEXTOPER(NEXTOPER(scan));
3752 if (regmatch(inner) != n) {
3753 say_no:
3754 if (logical) {
3755 logical = 0;
3756 sw = 0;
3757 goto do_longjump;
a0ed51b3
LW
3758 }
3759 else
c277df42
IZ
3760 sayNO;
3761 }
3762 say_yes:
3763 if (logical) {
3764 logical = 0;
3765 sw = 1;
3766 }
fe44a5e8 3767 if (OP(scan) == SUSPEND) {
3280af22 3768 locinput = PL_reginput;
565764a8 3769 nextchr = UCHARAT(locinput);
fe44a5e8 3770 }
c277df42
IZ
3771 /* FALL THROUGH. */
3772 case LONGJMP:
3773 do_longjump:
3774 next = scan + ARG(scan);
3775 if (next == scan)
3776 next = NULL;
a0d0e21e
LW
3777 break;
3778 default:
b900a521 3779 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3780 PTR2UV(scan), OP(scan));
cea2e8a9 3781 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3782 }
02db2b7b 3783 reenter:
a0d0e21e
LW
3784 scan = next;
3785 }
a687059c 3786
a0d0e21e
LW
3787 /*
3788 * We get here only if there's trouble -- normally "case END" is
3789 * the terminating point.
3790 */
cea2e8a9 3791 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3792 /*NOTREACHED*/
4633a7c4
LW
3793 sayNO;
3794
7821416a
IZ
3795yes_loud:
3796 DEBUG_r(
3797 PerlIO_printf(Perl_debug_log,
3798 "%*s %scould match...%s\n",
3799 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3800 );
3801 goto yes;
3802yes_final:
3803 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3804 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3805yes:
3806#ifdef DEBUGGING
3280af22 3807 PL_regindent--;
4633a7c4 3808#endif
02db2b7b
IZ
3809
3810#if 0 /* Breaks $^R */
3811 if (unwind)
3812 regcpblow(firstcp);
3813#endif
4633a7c4
LW
3814 return 1;
3815
3816no:
7821416a
IZ
3817 DEBUG_r(
3818 PerlIO_printf(Perl_debug_log,
3819 "%*s %sfailed...%s\n",
3820 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3821 );
3822 goto do_no;
3823no_final:
3824do_no:
02db2b7b
IZ
3825 if (unwind) {
3826 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3827
3828 switch (uw->type) {
3829 case RE_UNWIND_BRANCH:
3830 case RE_UNWIND_BRANCHJ:
3831 {
3832 re_unwind_branch_t *uwb = &(uw->branch);
3833 I32 lastparen = uwb->lastparen;
9041c2e3 3834
02db2b7b
IZ
3835 REGCP_UNWIND(uwb->lastcp);
3836 for (n = *PL_reglastparen; n > lastparen; n--)
3837 PL_regendp[n] = -1;
3838 *PL_reglastparen = n;
3839 scan = next = uwb->next;
9041c2e3
NIS
3840 if ( !scan ||
3841 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
3842 ? BRANCH : BRANCHJ) ) { /* Failure */
3843 unwind = uwb->prev;
3844#ifdef DEBUGGING
3845 PL_regindent--;
3846#endif
3847 goto do_no;
3848 }
3849 /* Have more choice yet. Reuse the same uwb. */
3850 /*SUPPRESS 560*/
3851 if ((n = (uwb->type == RE_UNWIND_BRANCH
3852 ? NEXT_OFF(next) : ARG(next))))
3853 next += n;
3854 else
3855 next = NULL; /* XXXX Needn't unwinding in this case... */
3856 uwb->next = next;
3857 next = NEXTOPER(scan);
3858 if (uwb->type == RE_UNWIND_BRANCHJ)
3859 next = NEXTOPER(next);
3860 locinput = uwb->locinput;
3861 nextchr = uwb->nextchr;
3862#ifdef DEBUGGING
3863 PL_regindent = uwb->regindent;
3864#endif
3865
3866 goto reenter;
3867 }
3868 /* NOT REACHED */
3869 default:
3870 Perl_croak(aTHX_ "regexp unwind memory corruption");
3871 }
3872 /* NOT REACHED */
3873 }
4633a7c4 3874#ifdef DEBUGGING
3280af22 3875 PL_regindent--;
4633a7c4 3876#endif
a0d0e21e 3877 return 0;
a687059c
LW
3878}
3879
3880/*
3881 - regrepeat - repeatedly match something simple, report how many
3882 */
3883/*
3884 * [This routine now assumes that it will only match on things of length 1.
3885 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3886 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3887 */
76e3520e 3888STATIC I32
cea2e8a9 3889S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3890{
a0d0e21e 3891 register char *scan;
a0d0e21e 3892 register I32 c;
3280af22 3893 register char *loceol = PL_regeol;
a0ed51b3 3894 register I32 hardcount = 0;
53c4c00c 3895 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 3896
3280af22 3897 scan = PL_reginput;
c277df42 3898 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3899 loceol = scan + max;
a0d0e21e 3900 switch (OP(p)) {
22c35a8c 3901 case REG_ANY:
1aa99e6b 3902 if (do_utf8) {
ffc61ed2 3903 loceol = PL_regeol;
1aa99e6b 3904 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3905 scan += UTF8SKIP(scan);
3906 hardcount++;
3907 }
3908 } else {
3909 while (scan < loceol && *scan != '\n')
3910 scan++;
a0ed51b3
LW
3911 }
3912 break;
ffc61ed2 3913 case SANY:
def8e4ea
JH
3914 if (do_utf8) {
3915 loceol = PL_regeol;
a0804c9e 3916 while (scan < loceol && hardcount < max) {
def8e4ea
JH
3917 scan += UTF8SKIP(scan);
3918 hardcount++;
3919 }
3920 }
3921 else
3922 scan = loceol;
a0ed51b3 3923 break;
f33976b4
DB
3924 case CANY:
3925 scan = loceol;
3926 break;
bbce6d69 3927 case EXACT: /* length of string is 1 */
cd439c50 3928 c = (U8)*STRING(p);
bbce6d69 3929 while (scan < loceol && UCHARAT(scan) == c)
3930 scan++;
3931 break;
3932 case EXACTF: /* length of string is 1 */
cd439c50 3933 c = (U8)*STRING(p);
bbce6d69 3934 while (scan < loceol &&
22c35a8c 3935 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3936 scan++;
3937 break;
3938 case EXACTFL: /* length of string is 1 */
3280af22 3939 PL_reg_flags |= RF_tainted;
cd439c50 3940 c = (U8)*STRING(p);
bbce6d69 3941 while (scan < loceol &&
22c35a8c 3942 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3943 scan++;
3944 break;
3945 case ANYOF:
ffc61ed2
JH
3946 if (do_utf8) {
3947 loceol = PL_regeol;
1aa99e6b
IH
3948 while (hardcount < max && scan < loceol &&
3949 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3950 scan += UTF8SKIP(scan);
3951 hardcount++;
3952 }
3953 } else {
3954 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3955 scan++;
3956 }
a0d0e21e
LW
3957 break;
3958 case ALNUM:
1aa99e6b 3959 if (do_utf8) {
ffc61ed2 3960 loceol = PL_regeol;
8269fa76 3961 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3962 while (hardcount < max && scan < loceol &&
3568d838 3963 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3964 scan += UTF8SKIP(scan);
3965 hardcount++;
3966 }
3967 } else {
3968 while (scan < loceol && isALNUM(*scan))
3969 scan++;
a0ed51b3
LW
3970 }
3971 break;
bbce6d69 3972 case ALNUML:
3280af22 3973 PL_reg_flags |= RF_tainted;
1aa99e6b 3974 if (do_utf8) {
ffc61ed2 3975 loceol = PL_regeol;
1aa99e6b
IH
3976 while (hardcount < max && scan < loceol &&
3977 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3978 scan += UTF8SKIP(scan);
3979 hardcount++;
3980 }
3981 } else {
3982 while (scan < loceol && isALNUM_LC(*scan))
3983 scan++;
a0ed51b3
LW
3984 }
3985 break;
a0d0e21e 3986 case NALNUM:
1aa99e6b 3987 if (do_utf8) {
ffc61ed2 3988 loceol = PL_regeol;
8269fa76 3989 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3990 while (hardcount < max && scan < loceol &&
3568d838 3991 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3992 scan += UTF8SKIP(scan);
3993 hardcount++;
3994 }
3995 } else {
3996 while (scan < loceol && !isALNUM(*scan))
3997 scan++;
a0ed51b3
LW
3998 }
3999 break;
bbce6d69 4000 case NALNUML:
3280af22 4001 PL_reg_flags |= RF_tainted;
1aa99e6b 4002 if (do_utf8) {
ffc61ed2 4003 loceol = PL_regeol;
1aa99e6b
IH
4004 while (hardcount < max && scan < loceol &&
4005 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4006 scan += UTF8SKIP(scan);
4007 hardcount++;
4008 }
4009 } else {
4010 while (scan < loceol && !isALNUM_LC(*scan))
4011 scan++;
a0ed51b3
LW
4012 }
4013 break;
a0d0e21e 4014 case SPACE:
1aa99e6b 4015 if (do_utf8) {
ffc61ed2 4016 loceol = PL_regeol;
8269fa76 4017 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 4018 while (hardcount < max && scan < loceol &&
3568d838
JH
4019 (*scan == ' ' ||
4020 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4021 scan += UTF8SKIP(scan);
4022 hardcount++;
4023 }
4024 } else {
4025 while (scan < loceol && isSPACE(*scan))
4026 scan++;
a0ed51b3
LW
4027 }
4028 break;
bbce6d69 4029 case SPACEL:
3280af22 4030 PL_reg_flags |= RF_tainted;
1aa99e6b 4031 if (do_utf8) {
ffc61ed2 4032 loceol = PL_regeol;
1aa99e6b 4033 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4034 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4035 scan += UTF8SKIP(scan);
4036 hardcount++;
4037 }
4038 } else {
4039 while (scan < loceol && isSPACE_LC(*scan))
4040 scan++;
a0ed51b3
LW
4041 }
4042 break;
a0d0e21e 4043 case NSPACE:
1aa99e6b 4044 if (do_utf8) {
ffc61ed2 4045 loceol = PL_regeol;
8269fa76 4046 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 4047 while (hardcount < max && scan < loceol &&
3568d838
JH
4048 !(*scan == ' ' ||
4049 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4050 scan += UTF8SKIP(scan);
4051 hardcount++;
4052 }
4053 } else {
4054 while (scan < loceol && !isSPACE(*scan))
4055 scan++;
4056 break;
a0ed51b3 4057 }
bbce6d69 4058 case NSPACEL:
3280af22 4059 PL_reg_flags |= RF_tainted;
1aa99e6b 4060 if (do_utf8) {
ffc61ed2 4061 loceol = PL_regeol;
1aa99e6b 4062 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4063 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4064 scan += UTF8SKIP(scan);
4065 hardcount++;
4066 }
4067 } else {
4068 while (scan < loceol && !isSPACE_LC(*scan))
4069 scan++;
a0ed51b3
LW
4070 }
4071 break;
a0d0e21e 4072 case DIGIT:
1aa99e6b 4073 if (do_utf8) {
ffc61ed2 4074 loceol = PL_regeol;
8269fa76 4075 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 4076 while (hardcount < max && scan < loceol &&
3568d838 4077 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4078 scan += UTF8SKIP(scan);
4079 hardcount++;
4080 }
4081 } else {
4082 while (scan < loceol && isDIGIT(*scan))
4083 scan++;
a0ed51b3
LW
4084 }
4085 break;
a0d0e21e 4086 case NDIGIT:
1aa99e6b 4087 if (do_utf8) {
ffc61ed2 4088 loceol = PL_regeol;
8269fa76 4089 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 4090 while (hardcount < max && scan < loceol &&
3568d838 4091 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4092 scan += UTF8SKIP(scan);
4093 hardcount++;
4094 }
4095 } else {
4096 while (scan < loceol && !isDIGIT(*scan))
4097 scan++;
a0ed51b3
LW
4098 }
4099 break;
a0d0e21e
LW
4100 default: /* Called on something of 0 width. */
4101 break; /* So match right here or not at all. */
4102 }
a687059c 4103
a0ed51b3
LW
4104 if (hardcount)
4105 c = hardcount;
4106 else
4107 c = scan - PL_reginput;
3280af22 4108 PL_reginput = scan;
a687059c 4109
9041c2e3 4110 DEBUG_r(
c277df42
IZ
4111 {
4112 SV *prop = sv_newmortal();
4113
4114 regprop(prop, p);
9041c2e3
NIS
4115 PerlIO_printf(Perl_debug_log,
4116 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
7b0972df 4117 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42 4118 });
9041c2e3 4119
a0d0e21e 4120 return(c);
a687059c
LW
4121}
4122
4123/*
c277df42 4124 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 4125 *
c277df42
IZ
4126 * The repeater is supposed to have constant length.
4127 */
4128
76e3520e 4129STATIC I32
cea2e8a9 4130S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 4131{
b7953727 4132 register char *scan = Nullch;
c277df42 4133 register char *start;
3280af22 4134 register char *loceol = PL_regeol;
a0ed51b3 4135 I32 l = 0;
708e3b05 4136 I32 count = 0, res = 1;
a0ed51b3
LW
4137
4138 if (!max)
4139 return 0;
c277df42 4140
3280af22 4141 start = PL_reginput;
53c4c00c 4142 if (PL_reg_match_utf8) {
708e3b05 4143 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4144 if (!count++) {
4145 l = 0;
4146 while (start < PL_reginput) {
4147 l++;
4148 start += UTF8SKIP(start);
4149 }
4150 *lp = l;
4151 if (l == 0)
4152 return max;
4153 }
4154 if (count == max)
4155 return count;
4156 }
4157 }
4158 else {
708e3b05 4159 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4160 if (!count++) {
4161 *lp = l = PL_reginput - start;
4162 if (max != REG_INFTY && l*max < loceol - scan)
4163 loceol = scan + l*max;
4164 if (l == 0)
4165 return max;
c277df42
IZ
4166 }
4167 }
4168 }
708e3b05 4169 if (!res)
3280af22 4170 PL_reginput = scan;
9041c2e3 4171
a0ed51b3 4172 return count;
c277df42
IZ
4173}
4174
4175/*
ffc61ed2
JH
4176- regclass_swash - prepare the utf8 swash
4177*/
4178
4179SV *
9e55ce06 4180Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV **altsvp)
ffc61ed2 4181{
9e55ce06
JH
4182 SV *sw = NULL;
4183 SV *si = NULL;
4184 SV *alt = NULL;
ffc61ed2
JH
4185
4186 if (PL_regdata && PL_regdata->count) {
4187 U32 n = ARG(node);
4188
4189 if (PL_regdata->what[n] == 's') {
4190 SV *rv = (SV*)PL_regdata->data[n];
4191 AV *av = (AV*)SvRV((SV*)rv);
9e55ce06 4192 SV **a, **b;
9041c2e3 4193
9e55ce06
JH
4194 /* See the end of regcomp.c:S_reglass() for
4195 * documentation of these array elements. */
4196
4197 si = *av_fetch(av, 0, FALSE);
4198 a = av_fetch(av, 1, FALSE);
4199 b = av_fetch(av, 2, FALSE);
9041c2e3 4200
ffc61ed2
JH
4201 if (a)
4202 sw = *a;
4203 else if (si && doinit) {
4204 sw = swash_init("utf8", "", si, 1, 0);
4205 (void)av_store(av, 1, sw);
4206 }
9e55ce06
JH
4207 if (b)
4208 alt = *b;
ffc61ed2
JH
4209 }
4210 }
4211
9e55ce06
JH
4212 if (listsvp)
4213 *listsvp = si;
4214 if (altsvp)
4215 *altsvp = alt;
ffc61ed2
JH
4216
4217 return sw;
4218}
4219
4220/*
832705d4
JH
4221 - reginclasslen - determine if a character falls into a character class
4222
4223 The n is the ANYOF regnode, the p is the target string, lenp
4224 is pointer to the maximum length of how far to go in the p
4225 (if the lenp is zero, UTF8SKIP(p) is used),
4226 do_utf8 tells whether the target string is in UTF-8.
4227
bbce6d69 4228 */
4229
76e3520e 4230STATIC bool
9e55ce06 4231S_reginclasslen(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, register bool do_utf8)
bbce6d69 4232{
ffc61ed2 4233 char flags = ANYOF_FLAGS(n);
bbce6d69 4234 bool match = FALSE;
1aa99e6b 4235 UV c;
3568d838 4236 STRLEN len = 0;
9e55ce06 4237 STRLEN plen;
1aa99e6b 4238
3568d838 4239 c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
bbce6d69 4240
9e55ce06 4241 plen = lenp ? *lenp : UNISKIP(c);
ffc61ed2 4242 if (do_utf8 || (flags & ANYOF_UNICODE)) {
9e55ce06
JH
4243 if (lenp)
4244 *lenp = 0;
ffc61ed2 4245 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
4246 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4247 match = TRUE;
bbce6d69 4248 }
3568d838 4249 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 4250 match = TRUE;
ffc61ed2 4251 if (!match) {
9e55ce06
JH
4252 AV *av;
4253 SV *sw = regclass_swash(n, TRUE, 0, (SV**)&av);
ffc61ed2
JH
4254
4255 if (sw) {
3568d838 4256 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
4257 match = TRUE;
4258 else if (flags & ANYOF_FOLD) {
9e55ce06
JH
4259 U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
4260 STRLEN tmplen;
4261
4262 if (!match && lenp && av) {
4263 I32 i;
4264
4265 for (i = 0; i <= av_len(av); i++) {
4266 SV* sv = *av_fetch(av, i, FALSE);
4267 STRLEN len;
4268 char *s = SvPV(sv, len);
4269
4270 if (len <= plen && memEQ(s, p, len)) {
4271 *lenp = len;
4272 match = TRUE;
4273 break;
4274 }
4275 }
4276 }
4277 if (!match) {
4278 to_utf8_fold(p, tmpbuf, &tmplen);
4279 if (swash_fetch(sw, tmpbuf, do_utf8))
4280 match = TRUE;
4281 }
4282 if (!match) {
4283 to_utf8_upper(p, tmpbuf, &tmplen);
4284 if (swash_fetch(sw, tmpbuf, do_utf8))
4285 match = TRUE;
4286 }
ffc61ed2
JH
4287 }
4288 }
bbce6d69 4289 }
9e55ce06
JH
4290 if (match && lenp && *lenp == 0)
4291 *lenp = UNISKIP(c);
bbce6d69 4292 }
1aa99e6b 4293 if (!match && c < 256) {
ffc61ed2
JH
4294 if (ANYOF_BITMAP_TEST(n, c))
4295 match = TRUE;
4296 else if (flags & ANYOF_FOLD) {
3568d838 4297 I32 f;
a0ed51b3 4298
ffc61ed2
JH
4299 if (flags & ANYOF_LOCALE) {
4300 PL_reg_flags |= RF_tainted;
4301 f = PL_fold_locale[c];
4302 }
4303 else
4304 f = PL_fold[c];
4305 if (f != c && ANYOF_BITMAP_TEST(n, f))
4306 match = TRUE;
4307 }
4308
4309 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 4310 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
4311 if (
4312 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4313 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4314 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
4315 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4316 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4317 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4318 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4319 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4320 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4321 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4322 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
4323 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
4324 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4325 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4326 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4327 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4328 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
4329 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4330 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
4331 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4332 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4333 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4334 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
4335 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4336 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4337 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
4338 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
4339 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
4340 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
4341 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
4342 ) /* How's that for a conditional? */
4343 {
4344 match = TRUE;
4345 }
a0ed51b3 4346 }
a0ed51b3
LW
4347 }
4348
a0ed51b3
LW
4349 return (flags & ANYOF_INVERT) ? !match : match;
4350}
161b471a 4351
832705d4
JH
4352/*
4353 - reginclass - determine if a character falls into a character class
4354
4355 The n is the ANYOF regnode, the p is the target string, do_utf8 tells
4356 whether the target string is in UTF-8.
4357
4358 */
4359
9e55ce06
JH
4360STATIC bool
4361S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
4362{
4363 return S_reginclasslen(aTHX_ n, p, 0, do_utf8);
4364}
4365
dfe13c55 4366STATIC U8 *
cea2e8a9 4367S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 4368{
1aa99e6b
IH
4369 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4370}
4371
4372STATIC U8 *
4373S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 4374{
a0ed51b3 4375 if (off >= 0) {
1aa99e6b 4376 while (off-- && s < lim) {
ffc61ed2 4377 /* XXX could check well-formedness here */
a0ed51b3 4378 s += UTF8SKIP(s);
ffc61ed2 4379 }
a0ed51b3
LW
4380 }
4381 else {
4382 while (off++) {
1aa99e6b 4383 if (s > lim) {
a0ed51b3 4384 s--;
ffc61ed2 4385 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4386 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4387 s--;
ffc61ed2
JH
4388 }
4389 /* XXX could check well-formedness here */
a0ed51b3
LW
4390 }
4391 }
4392 }
4393 return s;
4394}
161b471a 4395
dfe13c55 4396STATIC U8 *
1aa99e6b 4397S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 4398{
1aa99e6b
IH
4399 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4400}
4401
4402STATIC U8 *
4403S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
4404{
4405 if (off >= 0) {
1aa99e6b 4406 while (off-- && s < lim) {
ffc61ed2 4407 /* XXX could check well-formedness here */
a0ed51b3 4408 s += UTF8SKIP(s);
ffc61ed2 4409 }
a0ed51b3
LW
4410 if (off >= 0)
4411 return 0;
4412 }
4413 else {
4414 while (off++) {
1aa99e6b 4415 if (s > lim) {
a0ed51b3 4416 s--;
ffc61ed2 4417 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4418 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4419 s--;
ffc61ed2
JH
4420 }
4421 /* XXX could check well-formedness here */
a0ed51b3
LW
4422 }
4423 else
4424 break;
4425 }
4426 if (off <= 0)
4427 return 0;
4428 }
4429 return s;
4430}
51371543 4431
51371543 4432static void
acfe0abc 4433restore_pos(pTHX_ void *arg)
51371543 4434{
51371543
GS
4435 if (PL_reg_eval_set) {
4436 if (PL_reg_oldsaved) {
4437 PL_reg_re->subbeg = PL_reg_oldsaved;
4438 PL_reg_re->sublen = PL_reg_oldsavedlen;
4439 RX_MATCH_COPIED_on(PL_reg_re);
4440 }
4441 PL_reg_magic->mg_len = PL_reg_oldpos;
4442 PL_reg_eval_set = 0;
4443 PL_curpm = PL_reg_oldcurpm;
4444 }
4445}