This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
SvSetSV() is not returnable, but that's okay since
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
be3c0a43 70 **** Copyright (c) 1991-2002, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
a687059c
LW
83#include "regcomp.h"
84
c277df42
IZ
85#define RF_tainted 1 /* tainted information used? */
86#define RF_warned 2 /* warned about big count? */
ce862d02 87#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
88#define RF_utf8 8 /* String contains multibyte chars? */
89
90#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
91
92#define RS_init 1 /* eval environment created */
93#define RS_set 2 /* replsv value is set */
c277df42 94
a687059c
LW
95#ifndef STATIC
96#define STATIC static
97#endif
98
c277df42
IZ
99/*
100 * Forwards.
101 */
102
a0ed51b3 103#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 104#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 105
dfe13c55
GS
106#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
108#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
109#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
110#define HOPc(pos,off) ((char*)HOP(pos,off))
111#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 112
efb30f32 113#define HOPBACK(pos, off) ( \
e54858b0 114 (PL_reg_match_utf8) \
efb30f32
HS
115 ? reghopmaybe((U8*)pos, -off) \
116 : (pos - off >= PL_bostr) \
117 ? (U8*)(pos - off) \
118 : (U8*)NULL \
119)
120#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
121
1aa99e6b
IH
122#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
123#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
124#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
125#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
126#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
127#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
128
8269fa76 129#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 130
5f80c4cf 131/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
132#define JUMPABLE(rn) ( \
133 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
134 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
135 OP(rn) == PLUS || OP(rn) == MINMOD || \
136 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
137)
138
cca55fe3
JP
139#define HAS_TEXT(rn) ( \
140 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
141)
e2d8ce26 142
a84d97b6
HS
143/*
144 Search for mandatory following text node; for lookahead, the text must
145 follow but for lookbehind (rn->flags != 0) we skip to the next step.
146*/
cca55fe3 147#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 148 while (JUMPABLE(rn)) \
a84d97b6 149 if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 150 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
151 else if (OP(rn) == PLUS) \
152 rn = NEXTOPER(rn); \
a84d97b6
HS
153 else if (OP(rn) == IFMATCH) \
154 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
e2d8ce26 155 else rn += NEXT_OFF(rn); \
5f80c4cf 156} STMT_END
74750237 157
acfe0abc 158static void restore_pos(pTHX_ void *arg);
51371543 159
76e3520e 160STATIC CHECKPOINT
cea2e8a9 161S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 162{
3280af22 163 int retval = PL_savestack_ix;
b1ce53c5
JH
164#define REGCP_PAREN_ELEMS 4
165 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
166 int p;
167
e49a9654
IH
168 if (paren_elems_to_push < 0)
169 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
170
a01268b5 171#define REGCP_OTHER_ELEMS 6
b1ce53c5 172 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 173 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 174/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
175 SSPUSHINT(PL_regendp[p]);
176 SSPUSHINT(PL_regstartp[p]);
3280af22 177 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
178 SSPUSHINT(p);
179 }
b1ce53c5 180/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
181 SSPUSHINT(PL_regsize);
182 SSPUSHINT(*PL_reglastparen);
a01268b5 183 SSPUSHINT(*PL_reglastcloseparen);
3280af22 184 SSPUSHPTR(PL_reginput);
41123dfd
JH
185#define REGCP_FRAME_ELEMS 2
186/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
187 * are needed for the regexp context stack bookkeeping. */
188 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 189 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 190
a0d0e21e
LW
191 return retval;
192}
193
c277df42 194/* These are needed since we do not localize EVAL nodes: */
02db2b7b 195# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 196 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 197 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 198
02db2b7b 199# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 200 PerlIO_printf(Perl_debug_log, \
faccc32b 201 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 202 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 203
76e3520e 204STATIC char *
cea2e8a9 205S_regcppop(pTHX)
a0d0e21e 206{
b1ce53c5 207 I32 i;
a0d0e21e
LW
208 U32 paren = 0;
209 char *input;
cf93c79d 210 I32 tmps;
b1ce53c5
JH
211
212 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 213 i = SSPOPINT;
b1ce53c5
JH
214 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
215 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 216 input = (char *) SSPOPPTR;
a01268b5 217 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
218 *PL_reglastparen = SSPOPINT;
219 PL_regsize = SSPOPINT;
b1ce53c5
JH
220
221 /* Now restore the parentheses context. */
41123dfd
JH
222 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
223 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 224 paren = (U32)SSPOPINT;
3280af22 225 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
226 PL_regstartp[paren] = SSPOPINT;
227 tmps = SSPOPINT;
3280af22
NIS
228 if (paren <= *PL_reglastparen)
229 PL_regendp[paren] = tmps;
c277df42 230 DEBUG_r(
c3464db5 231 PerlIO_printf(Perl_debug_log,
b900a521 232 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 233 (UV)paren, (IV)PL_regstartp[paren],
b900a521 234 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 235 (IV)PL_regendp[paren],
3280af22 236 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 237 );
a0d0e21e 238 }
c277df42 239 DEBUG_r(
3280af22 240 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 241 PerlIO_printf(Perl_debug_log,
faccc32b
JH
242 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
243 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
244 }
245 );
daf18116 246#if 1
dafc8851
JH
247 /* It would seem that the similar code in regtry()
248 * already takes care of this, and in fact it is in
249 * a better location to since this code can #if 0-ed out
250 * but the code in regtry() is needed or otherwise tests
251 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
252 * (as of patchlevel 7877) will fail. Then again,
253 * this code seems to be necessary or otherwise
254 * building DynaLoader will fail:
255 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
256 * --jhi */
3280af22
NIS
257 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
258 if (paren > PL_regsize)
cf93c79d
IZ
259 PL_regstartp[paren] = -1;
260 PL_regendp[paren] = -1;
a0d0e21e 261 }
dafc8851 262#endif
a0d0e21e
LW
263 return input;
264}
265
0f5d15d6 266STATIC char *
cea2e8a9 267S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
268{
269 I32 tmp = PL_savestack_ix;
270
271 PL_savestack_ix = ss;
272 regcppop();
273 PL_savestack_ix = tmp;
942e002e 274 return Nullch;
0f5d15d6
IZ
275}
276
277typedef struct re_cc_state
278{
279 I32 ss;
280 regnode *node;
281 struct re_cc_state *prev;
282 CURCUR *cc;
283 regexp *re;
284} re_cc_state;
285
02db2b7b 286#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 287
29d1e993
HS
288#define TRYPAREN(paren, n, input) { \
289 if (paren) { \
290 if (n) { \
291 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
292 PL_regendp[paren] = input - PL_bostr; \
293 } \
294 else \
295 PL_regendp[paren] = -1; \
296 } \
297 if (regmatch(next)) \
298 sayYES; \
299 if (paren && n) \
300 PL_regendp[paren] = -1; \
301}
302
303
a687059c 304/*
e50aee73 305 * pregexec and friends
a687059c
LW
306 */
307
308/*
c277df42 309 - pregexec - match a regexp against a string
a687059c 310 */
c277df42 311I32
864dbfa3 312Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 313 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
314/* strend: pointer to null at end of string */
315/* strbeg: real beginning of string */
316/* minend: end of match must be >=minend after stringarg. */
317/* nosave: For optimizations. */
318{
319 return
9041c2e3 320 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
321 nosave ? 0 : REXEC_COPY_STR);
322}
0f5d15d6
IZ
323
324STATIC void
cea2e8a9 325S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
326{
327 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
328#ifdef DEBUGGING
329 PL_regprogram = prog->program;
330#endif
331 PL_regnpar = prog->nparens;
9041c2e3
NIS
332 PL_regdata = prog->data;
333 PL_reg_re = prog;
0f5d15d6 334}
22e551b9 335
9041c2e3 336/*
cad2e5aa
JH
337 * Need to implement the following flags for reg_anch:
338 *
339 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
340 * USE_INTUIT_ML
341 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
342 * INTUIT_AUTORITATIVE_ML
343 * INTUIT_ONCE_NOML - Intuit can match in one location only.
344 * INTUIT_ONCE_ML
345 *
346 * Another flag for this function: SECOND_TIME (so that float substrs
347 * with giant delta may be not rechecked).
348 */
349
350/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
351
2c2d71f5 352/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
353 Otherwise, only SvCUR(sv) is used to get strbeg. */
354
355/* XXXX We assume that strpos is strbeg unless sv. */
356
6eb5f6b9
JH
357/* XXXX Some places assume that there is a fixed substring.
358 An update may be needed if optimizer marks as "INTUITable"
359 RExen without fixed substrings. Similarly, it is assumed that
360 lengths of all the strings are no more than minlen, thus they
361 cannot come from lookahead.
362 (Or minlen should take into account lookahead.) */
363
2c2d71f5
JH
364/* A failure to find a constant substring means that there is no need to make
365 an expensive call to REx engine, thus we celebrate a failure. Similarly,
366 finding a substring too deep into the string means that less calls to
30944b6d
IZ
367 regtry() should be needed.
368
369 REx compiler's optimizer found 4 possible hints:
370 a) Anchored substring;
371 b) Fixed substring;
372 c) Whether we are anchored (beginning-of-line or \G);
373 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 374 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
375 string which does not contradict any of them.
376 */
2c2d71f5 377
6eb5f6b9
JH
378/* Most of decisions we do here should have been done at compile time.
379 The nodes of the REx which we used for the search should have been
380 deleted from the finite automaton. */
381
cad2e5aa
JH
382char *
383Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
384 char *strend, U32 flags, re_scream_pos_data *data)
385{
b7953727 386 register I32 start_shift = 0;
cad2e5aa 387 /* Should be nonnegative! */
b7953727 388 register I32 end_shift = 0;
2c2d71f5
JH
389 register char *s;
390 register SV *check;
a1933d95 391 char *strbeg;
cad2e5aa
JH
392 char *t;
393 I32 ml_anch;
6eb5f6b9 394 register char *other_last = Nullch; /* other substr checked before this */
b7953727 395 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
396#ifdef DEBUGGING
397 char *i_strpos = strpos;
ce333219 398 SV *dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 399#endif
cad2e5aa 400
b8d68ded
JH
401 if (prog->reganch & ROPT_UTF8) {
402 DEBUG_r(PerlIO_printf(Perl_debug_log,
403 "UTF-8 regex...\n"));
404 PL_reg_flags |= RF_utf8;
405 }
406
2a782b5b 407 DEBUG_r({
b8d68ded 408 char *s = PL_reg_match_utf8 ?
c728cb41
JH
409 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
410 strpos;
b8d68ded
JH
411 int len = PL_reg_match_utf8 ?
412 strlen(s) : strend - strpos;
2a782b5b
JH
413 if (!PL_colorset)
414 reginitcolors();
b8d68ded
JH
415 if (PL_reg_match_utf8)
416 DEBUG_r(PerlIO_printf(Perl_debug_log,
417 "UTF-8 target...\n"));
2a782b5b
JH
418 PerlIO_printf(Perl_debug_log,
419 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
420 PL_colors[4],PL_colors[5],PL_colors[0],
421 prog->precomp,
422 PL_colors[1],
423 (strlen(prog->precomp) > 60 ? "..." : ""),
424 PL_colors[0],
425 (int)(len > 60 ? 60 : len),
426 s, PL_colors[1],
427 (len > 60 ? "..." : "")
428 );
429 });
cad2e5aa 430
1aa99e6b 431 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
a72c7584
JH
432 DEBUG_r(PerlIO_printf(Perl_debug_log,
433 "String too short... [re_intuit_start]\n"));
cad2e5aa 434 goto fail;
2c2d71f5 435 }
a1933d95 436 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 437 PL_regeol = strend;
653099ff 438 check = prog->check_substr;
2c2d71f5 439 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
440 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
441 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 442 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 443
7e25d62c
JH
444 if (!ml_anch) {
445 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
446 | ROPT_IMPLICIT)) /* not a real BOL */
447 /* SvCUR is not set on references: SvRV and SvPVX overlap */
448 && sv && !SvROK(sv)
449 && (strpos != strbeg)) {
450 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
451 goto fail;
452 }
453 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 454 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 455 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
456 I32 slen;
457
1aa99e6b 458 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
459 if (SvTAIL(check)) {
460 slen = SvCUR(check); /* >= 1 */
cad2e5aa 461
9041c2e3 462 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
463 || (strend - s == slen && strend[-1] != '\n')) {
464 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
465 goto fail_finish;
cad2e5aa
JH
466 }
467 /* Now should match s[0..slen-2] */
468 slen--;
653099ff 469 if (slen && (*SvPVX(check) != *s
cad2e5aa 470 || (slen > 1
653099ff 471 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
472 report_neq:
473 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
474 goto fail_finish;
475 }
cad2e5aa 476 }
653099ff
GS
477 else if (*SvPVX(check) != *s
478 || ((slen = SvCUR(check)) > 1
479 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
480 goto report_neq;
481 goto success_at_start;
7e25d62c 482 }
cad2e5aa 483 }
2c2d71f5 484 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 485 s = strpos;
2c2d71f5 486 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 487 end_shift = prog->minlen - start_shift -
653099ff 488 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 489 if (!ml_anch) {
653099ff
GS
490 I32 end = prog->check_offset_max + CHR_SVLEN(check)
491 - (SvTAIL(check) != 0);
1aa99e6b 492 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
493
494 if (end_shift < eshift)
495 end_shift = eshift;
496 }
cad2e5aa 497 }
2c2d71f5 498 else { /* Can match at random position */
cad2e5aa
JH
499 ml_anch = 0;
500 s = strpos;
2c2d71f5
JH
501 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
502 /* Should be nonnegative! */
503 end_shift = prog->minlen - start_shift -
653099ff 504 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
505 }
506
2c2d71f5 507#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 508 if (end_shift < 0)
6bbae5e6 509 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
510#endif
511
2c2d71f5
JH
512 restart:
513 /* Find a possible match in the region s..strend by looking for
514 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 515 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
516 I32 p = -1; /* Internal iterator of scream. */
517 I32 *pp = data ? data->scream_pos : &p;
518
2c2d71f5
JH
519 if (PL_screamfirst[BmRARE(check)] >= 0
520 || ( BmRARE(check) == '\n'
521 && (BmPREVIOUS(check) == SvCUR(check) - 1)
522 && SvTAIL(check) ))
9041c2e3 523 s = screaminstr(sv, check,
2c2d71f5 524 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 525 else
2c2d71f5 526 goto fail_finish;
cad2e5aa
JH
527 if (data)
528 *data->scream_olds = s;
529 }
f33976b4 530 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
531 s = fbm_instr((U8*)(s + start_shift),
532 (U8*)(strend - end_shift),
533 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 534 else
1aa99e6b
IH
535 s = fbm_instr(HOP3(s, start_shift, strend),
536 HOP3(strend, -end_shift, strbeg),
2c2d71f5 537 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
538
539 /* Update the count-of-usability, remove useless subpatterns,
540 unshift s. */
2c2d71f5
JH
541
542 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
543 (s ? "Found" : "Did not find"),
544 ((check == prog->anchored_substr) ? "anchored" : "floating"),
545 PL_colors[0],
7b0972df
JH
546 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
547 SvPVX(check),
2c2d71f5
JH
548 PL_colors[1], (SvTAIL(check) ? "$" : ""),
549 (s ? " at offset " : "...\n") ) );
550
551 if (!s)
552 goto fail_finish;
553
6eb5f6b9
JH
554 check_at = s;
555
2c2d71f5 556 /* Finish the diagnostic message */
30944b6d 557 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
558
559 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
560 Start with the other substr.
561 XXXX no SCREAM optimization yet - and a very coarse implementation
562 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
563 *always* match. Probably should be marked during compile...
564 Probably it is right to do no SCREAM here...
565 */
566
567 if (prog->float_substr && prog->anchored_substr) {
30944b6d 568 /* Take into account the "other" substring. */
2c2d71f5
JH
569 /* XXXX May be hopelessly wrong for UTF... */
570 if (!other_last)
6eb5f6b9 571 other_last = strpos;
2c2d71f5 572 if (check == prog->float_substr) {
30944b6d
IZ
573 do_other_anchored:
574 {
1aa99e6b 575 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
576 char *s1 = s;
577
2c2d71f5
JH
578 t = s - prog->check_offset_max;
579 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
580 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 581 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 582 && t > strpos)))
30944b6d 583 /* EMPTY */;
2c2d71f5
JH
584 else
585 t = strpos;
1aa99e6b 586 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
587 if (t < other_last) /* These positions already checked */
588 t = other_last;
1aa99e6b 589 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
590 if (last < last1)
591 last1 = last;
592 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
593 /* On end-of-str: see comment below. */
594 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
595 HOP3(HOP3(last1, prog->anchored_offset, strend)
596 + SvCUR(prog->anchored_substr),
597 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
598 prog->anchored_substr,
599 PL_multiline ? FBMrf_MULTILINE : 0);
600 DEBUG_r(PerlIO_printf(Perl_debug_log,
601 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
602 (s ? "Found" : "Contradicts"),
603 PL_colors[0],
7b0972df
JH
604 (int)(SvCUR(prog->anchored_substr)
605 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
606 SvPVX(prog->anchored_substr),
607 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
608 if (!s) {
609 if (last1 >= last2) {
610 DEBUG_r(PerlIO_printf(Perl_debug_log,
611 ", giving up...\n"));
612 goto fail_finish;
613 }
614 DEBUG_r(PerlIO_printf(Perl_debug_log,
615 ", trying floating at offset %ld...\n",
1aa99e6b
IH
616 (long)(HOP3c(s1, 1, strend) - i_strpos)));
617 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
618 s = HOP3c(last, 1, strend);
2c2d71f5
JH
619 goto restart;
620 }
621 else {
622 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 623 (long)(s - i_strpos)));
1aa99e6b
IH
624 t = HOP3c(s, -prog->anchored_offset, strbeg);
625 other_last = HOP3c(s, 1, strend);
30944b6d 626 s = s1;
2c2d71f5
JH
627 if (t == strpos)
628 goto try_at_start;
2c2d71f5
JH
629 goto try_at_offset;
630 }
30944b6d 631 }
2c2d71f5
JH
632 }
633 else { /* Take into account the floating substring. */
634 char *last, *last1;
635 char *s1 = s;
636
1aa99e6b
IH
637 t = HOP3c(s, -start_shift, strbeg);
638 last1 = last =
639 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
640 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
641 last = HOP3c(t, prog->float_max_offset, strend);
642 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
643 if (s < other_last)
644 s = other_last;
2c2d71f5
JH
645 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
646 /* fbm_instr() takes into account exact value of end-of-str
647 if the check is SvTAIL(ed). Since false positives are OK,
648 and end-of-str is not later than strend we are OK. */
649 s = fbm_instr((unsigned char*)s,
650 (unsigned char*)last + SvCUR(prog->float_substr)
651 - (SvTAIL(prog->float_substr)!=0),
652 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
653 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
654 (s ? "Found" : "Contradicts"),
655 PL_colors[0],
7b0972df
JH
656 (int)(SvCUR(prog->float_substr)
657 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
658 SvPVX(prog->float_substr),
659 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
660 if (!s) {
661 if (last1 == last) {
662 DEBUG_r(PerlIO_printf(Perl_debug_log,
663 ", giving up...\n"));
664 goto fail_finish;
665 }
666 DEBUG_r(PerlIO_printf(Perl_debug_log,
667 ", trying anchored starting at offset %ld...\n",
30944b6d 668 (long)(s1 + 1 - i_strpos)));
803ff556 669 other_last = last;
1aa99e6b 670 s = HOP3c(t, 1, strend);
2c2d71f5
JH
671 goto restart;
672 }
673 else {
674 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 675 (long)(s - i_strpos)));
803ff556 676 other_last = s; /* Fix this later. --Hugo */
30944b6d 677 s = s1;
2c2d71f5
JH
678 if (t == strpos)
679 goto try_at_start;
2c2d71f5
JH
680 goto try_at_offset;
681 }
682 }
cad2e5aa 683 }
2c2d71f5
JH
684
685 t = s - prog->check_offset_max;
2c2d71f5
JH
686 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
687 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
688 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
689 && t > strpos))) {
2c2d71f5
JH
690 /* Fixed substring is found far enough so that the match
691 cannot start at strpos. */
692 try_at_offset:
cad2e5aa 693 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
694 /* Eventually fbm_*() should handle this, but often
695 anchored_offset is not 0, so this check will not be wasted. */
696 /* XXXX In the code below we prefer to look for "^" even in
697 presence of anchored substrings. And we search even
698 beyond the found float position. These pessimizations
699 are historical artefacts only. */
700 find_anchor:
2c2d71f5 701 while (t < strend - prog->minlen) {
cad2e5aa 702 if (*t == '\n') {
4ee3650e 703 if (t < check_at - prog->check_offset_min) {
30944b6d 704 if (prog->anchored_substr) {
4ee3650e
GS
705 /* Since we moved from the found position,
706 we definitely contradict the found anchored
30944b6d
IZ
707 substr. Due to the above check we do not
708 contradict "check" substr.
709 Thus we can arrive here only if check substr
710 is float. Redo checking for "other"=="fixed".
711 */
9041c2e3 712 strpos = t + 1;
30944b6d
IZ
713 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
714 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
715 goto do_other_anchored;
716 }
4ee3650e
GS
717 /* We don't contradict the found floating substring. */
718 /* XXXX Why not check for STCLASS? */
cad2e5aa 719 s = t + 1;
2c2d71f5 720 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 721 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
722 goto set_useful;
723 }
4ee3650e
GS
724 /* Position contradicts check-string */
725 /* XXXX probably better to look for check-string
726 than for "\n", so one should lower the limit for t? */
727 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 728 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 729 other_last = strpos = s = t + 1;
cad2e5aa
JH
730 goto restart;
731 }
732 t++;
733 }
2c2d71f5
JH
734 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
735 PL_colors[0],PL_colors[1]));
736 goto fail_finish;
cad2e5aa 737 }
f5952150
GS
738 else {
739 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
740 PL_colors[0],PL_colors[1]));
741 }
cad2e5aa
JH
742 s = t;
743 set_useful:
2c2d71f5 744 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
745 }
746 else {
f5952150 747 /* The found string does not prohibit matching at strpos,
2c2d71f5 748 - no optimization of calling REx engine can be performed,
f5952150
GS
749 unless it was an MBOL and we are not after MBOL,
750 or a future STCLASS check will fail this. */
2c2d71f5
JH
751 try_at_start:
752 /* Even in this situation we may use MBOL flag if strpos is offset
753 wrt the start of the string. */
05b4157f 754 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 755 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
756 /* May be due to an implicit anchor of m{.*foo} */
757 && !(prog->reganch & ROPT_IMPLICIT))
758 {
cad2e5aa
JH
759 t = strpos;
760 goto find_anchor;
761 }
30944b6d 762 DEBUG_r( if (ml_anch)
f5952150
GS
763 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
764 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 765 );
2c2d71f5 766 success_at_start:
30944b6d 767 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 768 && prog->check_substr /* Could be deleted already */
cad2e5aa 769 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
770 && prog->check_substr == prog->float_substr)
771 {
cad2e5aa 772 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 773 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
774 SvREFCNT_dec(prog->check_substr);
775 prog->check_substr = Nullsv; /* disable */
776 prog->float_substr = Nullsv; /* clear */
5e39e1e5 777 check = Nullsv; /* abort */
cad2e5aa 778 s = strpos;
3cf5c195
IZ
779 /* XXXX This is a remnant of the old implementation. It
780 looks wasteful, since now INTUIT can use many
6eb5f6b9 781 other heuristics. */
cad2e5aa
JH
782 prog->reganch &= ~RE_USE_INTUIT;
783 }
784 else
785 s = strpos;
786 }
787
6eb5f6b9
JH
788 /* Last resort... */
789 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
790 if (prog->regstclass) {
791 /* minlen == 0 is possible if regstclass is \b or \B,
792 and the fixed substr is ''$.
793 Since minlen is already taken into account, s+1 is before strend;
794 accidentally, minlen >= 1 guaranties no false positives at s + 1
795 even for \b or \B. But (minlen? 1 : 0) below assumes that
796 regstclass does not come from lookahead... */
797 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
798 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 799 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 800 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 801 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 802 : 1);
6eb5f6b9 803 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
804 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
805 : (prog->float_substr
806 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
807 cl_l, strend)
808 : strend);
a1933d95 809 char *startpos = strbeg;
6eb5f6b9
JH
810
811 t = s;
76384e4a 812 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 813 PL_regdata = prog->data;
76384e4a
GS
814 PL_bostr = startpos;
815 }
f33976b4 816 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
6eb5f6b9
JH
817 if (!s) {
818#ifdef DEBUGGING
b7953727 819 char *what = 0;
6eb5f6b9
JH
820#endif
821 if (endpos == strend) {
822 DEBUG_r( PerlIO_printf(Perl_debug_log,
823 "Could not match STCLASS...\n") );
824 goto fail;
825 }
66e933ab
GS
826 DEBUG_r( PerlIO_printf(Perl_debug_log,
827 "This position contradicts STCLASS...\n") );
653099ff
GS
828 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
829 goto fail;
6eb5f6b9
JH
830 /* Contradict one of substrings */
831 if (prog->anchored_substr) {
6eb5f6b9
JH
832 if (prog->anchored_substr == check) {
833 DEBUG_r( what = "anchored" );
834 hop_and_restart:
1aa99e6b 835 s = HOP3c(t, 1, strend);
66e933ab
GS
836 if (s + start_shift + end_shift > strend) {
837 /* XXXX Should be taken into account earlier? */
838 DEBUG_r( PerlIO_printf(Perl_debug_log,
839 "Could not match STCLASS...\n") );
840 goto fail;
841 }
5e39e1e5
HS
842 if (!check)
843 goto giveup;
6eb5f6b9 844 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 845 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
846 what, (long)(s + start_shift - i_strpos)) );
847 goto restart;
848 }
66e933ab 849 /* Have both, check_string is floating */
6eb5f6b9
JH
850 if (t + start_shift >= check_at) /* Contradicts floating=check */
851 goto retry_floating_check;
852 /* Recheck anchored substring, but not floating... */
9041c2e3 853 s = check_at;
5e39e1e5
HS
854 if (!check)
855 goto giveup;
6eb5f6b9 856 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 857 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
858 (long)(other_last - i_strpos)) );
859 goto do_other_anchored;
860 }
60e71179
GS
861 /* Another way we could have checked stclass at the
862 current position only: */
863 if (ml_anch) {
864 s = t = t + 1;
5e39e1e5
HS
865 if (!check)
866 goto giveup;
60e71179 867 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
868 "Looking for /%s^%s/m starting at offset %ld...\n",
869 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 870 goto try_at_offset;
66e933ab 871 }
60e71179
GS
872 if (!prog->float_substr) /* Could have been deleted */
873 goto fail;
6eb5f6b9
JH
874 /* Check is floating subtring. */
875 retry_floating_check:
876 t = check_at - start_shift;
877 DEBUG_r( what = "floating" );
878 goto hop_and_restart;
879 }
b7953727
JH
880 if (t != s) {
881 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 882 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
883 (long)(t - i_strpos), (long)(s - i_strpos))
884 );
885 }
886 else {
887 DEBUG_r(PerlIO_printf(Perl_debug_log,
888 "Does not contradict STCLASS...\n");
889 );
890 }
6eb5f6b9 891 }
5e39e1e5
HS
892 giveup:
893 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
894 PL_colors[4], (check ? "Guessed" : "Giving up"),
895 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 896 return s;
2c2d71f5
JH
897
898 fail_finish: /* Substring not found */
66e933ab
GS
899 if (prog->check_substr) /* could be removed already */
900 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 901 fail:
2c2d71f5 902 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
903 PL_colors[4],PL_colors[5]));
904 return Nullch;
905}
9661b544 906
6eb5f6b9 907/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
908STATIC char *
909S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 910{
6eb5f6b9
JH
911 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
912 char *m;
d8093b23
G
913 STRLEN ln;
914 unsigned int c1;
915 unsigned int c2;
6eb5f6b9
JH
916 char *e;
917 register I32 tmp = 1; /* Scratch variable? */
53c4c00c 918 register bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 919
6eb5f6b9
JH
920 /* We know what class it must start with. */
921 switch (OP(c)) {
6eb5f6b9 922 case ANYOF:
a0ed51b3 923 while (s < strend) {
e0f9d4a8
JH
924 STRLEN skip = do_utf8 ? UTF8SKIP(s) : 1;
925
926 if (reginclass(c, (U8*)s, do_utf8) ||
ebc501f0
JH
927 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
928 /* The assignment of 2 is intentional:
929 * for the sharp s, the skip is 2. */
930 (skip = SHARP_S_SKIP)
931 )) {
6eb5f6b9
JH
932 if (tmp && (norun || regtry(prog, s)))
933 goto got_it;
934 else
935 tmp = doevery;
a0ed51b3 936 }
e0f9d4a8
JH
937 else
938 tmp = 1;
939 s += skip;
a0d0e21e 940 }
6eb5f6b9 941 break;
f33976b4
DB
942 case CANY:
943 while (s < strend) {
944 if (tmp && (norun || regtry(prog, s)))
945 goto got_it;
946 else
947 tmp = doevery;
948 s++;
949 }
950 break;
6eb5f6b9
JH
951 case EXACTF:
952 m = STRING(c);
953 ln = STR_LEN(c);
1aa99e6b 954 if (UTF) {
a2a2844f 955 STRLEN ulen1, ulen2;
e7ae6809
JH
956 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
957 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
958
959 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
960 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
961
7f16dd3d
JH
962 c1 = utf8_to_uvchr(tmpbuf1, 0);
963 c2 = utf8_to_uvchr(tmpbuf2, 0);
1aa99e6b
IH
964 }
965 else {
966 c1 = *(U8*)m;
967 c2 = PL_fold[c1];
968 }
6eb5f6b9
JH
969 goto do_exactf;
970 case EXACTFL:
971 m = STRING(c);
972 ln = STR_LEN(c);
d8093b23 973 c1 = *(U8*)m;
6eb5f6b9
JH
974 c2 = PL_fold_locale[c1];
975 do_exactf:
93c50829 976 e = do_utf8 ? s + ln : strend - ln;
b3c9acc1 977
6eb5f6b9
JH
978 if (norun && e < s)
979 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 980
60a8b682
JH
981 /* The idea in the EXACTF* cases is to first find the
982 * first character of the EXACTF* node and then, if
983 * necessary, case-insensitively compare the full
984 * text of the node. The c1 and c2 are the first
985 * characters (though in Unicode it gets a bit
986 * more complicated because there are more cases
7f16dd3d
JH
987 * than just upper and lower: one needs to use
988 * the so-called folding case for case-insensitive
989 * matching (called "loose matching" in Unicode).
990 * ibcmp_utf8() will do just that. */
60a8b682 991
1aa99e6b 992 if (do_utf8) {
575cac57
JH
993 UV c, f;
994 U8 tmpbuf [UTF8_MAXLEN+1];
995 U8 foldbuf[UTF8_MAXLEN_FOLD+1];
996 STRLEN len, foldlen;
d7f013c8 997
09091399 998 if (c1 == c2) {
1aa99e6b 999 while (s <= e) {
575cac57 1000 c = utf8_to_uvchr((U8*)s, &len);
80aecb99
JH
1001 if ( c == c1
1002 && (ln == len ||
66423254
JH
1003 ibcmp_utf8(s, (char **)0, 0, do_utf8,
1004 m, (char **)0, ln, UTF))
55da9344 1005 && (norun || regtry(prog, s)) )
1aa99e6b 1006 goto got_it;
80aecb99
JH
1007 else {
1008 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1009 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1010 if ( f != c
1011 && (f == c1 || f == c2)
1012 && (ln == foldlen ||
66423254
JH
1013 !ibcmp_utf8((char *) foldbuf,
1014 (char **)0, foldlen, do_utf8,
d07ddd77 1015 m,
66423254 1016 (char **)0, ln, UTF))
80aecb99
JH
1017 && (norun || regtry(prog, s)) )
1018 goto got_it;
1019 }
1aa99e6b
IH
1020 s += len;
1021 }
09091399
JH
1022 }
1023 else {
1aa99e6b 1024 while (s <= e) {
575cac57 1025 c = utf8_to_uvchr((U8*)s, &len);
80aecb99 1026
60a8b682 1027 /* Handle some of the three Greek sigmas cases.
8c01da3c
JH
1028 * Note that not all the possible combinations
1029 * are handled here: some of them are handled
1030 * by the standard folding rules, and some of
1031 * them (the character class or ANYOF cases)
1032 * are handled during compiletime in
1033 * regexec.c:S_regclass(). */
880bd946
JH
1034 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1035 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1036 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99
JH
1037
1038 if ( (c == c1 || c == c2)
1039 && (ln == len ||
66423254
JH
1040 ibcmp_utf8(s, (char **)0, 0, do_utf8,
1041 m, (char **)0, ln, UTF))
55da9344 1042 && (norun || regtry(prog, s)) )
1aa99e6b 1043 goto got_it;
80aecb99
JH
1044 else {
1045 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1046 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1047 if ( f != c
1048 && (f == c1 || f == c2)
1049 && (ln == foldlen ||
a6872d42 1050 !ibcmp_utf8((char *) foldbuf,
66423254 1051 (char **)0, foldlen, do_utf8,
d07ddd77 1052 m,
66423254 1053 (char **)0, ln, UTF))
80aecb99
JH
1054 && (norun || regtry(prog, s)) )
1055 goto got_it;
1056 }
1aa99e6b
IH
1057 s += len;
1058 }
09091399 1059 }
1aa99e6b
IH
1060 }
1061 else {
1062 if (c1 == c2)
1063 while (s <= e) {
1064 if ( *(U8*)s == c1
1065 && (ln == 1 || !(OP(c) == EXACTF
1066 ? ibcmp(s, m, ln)
1067 : ibcmp_locale(s, m, ln)))
1068 && (norun || regtry(prog, s)) )
1069 goto got_it;
1070 s++;
1071 }
1072 else
1073 while (s <= e) {
1074 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1075 && (ln == 1 || !(OP(c) == EXACTF
1076 ? ibcmp(s, m, ln)
1077 : ibcmp_locale(s, m, ln)))
1078 && (norun || regtry(prog, s)) )
1079 goto got_it;
1080 s++;
1081 }
b3c9acc1
IZ
1082 }
1083 break;
bbce6d69 1084 case BOUNDL:
3280af22 1085 PL_reg_flags |= RF_tainted;
bbce6d69 1086 /* FALL THROUGH */
a0d0e21e 1087 case BOUND:
ffc61ed2 1088 if (do_utf8) {
12d33761 1089 if (s == PL_bostr)
ffc61ed2
JH
1090 tmp = '\n';
1091 else {
1aa99e6b 1092 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1093
0064a8a9
JH
1094 if (s > (char*)r)
1095 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1096 }
1097 tmp = ((OP(c) == BOUND ?
9041c2e3 1098 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1099 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1100 while (s < strend) {
1101 if (tmp == !(OP(c) == BOUND ?
3568d838 1102 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1103 isALNUM_LC_utf8((U8*)s)))
1104 {
1105 tmp = !tmp;
1106 if ((norun || regtry(prog, s)))
1107 goto got_it;
1108 }
1109 s += UTF8SKIP(s);
a687059c 1110 }
a0d0e21e 1111 }
667bb95a 1112 else {
12d33761 1113 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1114 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1115 while (s < strend) {
1116 if (tmp ==
1117 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1118 tmp = !tmp;
1119 if ((norun || regtry(prog, s)))
1120 goto got_it;
1121 }
1122 s++;
a0ed51b3 1123 }
a0ed51b3 1124 }
6eb5f6b9 1125 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1126 goto got_it;
1127 break;
bbce6d69 1128 case NBOUNDL:
3280af22 1129 PL_reg_flags |= RF_tainted;
bbce6d69 1130 /* FALL THROUGH */
a0d0e21e 1131 case NBOUND:
ffc61ed2 1132 if (do_utf8) {
12d33761 1133 if (s == PL_bostr)
ffc61ed2
JH
1134 tmp = '\n';
1135 else {
1aa99e6b 1136 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1137
0064a8a9
JH
1138 if (s > (char*)r)
1139 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1140 }
1141 tmp = ((OP(c) == NBOUND ?
9041c2e3 1142 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1143 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1144 while (s < strend) {
1145 if (tmp == !(OP(c) == NBOUND ?
3568d838 1146 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1147 isALNUM_LC_utf8((U8*)s)))
1148 tmp = !tmp;
1149 else if ((norun || regtry(prog, s)))
1150 goto got_it;
1151 s += UTF8SKIP(s);
1152 }
a0d0e21e 1153 }
667bb95a 1154 else {
12d33761 1155 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1156 tmp = ((OP(c) == NBOUND ?
1157 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1158 while (s < strend) {
1159 if (tmp ==
1160 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1161 tmp = !tmp;
1162 else if ((norun || regtry(prog, s)))
1163 goto got_it;
1164 s++;
1165 }
a0ed51b3 1166 }
6eb5f6b9 1167 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1168 goto got_it;
1169 break;
a0d0e21e 1170 case ALNUM:
ffc61ed2 1171 if (do_utf8) {
8269fa76 1172 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1173 while (s < strend) {
3568d838 1174 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1175 if (tmp && (norun || regtry(prog, s)))
1176 goto got_it;
1177 else
1178 tmp = doevery;
1179 }
bbce6d69 1180 else
ffc61ed2
JH
1181 tmp = 1;
1182 s += UTF8SKIP(s);
bbce6d69 1183 }
bbce6d69 1184 }
ffc61ed2
JH
1185 else {
1186 while (s < strend) {
1187 if (isALNUM(*s)) {
1188 if (tmp && (norun || regtry(prog, s)))
1189 goto got_it;
1190 else
1191 tmp = doevery;
1192 }
a0ed51b3 1193 else
ffc61ed2
JH
1194 tmp = 1;
1195 s++;
a0ed51b3 1196 }
a0ed51b3
LW
1197 }
1198 break;
bbce6d69 1199 case ALNUML:
3280af22 1200 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1201 if (do_utf8) {
1202 while (s < strend) {
1203 if (isALNUM_LC_utf8((U8*)s)) {
1204 if (tmp && (norun || regtry(prog, s)))
1205 goto got_it;
1206 else
1207 tmp = doevery;
1208 }
a687059c 1209 else
ffc61ed2
JH
1210 tmp = 1;
1211 s += UTF8SKIP(s);
a0d0e21e 1212 }
a0d0e21e 1213 }
ffc61ed2
JH
1214 else {
1215 while (s < strend) {
1216 if (isALNUM_LC(*s)) {
1217 if (tmp && (norun || regtry(prog, s)))
1218 goto got_it;
1219 else
1220 tmp = doevery;
1221 }
a0ed51b3 1222 else
ffc61ed2
JH
1223 tmp = 1;
1224 s++;
a0ed51b3 1225 }
a0ed51b3
LW
1226 }
1227 break;
a0d0e21e 1228 case NALNUM:
ffc61ed2 1229 if (do_utf8) {
8269fa76 1230 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1231 while (s < strend) {
3568d838 1232 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1233 if (tmp && (norun || regtry(prog, s)))
1234 goto got_it;
1235 else
1236 tmp = doevery;
1237 }
bbce6d69 1238 else
ffc61ed2
JH
1239 tmp = 1;
1240 s += UTF8SKIP(s);
bbce6d69 1241 }
bbce6d69 1242 }
ffc61ed2
JH
1243 else {
1244 while (s < strend) {
1245 if (!isALNUM(*s)) {
1246 if (tmp && (norun || regtry(prog, s)))
1247 goto got_it;
1248 else
1249 tmp = doevery;
1250 }
a0ed51b3 1251 else
ffc61ed2
JH
1252 tmp = 1;
1253 s++;
a0ed51b3 1254 }
a0ed51b3
LW
1255 }
1256 break;
bbce6d69 1257 case NALNUML:
3280af22 1258 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1259 if (do_utf8) {
1260 while (s < strend) {
1261 if (!isALNUM_LC_utf8((U8*)s)) {
1262 if (tmp && (norun || regtry(prog, s)))
1263 goto got_it;
1264 else
1265 tmp = doevery;
1266 }
a687059c 1267 else
ffc61ed2
JH
1268 tmp = 1;
1269 s += UTF8SKIP(s);
a687059c 1270 }
a0d0e21e 1271 }
ffc61ed2
JH
1272 else {
1273 while (s < strend) {
1274 if (!isALNUM_LC(*s)) {
1275 if (tmp && (norun || regtry(prog, s)))
1276 goto got_it;
1277 else
1278 tmp = doevery;
1279 }
a0ed51b3 1280 else
ffc61ed2
JH
1281 tmp = 1;
1282 s++;
a0ed51b3 1283 }
a0ed51b3
LW
1284 }
1285 break;
a0d0e21e 1286 case SPACE:
ffc61ed2 1287 if (do_utf8) {
8269fa76 1288 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1289 while (s < strend) {
3568d838 1290 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1291 if (tmp && (norun || regtry(prog, s)))
1292 goto got_it;
1293 else
1294 tmp = doevery;
1295 }
a0d0e21e 1296 else
ffc61ed2
JH
1297 tmp = 1;
1298 s += UTF8SKIP(s);
2304df62 1299 }
a0d0e21e 1300 }
ffc61ed2
JH
1301 else {
1302 while (s < strend) {
1303 if (isSPACE(*s)) {
1304 if (tmp && (norun || regtry(prog, s)))
1305 goto got_it;
1306 else
1307 tmp = doevery;
1308 }
a0ed51b3 1309 else
ffc61ed2
JH
1310 tmp = 1;
1311 s++;
a0ed51b3 1312 }
a0ed51b3
LW
1313 }
1314 break;
bbce6d69 1315 case SPACEL:
3280af22 1316 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1317 if (do_utf8) {
1318 while (s < strend) {
1319 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1320 if (tmp && (norun || regtry(prog, s)))
1321 goto got_it;
1322 else
1323 tmp = doevery;
1324 }
bbce6d69 1325 else
ffc61ed2
JH
1326 tmp = 1;
1327 s += UTF8SKIP(s);
bbce6d69 1328 }
bbce6d69 1329 }
ffc61ed2
JH
1330 else {
1331 while (s < strend) {
1332 if (isSPACE_LC(*s)) {
1333 if (tmp && (norun || regtry(prog, s)))
1334 goto got_it;
1335 else
1336 tmp = doevery;
1337 }
a0ed51b3 1338 else
ffc61ed2
JH
1339 tmp = 1;
1340 s++;
a0ed51b3 1341 }
a0ed51b3
LW
1342 }
1343 break;
a0d0e21e 1344 case NSPACE:
ffc61ed2 1345 if (do_utf8) {
8269fa76 1346 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1347 while (s < strend) {
3568d838 1348 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1349 if (tmp && (norun || regtry(prog, s)))
1350 goto got_it;
1351 else
1352 tmp = doevery;
1353 }
a0d0e21e 1354 else
ffc61ed2
JH
1355 tmp = 1;
1356 s += UTF8SKIP(s);
a687059c 1357 }
a0d0e21e 1358 }
ffc61ed2
JH
1359 else {
1360 while (s < strend) {
1361 if (!isSPACE(*s)) {
1362 if (tmp && (norun || regtry(prog, s)))
1363 goto got_it;
1364 else
1365 tmp = doevery;
1366 }
a0ed51b3 1367 else
ffc61ed2
JH
1368 tmp = 1;
1369 s++;
a0ed51b3 1370 }
a0ed51b3
LW
1371 }
1372 break;
bbce6d69 1373 case NSPACEL:
3280af22 1374 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1375 if (do_utf8) {
1376 while (s < strend) {
1377 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1378 if (tmp && (norun || regtry(prog, s)))
1379 goto got_it;
1380 else
1381 tmp = doevery;
1382 }
bbce6d69 1383 else
ffc61ed2
JH
1384 tmp = 1;
1385 s += UTF8SKIP(s);
bbce6d69 1386 }
bbce6d69 1387 }
ffc61ed2
JH
1388 else {
1389 while (s < strend) {
1390 if (!isSPACE_LC(*s)) {
1391 if (tmp && (norun || regtry(prog, s)))
1392 goto got_it;
1393 else
1394 tmp = doevery;
1395 }
a0ed51b3 1396 else
ffc61ed2
JH
1397 tmp = 1;
1398 s++;
a0ed51b3 1399 }
a0ed51b3
LW
1400 }
1401 break;
a0d0e21e 1402 case DIGIT:
ffc61ed2 1403 if (do_utf8) {
8269fa76 1404 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1405 while (s < strend) {
3568d838 1406 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1407 if (tmp && (norun || regtry(prog, s)))
1408 goto got_it;
1409 else
1410 tmp = doevery;
1411 }
a0d0e21e 1412 else
ffc61ed2
JH
1413 tmp = 1;
1414 s += UTF8SKIP(s);
2b69d0c2 1415 }
a0d0e21e 1416 }
ffc61ed2
JH
1417 else {
1418 while (s < strend) {
1419 if (isDIGIT(*s)) {
1420 if (tmp && (norun || regtry(prog, s)))
1421 goto got_it;
1422 else
1423 tmp = doevery;
1424 }
a0ed51b3 1425 else
ffc61ed2
JH
1426 tmp = 1;
1427 s++;
a0ed51b3 1428 }
a0ed51b3
LW
1429 }
1430 break;
b8c5462f
JH
1431 case DIGITL:
1432 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1433 if (do_utf8) {
1434 while (s < strend) {
1435 if (isDIGIT_LC_utf8((U8*)s)) {
1436 if (tmp && (norun || regtry(prog, s)))
1437 goto got_it;
1438 else
1439 tmp = doevery;
1440 }
b8c5462f 1441 else
ffc61ed2
JH
1442 tmp = 1;
1443 s += UTF8SKIP(s);
b8c5462f 1444 }
b8c5462f 1445 }
ffc61ed2
JH
1446 else {
1447 while (s < strend) {
1448 if (isDIGIT_LC(*s)) {
1449 if (tmp && (norun || regtry(prog, s)))
1450 goto got_it;
1451 else
1452 tmp = doevery;
1453 }
b8c5462f 1454 else
ffc61ed2
JH
1455 tmp = 1;
1456 s++;
b8c5462f 1457 }
b8c5462f
JH
1458 }
1459 break;
a0d0e21e 1460 case NDIGIT:
ffc61ed2 1461 if (do_utf8) {
8269fa76 1462 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1463 while (s < strend) {
3568d838 1464 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1465 if (tmp && (norun || regtry(prog, s)))
1466 goto got_it;
1467 else
1468 tmp = doevery;
1469 }
a0d0e21e 1470 else
ffc61ed2
JH
1471 tmp = 1;
1472 s += UTF8SKIP(s);
a687059c 1473 }
a0d0e21e 1474 }
ffc61ed2
JH
1475 else {
1476 while (s < strend) {
1477 if (!isDIGIT(*s)) {
1478 if (tmp && (norun || regtry(prog, s)))
1479 goto got_it;
1480 else
1481 tmp = doevery;
1482 }
a0ed51b3 1483 else
ffc61ed2
JH
1484 tmp = 1;
1485 s++;
a0ed51b3 1486 }
a0ed51b3
LW
1487 }
1488 break;
b8c5462f
JH
1489 case NDIGITL:
1490 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1491 if (do_utf8) {
1492 while (s < strend) {
1493 if (!isDIGIT_LC_utf8((U8*)s)) {
1494 if (tmp && (norun || regtry(prog, s)))
1495 goto got_it;
1496 else
1497 tmp = doevery;
1498 }
b8c5462f 1499 else
ffc61ed2
JH
1500 tmp = 1;
1501 s += UTF8SKIP(s);
b8c5462f 1502 }
a0ed51b3 1503 }
ffc61ed2
JH
1504 else {
1505 while (s < strend) {
1506 if (!isDIGIT_LC(*s)) {
1507 if (tmp && (norun || regtry(prog, s)))
1508 goto got_it;
1509 else
1510 tmp = doevery;
1511 }
cf93c79d 1512 else
ffc61ed2
JH
1513 tmp = 1;
1514 s++;
b8c5462f 1515 }
b8c5462f
JH
1516 }
1517 break;
b3c9acc1 1518 default:
3c3eec57
GS
1519 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1520 break;
d6a28714 1521 }
6eb5f6b9
JH
1522 return 0;
1523 got_it:
1524 return s;
1525}
1526
1527/*
1528 - regexec_flags - match a regexp against a string
1529 */
1530I32
1531Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1532 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1533/* strend: pointer to null at end of string */
1534/* strbeg: real beginning of string */
1535/* minend: end of match must be >=minend after stringarg. */
1536/* data: May be used for some additional optimizations. */
1537/* nosave: For optimizations. */
1538{
6eb5f6b9
JH
1539 register char *s;
1540 register regnode *c;
1541 register char *startpos = stringarg;
6eb5f6b9
JH
1542 I32 minlen; /* must match at least this many chars */
1543 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1544 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1545 constant substr. */ /* CC */
1546 I32 end_shift = 0; /* Same for the end. */ /* CC */
1547 I32 scream_pos = -1; /* Internal iterator of scream. */
1548 char *scream_olds;
1549 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1550 bool do_utf8 = DO_UTF8(sv);
2a782b5b 1551#ifdef DEBUGGING
9e55ce06
JH
1552 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
1553 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2a782b5b 1554#endif
6eb5f6b9
JH
1555
1556 PL_regcc = 0;
1557
1558 cache_re(prog);
1559#ifdef DEBUGGING
aea4f609 1560 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1561#endif
1562
1563 /* Be paranoid... */
1564 if (prog == NULL || startpos == NULL) {
1565 Perl_croak(aTHX_ "NULL regexp parameter");
1566 return 0;
1567 }
1568
1569 minlen = prog->minlen;
61a36c01 1570 if (strend - startpos < minlen) {
a72c7584
JH
1571 DEBUG_r(PerlIO_printf(Perl_debug_log,
1572 "String too short [regexec_flags]...\n"));
1573 goto phooey;
1aa99e6b 1574 }
6eb5f6b9 1575
6eb5f6b9
JH
1576 /* Check validity of program. */
1577 if (UCHARAT(prog->program) != REG_MAGIC) {
1578 Perl_croak(aTHX_ "corrupted regexp program");
1579 }
1580
1581 PL_reg_flags = 0;
1582 PL_reg_eval_set = 0;
1583 PL_reg_maxiter = 0;
1584
1585 if (prog->reganch & ROPT_UTF8)
1586 PL_reg_flags |= RF_utf8;
1587
1588 /* Mark beginning of line for ^ and lookbehind. */
1589 PL_regbol = startpos;
1590 PL_bostr = strbeg;
1591 PL_reg_sv = sv;
1592
1593 /* Mark end of line for $ (and such) */
1594 PL_regeol = strend;
1595
1596 /* see how far we have to get to not match where we matched before */
1597 PL_regtill = startpos+minend;
1598
1599 /* We start without call_cc context. */
1600 PL_reg_call_cc = 0;
1601
1602 /* If there is a "must appear" string, look for it. */
1603 s = startpos;
1604
1605 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1606 MAGIC *mg;
1607
1608 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1609 PL_reg_ganch = startpos;
1610 else if (sv && SvTYPE(sv) >= SVt_PVMG
1611 && SvMAGIC(sv)
14befaf4
DM
1612 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1613 && mg->mg_len >= 0) {
6eb5f6b9
JH
1614 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1615 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1616 if (s > PL_reg_ganch)
6eb5f6b9
JH
1617 goto phooey;
1618 s = PL_reg_ganch;
1619 }
1620 }
1621 else /* pos() not defined */
1622 PL_reg_ganch = strbeg;
1623 }
1624
699c3c34
JH
1625 if (do_utf8 == (UTF!=0) &&
1626 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1627 re_scream_pos_data d;
1628
1629 d.scream_olds = &scream_olds;
1630 d.scream_pos = &scream_pos;
1631 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7
JH
1632 if (!s) {
1633 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1634 goto phooey; /* not present */
3fa9c3d7 1635 }
6eb5f6b9
JH
1636 }
1637
2a782b5b 1638 DEBUG_r({
9e55ce06
JH
1639 char *s0 = UTF ?
1640 pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
c728cb41 1641 UNI_DISPLAY_REGEX) :
9e55ce06
JH
1642 prog->precomp;
1643 int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1644 char *s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
c728cb41 1645 UNI_DISPLAY_REGEX) : startpos;
9e55ce06 1646 int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
2a782b5b
JH
1647 if (!PL_colorset)
1648 reginitcolors();
1649 PerlIO_printf(Perl_debug_log,
9e55ce06 1650 "%sMatching REx%s `%s%*.*s%s%s' against `%s%.*s%s%s'\n",
2a782b5b 1651 PL_colors[4],PL_colors[5],PL_colors[0],
9e55ce06 1652 len0, len0, s0,
2a782b5b 1653 PL_colors[1],
9e55ce06 1654 len0 > 60 ? "..." : "",
2a782b5b 1655 PL_colors[0],
9e55ce06
JH
1656 (int)(len1 > 60 ? 60 : len1),
1657 s1, PL_colors[1],
1658 (len1 > 60 ? "..." : "")
2a782b5b
JH
1659 );
1660 });
6eb5f6b9
JH
1661
1662 /* Simplest case: anchored match need be tried only once. */
1663 /* [unless only anchor is BOL and multiline is set] */
1664 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1665 if (s == startpos && regtry(prog, startpos))
1666 goto got_it;
1667 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1668 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1669 {
1670 char *end;
1671
1672 if (minlen)
1673 dontbother = minlen - 1;
1aa99e6b 1674 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1675 /* for multiline we only have to try after newlines */
1676 if (prog->check_substr) {
1677 if (s == startpos)
1678 goto after_try;
1679 while (1) {
1680 if (regtry(prog, s))
1681 goto got_it;
1682 after_try:
1683 if (s >= end)
1684 goto phooey;
1685 if (prog->reganch & RE_USE_INTUIT) {
1686 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1687 if (!s)
1688 goto phooey;
1689 }
1690 else
1691 s++;
1692 }
1693 } else {
1694 if (s > startpos)
1695 s--;
1696 while (s < end) {
1697 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1698 if (regtry(prog, s))
1699 goto got_it;
1700 }
1701 }
1702 }
1703 }
1704 goto phooey;
1705 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1706 if (regtry(prog, PL_reg_ganch))
1707 goto got_it;
1708 goto phooey;
1709 }
1710
1711 /* Messy cases: unanchored match. */
9041c2e3 1712 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1713 /* we have /x+whatever/ */
1714 /* it must be a one character string (XXXX Except UTF?) */
1715 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1716#ifdef DEBUGGING
1717 int did_match = 0;
1718#endif
1719
1aa99e6b 1720 if (do_utf8) {
6eb5f6b9
JH
1721 while (s < strend) {
1722 if (*s == ch) {
bf93d4cc 1723 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1724 if (regtry(prog, s)) goto got_it;
1725 s += UTF8SKIP(s);
1726 while (s < strend && *s == ch)
1727 s += UTF8SKIP(s);
1728 }
1729 s += UTF8SKIP(s);
1730 }
1731 }
1732 else {
1733 while (s < strend) {
1734 if (*s == ch) {
bf93d4cc 1735 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1736 if (regtry(prog, s)) goto got_it;
1737 s++;
1738 while (s < strend && *s == ch)
1739 s++;
1740 }
1741 s++;
1742 }
1743 }
b7953727 1744 DEBUG_r(if (!did_match)
bf93d4cc 1745 PerlIO_printf(Perl_debug_log,
b7953727
JH
1746 "Did not find anchored character...\n")
1747 );
6eb5f6b9
JH
1748 }
1749 /*SUPPRESS 560*/
1aa99e6b
IH
1750 else if (do_utf8 == (UTF!=0) &&
1751 (prog->anchored_substr != Nullsv
9041c2e3 1752 || (prog->float_substr != Nullsv
1aa99e6b 1753 && prog->float_max_offset < strend - s))) {
9041c2e3 1754 SV *must = prog->anchored_substr
6eb5f6b9 1755 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1756 I32 back_max =
6eb5f6b9 1757 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1758 I32 back_min =
6eb5f6b9 1759 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1760 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1761 -(I32)(CHR_SVLEN(must)
1aa99e6b 1762 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1763 char *last1; /* Last position checked before */
bf93d4cc
GS
1764#ifdef DEBUGGING
1765 int did_match = 0;
1766#endif
6eb5f6b9
JH
1767
1768 if (s > PL_bostr)
1769 last1 = HOPc(s, -1);
1770 else
1771 last1 = s - 1; /* bogus */
1772
1773 /* XXXX check_substr already used to find `s', can optimize if
1774 check_substr==must. */
1775 scream_pos = -1;
1776 dontbother = end_shift;
1777 strend = HOPc(strend, -dontbother);
1778 while ( (s <= last) &&
9041c2e3 1779 ((flags & REXEC_SCREAM)
1aa99e6b 1780 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1781 end_shift, &scream_pos, 0))
1aa99e6b 1782 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1783 (unsigned char*)strend, must,
6eb5f6b9 1784 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1785 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1786 if (HOPc(s, -back_max) > last1) {
1787 last1 = HOPc(s, -back_min);
1788 s = HOPc(s, -back_max);
1789 }
1790 else {
1791 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1792
1793 last1 = HOPc(s, -back_min);
1794 s = t;
1795 }
1aa99e6b 1796 if (do_utf8) {
6eb5f6b9
JH
1797 while (s <= last1) {
1798 if (regtry(prog, s))
1799 goto got_it;
1800 s += UTF8SKIP(s);
1801 }
1802 }
1803 else {
1804 while (s <= last1) {
1805 if (regtry(prog, s))
1806 goto got_it;
1807 s++;
1808 }
1809 }
1810 }
b7953727
JH
1811 DEBUG_r(if (!did_match)
1812 PerlIO_printf(Perl_debug_log,
1813 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1814 ((must == prog->anchored_substr)
1815 ? "anchored" : "floating"),
1816 PL_colors[0],
1817 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1818 SvPVX(must),
b7953727
JH
1819 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1820 );
6eb5f6b9
JH
1821 goto phooey;
1822 }
155aba94 1823 else if ((c = prog->regstclass)) {
66e933ab
GS
1824 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1825 /* don't bother with what can't match */
6eb5f6b9 1826 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1827 DEBUG_r({
1828 SV *prop = sv_newmortal();
9e55ce06
JH
1829 char *s0;
1830 char *s1;
1831 int len0;
1832 int len1;
1833
ffc61ed2 1834 regprop(prop, c);
9e55ce06
JH
1835 s0 = UTF ?
1836 pv_uni_display(dsv0, (U8*)SvPVX(prop), SvCUR(prop), 60,
c728cb41 1837 UNI_DISPLAY_REGEX) :
9e55ce06
JH
1838 SvPVX(prop);
1839 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1840 s1 = UTF ?
c728cb41 1841 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
9e55ce06
JH
1842 len1 = UTF ? SvCUR(dsv1) : strend - s;
1843 PerlIO_printf(Perl_debug_log,
1844 "Matching stclass `%*.*s' against `%*.*s'\n",
1845 len0, len0, s0,
1846 len1, len1, s1);
ffc61ed2 1847 });
6eb5f6b9
JH
1848 if (find_byclass(prog, c, s, strend, startpos, 0))
1849 goto got_it;
bf93d4cc 1850 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1851 }
1852 else {
1853 dontbother = 0;
1854 if (prog->float_substr != Nullsv) { /* Trim the end. */
1855 char *last;
d6a28714
JH
1856
1857 if (flags & REXEC_SCREAM) {
1858 last = screaminstr(sv, prog->float_substr, s - strbeg,
1859 end_shift, &scream_pos, 1); /* last one */
1860 if (!last)
ffc61ed2 1861 last = scream_olds; /* Only one occurrence. */
b8c5462f 1862 }
d6a28714
JH
1863 else {
1864 STRLEN len;
1865 char *little = SvPV(prog->float_substr, len);
1866
1867 if (SvTAIL(prog->float_substr)) {
1868 if (memEQ(strend - len + 1, little, len - 1))
1869 last = strend - len + 1;
1870 else if (!PL_multiline)
9041c2e3 1871 last = memEQ(strend - len, little, len)
d6a28714 1872 ? strend - len : Nullch;
b8c5462f 1873 else
d6a28714
JH
1874 goto find_last;
1875 } else {
1876 find_last:
9041c2e3 1877 if (len)
d6a28714 1878 last = rninstr(s, strend, little, little + len);
b8c5462f 1879 else
d6a28714 1880 last = strend; /* matching `$' */
b8c5462f 1881 }
b8c5462f 1882 }
bf93d4cc
GS
1883 if (last == NULL) {
1884 DEBUG_r(PerlIO_printf(Perl_debug_log,
1885 "%sCan't trim the tail, match fails (should not happen)%s\n",
1886 PL_colors[4],PL_colors[5]));
1887 goto phooey; /* Should not happen! */
1888 }
d6a28714
JH
1889 dontbother = strend - last + prog->float_min_offset;
1890 }
1891 if (minlen && (dontbother < minlen))
1892 dontbother = minlen - 1;
1893 strend -= dontbother; /* this one's always in bytes! */
1894 /* We don't know much -- general case. */
1aa99e6b 1895 if (do_utf8) {
d6a28714
JH
1896 for (;;) {
1897 if (regtry(prog, s))
1898 goto got_it;
1899 if (s >= strend)
1900 break;
b8c5462f 1901 s += UTF8SKIP(s);
d6a28714
JH
1902 };
1903 }
1904 else {
1905 do {
1906 if (regtry(prog, s))
1907 goto got_it;
1908 } while (s++ < strend);
1909 }
1910 }
1911
1912 /* Failure. */
1913 goto phooey;
1914
1915got_it:
1916 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1917
1918 if (PL_reg_eval_set) {
1919 /* Preserve the current value of $^R */
1920 if (oreplsv != GvSV(PL_replgv))
1921 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1922 restored, the value remains
1923 the same. */
acfe0abc 1924 restore_pos(aTHX_ 0);
d6a28714
JH
1925 }
1926
1927 /* make sure $`, $&, $', and $digit will work later */
1928 if ( !(flags & REXEC_NOT_FIRST) ) {
1929 if (RX_MATCH_COPIED(prog)) {
1930 Safefree(prog->subbeg);
1931 RX_MATCH_COPIED_off(prog);
1932 }
1933 if (flags & REXEC_COPY_STR) {
1934 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1935
1936 s = savepvn(strbeg, i);
1937 prog->subbeg = s;
1938 prog->sublen = i;
1939 RX_MATCH_COPIED_on(prog);
1940 }
1941 else {
1942 prog->subbeg = strbeg;
1943 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1944 }
1945 }
9041c2e3 1946
d6a28714
JH
1947 return 1;
1948
1949phooey:
bf93d4cc
GS
1950 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1951 PL_colors[4],PL_colors[5]));
d6a28714 1952 if (PL_reg_eval_set)
acfe0abc 1953 restore_pos(aTHX_ 0);
d6a28714
JH
1954 return 0;
1955}
1956
1957/*
1958 - regtry - try match at specific point
1959 */
1960STATIC I32 /* 0 failure, 1 success */
1961S_regtry(pTHX_ regexp *prog, char *startpos)
1962{
d6a28714
JH
1963 register I32 i;
1964 register I32 *sp;
1965 register I32 *ep;
1966 CHECKPOINT lastcp;
1967
02db2b7b
IZ
1968#ifdef DEBUGGING
1969 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1970#endif
d6a28714
JH
1971 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1972 MAGIC *mg;
1973
1974 PL_reg_eval_set = RS_init;
1975 DEBUG_r(DEBUG_s(
b900a521
JH
1976 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1977 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1978 ));
e8347627 1979 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1980 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1981 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1982 SAVETMPS;
1983 /* Apparently this is not needed, judging by wantarray. */
e8347627 1984 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1985 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1986
1987 if (PL_reg_sv) {
1988 /* Make $_ available to executed code. */
1989 if (PL_reg_sv != DEFSV) {
4d1ff10f 1990 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
d6a28714
JH
1991 SAVESPTR(DEFSV);
1992 DEFSV = PL_reg_sv;
b8c5462f 1993 }
d6a28714 1994
9041c2e3 1995 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1996 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1997 /* prepare for quick setting of pos */
14befaf4
DM
1998 sv_magic(PL_reg_sv, (SV*)0,
1999 PERL_MAGIC_regex_global, Nullch, 0);
2000 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 2001 mg->mg_len = -1;
b8c5462f 2002 }
d6a28714
JH
2003 PL_reg_magic = mg;
2004 PL_reg_oldpos = mg->mg_len;
c76ac1ee 2005 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 2006 }
09687e5a 2007 if (!PL_reg_curpm) {
0f79a09d 2008 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
2009#ifdef USE_ITHREADS
2010 {
2011 SV* repointer = newSViv(0);
577e12cc 2012 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2013 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2014 av_push(PL_regex_padav,repointer);
2015 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2016 PL_regex_pad = AvARRAY(PL_regex_padav);
2017 }
2018#endif
2019 }
aaa362c4 2020 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2021 PL_reg_oldcurpm = PL_curpm;
2022 PL_curpm = PL_reg_curpm;
2023 if (RX_MATCH_COPIED(prog)) {
2024 /* Here is a serious problem: we cannot rewrite subbeg,
2025 since it may be needed if this match fails. Thus
2026 $` inside (?{}) could fail... */
2027 PL_reg_oldsaved = prog->subbeg;
2028 PL_reg_oldsavedlen = prog->sublen;
2029 RX_MATCH_COPIED_off(prog);
2030 }
2031 else
2032 PL_reg_oldsaved = Nullch;
2033 prog->subbeg = PL_bostr;
2034 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2035 }
2036 prog->startp[0] = startpos - PL_bostr;
2037 PL_reginput = startpos;
2038 PL_regstartp = prog->startp;
2039 PL_regendp = prog->endp;
2040 PL_reglastparen = &prog->lastparen;
a01268b5 2041 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
2042 prog->lastparen = 0;
2043 PL_regsize = 0;
2044 DEBUG_r(PL_reg_starttry = startpos);
2045 if (PL_reg_start_tmpl <= prog->nparens) {
2046 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2047 if(PL_reg_start_tmp)
2048 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2049 else
2050 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2051 }
2052
128e8167
JH
2053#ifdef DEBUGGING
2054 sv_setpvn(PERL_DEBUG_PAD(0), "", 0);
2055 sv_setpvn(PERL_DEBUG_PAD(1), "", 0);
2056 sv_setpvn(PERL_DEBUG_PAD(2), "", 0);
2057#endif
2058
d6a28714
JH
2059 /* XXXX What this code is doing here?!!! There should be no need
2060 to do this again and again, PL_reglastparen should take care of
3dd2943c 2061 this! --ilya*/
dafc8851
JH
2062
2063 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2064 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2065 * PL_reglastparen), is not needed at all by the test suite
2066 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2067 * enough, for building DynaLoader, or otherwise this
2068 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2069 * will happen. Meanwhile, this code *is* needed for the
2070 * above-mentioned test suite tests to succeed. The common theme
2071 * on those tests seems to be returning null fields from matches.
2072 * --jhi */
dafc8851 2073#if 1
d6a28714
JH
2074 sp = prog->startp;
2075 ep = prog->endp;
2076 if (prog->nparens) {
09e8ae3b 2077 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
2078 *++sp = -1;
2079 *++ep = -1;
2080 }
2081 }
dafc8851 2082#endif
02db2b7b 2083 REGCP_SET(lastcp);
d6a28714
JH
2084 if (regmatch(prog->program + 1)) {
2085 prog->endp[0] = PL_reginput - PL_bostr;
2086 return 1;
2087 }
02db2b7b 2088 REGCP_UNWIND(lastcp);
d6a28714
JH
2089 return 0;
2090}
2091
02db2b7b
IZ
2092#define RE_UNWIND_BRANCH 1
2093#define RE_UNWIND_BRANCHJ 2
2094
2095union re_unwind_t;
2096
2097typedef struct { /* XX: makes sense to enlarge it... */
2098 I32 type;
2099 I32 prev;
2100 CHECKPOINT lastcp;
2101} re_unwind_generic_t;
2102
2103typedef struct {
2104 I32 type;
2105 I32 prev;
2106 CHECKPOINT lastcp;
2107 I32 lastparen;
2108 regnode *next;
2109 char *locinput;
2110 I32 nextchr;
2111#ifdef DEBUGGING
2112 int regindent;
2113#endif
2114} re_unwind_branch_t;
2115
2116typedef union re_unwind_t {
2117 I32 type;
2118 re_unwind_generic_t generic;
2119 re_unwind_branch_t branch;
2120} re_unwind_t;
2121
8ba1375e
MJD
2122#define sayYES goto yes
2123#define sayNO goto no
e0f9d4a8 2124#define sayNO_ANYOF goto no_anyof
8ba1375e
MJD
2125#define sayYES_FINAL goto yes_final
2126#define sayYES_LOUD goto yes_loud
2127#define sayNO_FINAL goto no_final
2128#define sayNO_SILENT goto do_no
2129#define saySAME(x) if (x) goto yes; else goto no
2130
2131#define REPORT_CODE_OFF 24
2132
d6a28714
JH
2133/*
2134 - regmatch - main matching routine
2135 *
2136 * Conceptually the strategy is simple: check to see whether the current
2137 * node matches, call self recursively to see whether the rest matches,
2138 * and then act accordingly. In practice we make some effort to avoid
2139 * recursion, in particular by going through "ordinary" nodes (that don't
2140 * need to know whether the rest of the match failed) by a loop instead of
2141 * by recursion.
2142 */
2143/* [lwall] I've hoisted the register declarations to the outer block in order to
2144 * maybe save a little bit of pushing and popping on the stack. It also takes
2145 * advantage of machines that use a register save mask on subroutine entry.
2146 */
2147STATIC I32 /* 0 failure, 1 success */
2148S_regmatch(pTHX_ regnode *prog)
2149{
d6a28714
JH
2150 register regnode *scan; /* Current node. */
2151 regnode *next; /* Next node. */
2152 regnode *inner; /* Next node in internal branch. */
2153 register I32 nextchr; /* renamed nextchr - nextchar colides with
2154 function of same name */
2155 register I32 n; /* no or next */
b7953727
JH
2156 register I32 ln = 0; /* len or last */
2157 register char *s = Nullch; /* operand or save */
d6a28714 2158 register char *locinput = PL_reginput;
b7953727 2159 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2160 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2161 I32 unwind = 0;
b7953727 2162#if 0
02db2b7b 2163 I32 firstcp = PL_savestack_ix;
b7953727 2164#endif
53c4c00c 2165 register bool do_utf8 = PL_reg_match_utf8;
2a782b5b 2166#ifdef DEBUGGING
ce333219
JH
2167 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2168 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2169 SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2a782b5b 2170#endif
02db2b7b 2171
d6a28714
JH
2172#ifdef DEBUGGING
2173 PL_regindent++;
2174#endif
2175
2176 /* Note that nextchr is a byte even in UTF */
2177 nextchr = UCHARAT(locinput);
2178 scan = prog;
2179 while (scan != NULL) {
8ba1375e 2180
2a782b5b 2181 DEBUG_r( {
d6a28714
JH
2182 SV *prop = sv_newmortal();
2183 int docolor = *PL_colors[0];
2184 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2185 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2186 /* The part of the string before starttry has one color
2187 (pref0_len chars), between starttry and current
2188 position another one (pref_len - pref0_len chars),
2189 after the current position the third one.
2190 We assume that pref0_len <= pref_len, otherwise we
2191 decrease pref0_len. */
9041c2e3 2192 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2193 ? (5 + taill) - l : locinput - PL_bostr;
2194 int pref0_len;
d6a28714 2195
df1ffd02 2196 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2197 pref_len++;
2198 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2199 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2200 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2201 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2202 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2203 l--;
d6a28714
JH
2204 if (pref0_len < 0)
2205 pref0_len = 0;
2206 if (pref0_len > pref_len)
2207 pref0_len = pref_len;
2208 regprop(prop, scan);
2a782b5b
JH
2209 {
2210 char *s0 =
df1ffd02 2211 do_utf8 ?
2a782b5b 2212 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
c728cb41 2213 pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2214 locinput - pref_len;
df1ffd02
JH
2215 int len0 = do_utf8 ? strlen(s0) : pref0_len;
2216 char *s1 = do_utf8 ?
2a782b5b 2217 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
c728cb41 2218 pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2219 locinput - pref_len + pref0_len;
df1ffd02
JH
2220 int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2221 char *s2 = do_utf8 ?
2a782b5b 2222 pv_uni_display(dsv2, (U8*)locinput,
c728cb41 2223 PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2224 locinput;
df1ffd02 2225 int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2226 PerlIO_printf(Perl_debug_log,
2227 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2228 (IV)(locinput - PL_bostr),
2229 PL_colors[4],
2230 len0, s0,
2231 PL_colors[5],
2232 PL_colors[2],
2233 len1, s1,
2234 PL_colors[3],
2235 (docolor ? "" : "> <"),
2236 PL_colors[0],
2237 len2, s2,
2238 PL_colors[1],
2239 15 - l - pref_len + 1,
2240 "",
2241 (IV)(scan - PL_regprogram), PL_regindent*2, "",
2242 SvPVX(prop));
2243 }
2244 });
d6a28714
JH
2245
2246 next = scan + NEXT_OFF(scan);
2247 if (next == scan)
2248 next = NULL;
2249
2250 switch (OP(scan)) {
2251 case BOL:
12d33761
HS
2252 if (locinput == PL_bostr || (PL_multiline &&
2253 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2254 {
2255 /* regtill = regbol; */
b8c5462f
JH
2256 break;
2257 }
d6a28714
JH
2258 sayNO;
2259 case MBOL:
12d33761
HS
2260 if (locinput == PL_bostr ||
2261 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2262 {
b8c5462f
JH
2263 break;
2264 }
d6a28714
JH
2265 sayNO;
2266 case SBOL:
c2a73568 2267 if (locinput == PL_bostr)
b8c5462f 2268 break;
d6a28714
JH
2269 sayNO;
2270 case GPOS:
2271 if (locinput == PL_reg_ganch)
2272 break;
2273 sayNO;
2274 case EOL:
2275 if (PL_multiline)
2276 goto meol;
2277 else
2278 goto seol;
2279 case MEOL:
2280 meol:
2281 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2282 sayNO;
b8c5462f 2283 break;
d6a28714
JH
2284 case SEOL:
2285 seol:
2286 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2287 sayNO;
d6a28714 2288 if (PL_regeol - locinput > 1)
b8c5462f 2289 sayNO;
b8c5462f 2290 break;
d6a28714
JH
2291 case EOS:
2292 if (PL_regeol != locinput)
b8c5462f 2293 sayNO;
d6a28714 2294 break;
ffc61ed2 2295 case SANY:
d6a28714 2296 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2297 sayNO;
f33976b4
DB
2298 if (do_utf8) {
2299 locinput += PL_utf8skip[nextchr];
2300 if (locinput > PL_regeol)
2301 sayNO;
2302 nextchr = UCHARAT(locinput);
2303 }
2304 else
2305 nextchr = UCHARAT(++locinput);
2306 break;
2307 case CANY:
2308 if (!nextchr && locinput >= PL_regeol)
2309 sayNO;
b8c5462f 2310 nextchr = UCHARAT(++locinput);
a0d0e21e 2311 break;
ffc61ed2 2312 case REG_ANY:
1aa99e6b
IH
2313 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2314 sayNO;
2315 if (do_utf8) {
b8c5462f 2316 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2317 if (locinput > PL_regeol)
2318 sayNO;
a0ed51b3 2319 nextchr = UCHARAT(locinput);
a0ed51b3 2320 }
1aa99e6b
IH
2321 else
2322 nextchr = UCHARAT(++locinput);
a0ed51b3 2323 break;
d6a28714 2324 case EXACT:
cd439c50
IZ
2325 s = STRING(scan);
2326 ln = STR_LEN(scan);
1aa99e6b 2327 if (do_utf8 != (UTF!=0)) {
bc517b45 2328 /* The target and the pattern have differing utf8ness. */
1aa99e6b
IH
2329 char *l = locinput;
2330 char *e = s + ln;
bc517b45 2331 STRLEN ulen;
a72c7584 2332
5ff6fc6d
JH
2333 if (do_utf8) {
2334 /* The target is utf8, the pattern is not utf8. */
1aa99e6b
IH
2335 while (s < e) {
2336 if (l >= PL_regeol)
5ff6fc6d
JH
2337 sayNO;
2338 if (NATIVE_TO_UNI(*(U8*)s) !=
2c237f47 2339 utf8_to_uvuni((U8*)l, &ulen))
5ff6fc6d 2340 sayNO;
bc517b45 2341 l += ulen;
5ff6fc6d 2342 s ++;
1aa99e6b 2343 }
5ff6fc6d
JH
2344 }
2345 else {
2346 /* The target is not utf8, the pattern is utf8. */
1aa99e6b
IH
2347 while (s < e) {
2348 if (l >= PL_regeol)
2349 sayNO;
5ff6fc6d 2350 if (NATIVE_TO_UNI(*((U8*)l)) !=
2c237f47 2351 utf8_to_uvuni((U8*)s, &ulen))
1aa99e6b 2352 sayNO;
bc517b45 2353 s += ulen;
a72c7584 2354 l ++;
1aa99e6b 2355 }
5ff6fc6d 2356 }
1aa99e6b
IH
2357 locinput = l;
2358 nextchr = UCHARAT(locinput);
2359 break;
2360 }
bc517b45 2361 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2362 /* Inline the first character, for speed. */
2363 if (UCHARAT(s) != nextchr)
2364 sayNO;
2365 if (PL_regeol - locinput < ln)
2366 sayNO;
2367 if (ln > 1 && memNE(s, locinput, ln))
2368 sayNO;
2369 locinput += ln;
2370 nextchr = UCHARAT(locinput);
2371 break;
2372 case EXACTFL:
b8c5462f
JH
2373 PL_reg_flags |= RF_tainted;
2374 /* FALL THROUGH */
d6a28714 2375 case EXACTF:
cd439c50
IZ
2376 s = STRING(scan);
2377 ln = STR_LEN(scan);
d6a28714 2378
d07ddd77
JH
2379 if (do_utf8 || UTF) {
2380 /* Either target or the pattern are utf8. */
d6a28714 2381 char *l = locinput;
d07ddd77 2382 char *e = PL_regeol;
bc517b45 2383
1feea2c7
JH
2384 if (ibcmp_utf8(s, 0, ln, UTF,
2385 l, &e, 0, do_utf8)) {
5486206c
JH
2386 /* One more case for the sharp s:
2387 * pack("U0U*", 0xDF) =~ /ss/i,
2388 * the 0xC3 0x9F are the UTF-8
2389 * byte sequence for the U+00DF. */
2390 if (!(do_utf8 &&
2391 toLOWER(s[0]) == 's' &&
2392 ln >= 2 &&
2393 toLOWER(s[1]) == 's' &&
2394 (U8)l[0] == 0xC3 &&
2395 e - l >= 2 &&
2396 (U8)l[1] == 0x9F))
2397 sayNO;
2398 }
d07ddd77
JH
2399 locinput = e;
2400 nextchr = UCHARAT(locinput);
2401 break;
a0ed51b3 2402 }
d6a28714 2403
bc517b45
JH
2404 /* Neither the target and the pattern are utf8. */
2405
d6a28714
JH
2406 /* Inline the first character, for speed. */
2407 if (UCHARAT(s) != nextchr &&
2408 UCHARAT(s) != ((OP(scan) == EXACTF)
2409 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2410 sayNO;
d6a28714 2411 if (PL_regeol - locinput < ln)
b8c5462f 2412 sayNO;
d6a28714
JH
2413 if (ln > 1 && (OP(scan) == EXACTF
2414 ? ibcmp(s, locinput, ln)
2415 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2416 sayNO;
d6a28714
JH
2417 locinput += ln;
2418 nextchr = UCHARAT(locinput);
a0d0e21e 2419 break;
d6a28714 2420 case ANYOF:
ffc61ed2 2421 if (do_utf8) {
9e55ce06
JH
2422 STRLEN inclasslen = PL_regeol - locinput;
2423
2424 if (!reginclasslen(scan, (U8*)locinput, &inclasslen, do_utf8))
e0f9d4a8 2425 sayNO_ANYOF;
ffc61ed2
JH
2426 if (locinput >= PL_regeol)
2427 sayNO;
9e55ce06 2428 locinput += inclasslen;
b8c5462f 2429 nextchr = UCHARAT(locinput);
e0f9d4a8 2430 break;
ffc61ed2
JH
2431 }
2432 else {
2433 if (nextchr < 0)
2434 nextchr = UCHARAT(locinput);
2435 if (!reginclass(scan, (U8*)locinput, do_utf8))
e0f9d4a8 2436 sayNO_ANYOF;
ffc61ed2
JH
2437 if (!nextchr && locinput >= PL_regeol)
2438 sayNO;
2439 nextchr = UCHARAT(++locinput);
e0f9d4a8
JH
2440 break;
2441 }
2442 no_anyof:
2443 /* If we might have the case of the German sharp s
2444 * in a casefolding Unicode character class. */
2445
ebc501f0
JH
2446 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2447 locinput += SHARP_S_SKIP;
e0f9d4a8 2448 nextchr = UCHARAT(locinput);
ffc61ed2 2449 }
e0f9d4a8
JH
2450 else
2451 sayNO;
b8c5462f 2452 break;
d6a28714 2453 case ALNUML:
b8c5462f
JH
2454 PL_reg_flags |= RF_tainted;
2455 /* FALL THROUGH */
d6a28714 2456 case ALNUM:
b8c5462f 2457 if (!nextchr)
4633a7c4 2458 sayNO;
ffc61ed2 2459 if (do_utf8) {
ad24be35 2460 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2461 if (!(OP(scan) == ALNUM
3568d838 2462 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2463 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2464 {
2465 sayNO;
a0ed51b3 2466 }
b8c5462f 2467 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2468 nextchr = UCHARAT(locinput);
2469 break;
2470 }
ffc61ed2 2471 if (!(OP(scan) == ALNUM
d6a28714 2472 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2473 sayNO;
b8c5462f 2474 nextchr = UCHARAT(++locinput);
a0d0e21e 2475 break;
d6a28714 2476 case NALNUML:
b8c5462f
JH
2477 PL_reg_flags |= RF_tainted;
2478 /* FALL THROUGH */
d6a28714
JH
2479 case NALNUM:
2480 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2481 sayNO;
ffc61ed2 2482 if (do_utf8) {
8269fa76 2483 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2484 if (OP(scan) == NALNUM
3568d838 2485 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2486 : isALNUM_LC_utf8((U8*)locinput))
2487 {
b8c5462f 2488 sayNO;
d6a28714 2489 }
b8c5462f
JH
2490 locinput += PL_utf8skip[nextchr];
2491 nextchr = UCHARAT(locinput);
2492 break;
2493 }
ffc61ed2 2494 if (OP(scan) == NALNUM
d6a28714 2495 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2496 sayNO;
76e3520e 2497 nextchr = UCHARAT(++locinput);
a0d0e21e 2498 break;
d6a28714
JH
2499 case BOUNDL:
2500 case NBOUNDL:
3280af22 2501 PL_reg_flags |= RF_tainted;
bbce6d69 2502 /* FALL THROUGH */
d6a28714
JH
2503 case BOUND:
2504 case NBOUND:
2505 /* was last char in word? */
ffc61ed2 2506 if (do_utf8) {
12d33761
HS
2507 if (locinput == PL_bostr)
2508 ln = '\n';
ffc61ed2
JH
2509 else {
2510 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2511
2b9d42f0 2512 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2513 }
2514 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2515 ln = isALNUM_uni(ln);
8269fa76 2516 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2517 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2518 }
2519 else {
9041c2e3 2520 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2521 n = isALNUM_LC_utf8((U8*)locinput);
2522 }
a0ed51b3 2523 }
d6a28714 2524 else {
12d33761
HS
2525 ln = (locinput != PL_bostr) ?
2526 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2527 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2528 ln = isALNUM(ln);
2529 n = isALNUM(nextchr);
2530 }
2531 else {
2532 ln = isALNUM_LC(ln);
2533 n = isALNUM_LC(nextchr);
2534 }
d6a28714 2535 }
ffc61ed2
JH
2536 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2537 OP(scan) == BOUNDL))
2538 sayNO;
a0ed51b3 2539 break;
d6a28714 2540 case SPACEL:
3280af22 2541 PL_reg_flags |= RF_tainted;
bbce6d69 2542 /* FALL THROUGH */
d6a28714 2543 case SPACE:
9442cb0e 2544 if (!nextchr)
4633a7c4 2545 sayNO;
1aa99e6b 2546 if (do_utf8) {
fd400ab9 2547 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2548 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2549 if (!(OP(scan) == SPACE
3568d838 2550 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2551 : isSPACE_LC_utf8((U8*)locinput)))
2552 {
2553 sayNO;
2554 }
2555 locinput += PL_utf8skip[nextchr];
2556 nextchr = UCHARAT(locinput);
2557 break;
d6a28714 2558 }
ffc61ed2
JH
2559 if (!(OP(scan) == SPACE
2560 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2561 sayNO;
2562 nextchr = UCHARAT(++locinput);
2563 }
2564 else {
2565 if (!(OP(scan) == SPACE
2566 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2567 sayNO;
2568 nextchr = UCHARAT(++locinput);
a0ed51b3 2569 }
a0ed51b3 2570 break;
d6a28714 2571 case NSPACEL:
3280af22 2572 PL_reg_flags |= RF_tainted;
bbce6d69 2573 /* FALL THROUGH */
d6a28714 2574 case NSPACE:
9442cb0e 2575 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2576 sayNO;
1aa99e6b 2577 if (do_utf8) {
8269fa76 2578 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2579 if (OP(scan) == NSPACE
3568d838 2580 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2581 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2582 {
2583 sayNO;
2584 }
2585 locinput += PL_utf8skip[nextchr];
2586 nextchr = UCHARAT(locinput);
2587 break;
a0ed51b3 2588 }
ffc61ed2 2589 if (OP(scan) == NSPACE
d6a28714 2590 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2591 sayNO;
76e3520e 2592 nextchr = UCHARAT(++locinput);
a0d0e21e 2593 break;
d6a28714 2594 case DIGITL:
a0ed51b3
LW
2595 PL_reg_flags |= RF_tainted;
2596 /* FALL THROUGH */
d6a28714 2597 case DIGIT:
9442cb0e 2598 if (!nextchr)
a0ed51b3 2599 sayNO;
1aa99e6b 2600 if (do_utf8) {
8269fa76 2601 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2602 if (!(OP(scan) == DIGIT
3568d838 2603 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2604 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2605 {
a0ed51b3 2606 sayNO;
dfe13c55 2607 }
6f06b55f 2608 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2609 nextchr = UCHARAT(locinput);
2610 break;
2611 }
ffc61ed2 2612 if (!(OP(scan) == DIGIT
9442cb0e 2613 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2614 sayNO;
2615 nextchr = UCHARAT(++locinput);
2616 break;
d6a28714 2617 case NDIGITL:
b8c5462f
JH
2618 PL_reg_flags |= RF_tainted;
2619 /* FALL THROUGH */
d6a28714 2620 case NDIGIT:
9442cb0e 2621 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2622 sayNO;
1aa99e6b 2623 if (do_utf8) {
8269fa76 2624 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2625 if (OP(scan) == NDIGIT
3568d838 2626 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2627 : isDIGIT_LC_utf8((U8*)locinput))
2628 {
a0ed51b3 2629 sayNO;
9442cb0e 2630 }
6f06b55f 2631 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2632 nextchr = UCHARAT(locinput);
2633 break;
2634 }
ffc61ed2 2635 if (OP(scan) == NDIGIT
9442cb0e 2636 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2637 sayNO;
2638 nextchr = UCHARAT(++locinput);
2639 break;
2640 case CLUMP:
b7c83a7e 2641 if (locinput >= PL_regeol)
a0ed51b3 2642 sayNO;
b7c83a7e
JH
2643 if (do_utf8) {
2644 LOAD_UTF8_CHARCLASS(mark,"~");
2645 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2646 sayNO;
2647 locinput += PL_utf8skip[nextchr];
2648 while (locinput < PL_regeol &&
2649 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2650 locinput += UTF8SKIP(locinput);
2651 if (locinput > PL_regeol)
2652 sayNO;
eb08e2da
JH
2653 }
2654 else
2655 locinput++;
a0ed51b3
LW
2656 nextchr = UCHARAT(locinput);
2657 break;
c8756f30 2658 case REFFL:
3280af22 2659 PL_reg_flags |= RF_tainted;
c8756f30 2660 /* FALL THROUGH */
c277df42 2661 case REF:
c8756f30 2662 case REFF:
c277df42 2663 n = ARG(scan); /* which paren pair */
cf93c79d 2664 ln = PL_regstartp[n];
2c2d71f5 2665 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2666 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2667 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2668 if (ln == PL_regendp[n])
a0d0e21e 2669 break;
a0ed51b3 2670
cf93c79d 2671 s = PL_bostr + ln;
1aa99e6b 2672 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2673 char *l = locinput;
cf93c79d 2674 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2675 /*
2676 * Note that we can't do the "other character" lookup trick as
2677 * in the 8-bit case (no pun intended) because in Unicode we
2678 * have to map both upper and title case to lower case.
2679 */
2680 if (OP(scan) == REFF) {
a2a2844f 2681 STRLEN ulen1, ulen2;
e7ae6809
JH
2682 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
2683 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a0ed51b3
LW
2684 while (s < e) {
2685 if (l >= PL_regeol)
2686 sayNO;
a2a2844f
JH
2687 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2688 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 2689 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 2690 sayNO;
a2a2844f
JH
2691 s += ulen1;
2692 l += ulen2;
a0ed51b3
LW
2693 }
2694 }
2695 locinput = l;
2696 nextchr = UCHARAT(locinput);
2697 break;
2698 }
2699
a0d0e21e 2700 /* Inline the first character, for speed. */
76e3520e 2701 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2702 (OP(scan) == REF ||
2703 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2704 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2705 sayNO;
cf93c79d 2706 ln = PL_regendp[n] - ln;
3280af22 2707 if (locinput + ln > PL_regeol)
4633a7c4 2708 sayNO;
c8756f30
AK
2709 if (ln > 1 && (OP(scan) == REF
2710 ? memNE(s, locinput, ln)
2711 : (OP(scan) == REFF
2712 ? ibcmp(s, locinput, ln)
2713 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2714 sayNO;
a0d0e21e 2715 locinput += ln;
76e3520e 2716 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2717 break;
2718
2719 case NOTHING:
c277df42 2720 case TAIL:
a0d0e21e
LW
2721 break;
2722 case BACK:
2723 break;
c277df42
IZ
2724 case EVAL:
2725 {
2726 dSP;
533c011a 2727 OP_4tree *oop = PL_op;
3280af22
NIS
2728 COP *ocurcop = PL_curcop;
2729 SV **ocurpad = PL_curpad;
c277df42 2730 SV *ret;
9041c2e3 2731
c277df42 2732 n = ARG(scan);
533c011a 2733 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2734 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2735 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2736 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2737
8e5e9ebe
RGS
2738 {
2739 SV **before = SP;
2740 CALLRUNOPS(aTHX); /* Scalar context. */
2741 SPAGAIN;
2742 if (SP == before)
2743 ret = Nullsv; /* protect against empty (?{}) blocks. */
2744 else {
2745 ret = POPs;
2746 PUTBACK;
2747 }
2748 }
2749
0f5d15d6
IZ
2750 PL_op = oop;
2751 PL_curpad = ocurpad;
2752 PL_curcop = ocurcop;
c277df42 2753 if (logical) {
0f5d15d6
IZ
2754 if (logical == 2) { /* Postponed subexpression. */
2755 regexp *re;
22c35a8c 2756 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2757 re_cc_state state;
0f5d15d6
IZ
2758 CHECKPOINT cp, lastcp;
2759
2760 if(SvROK(ret) || SvRMAGICAL(ret)) {
2761 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2762
2763 if(SvMAGICAL(sv))
14befaf4 2764 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2765 }
2766 if (mg) {
2767 re = (regexp *)mg->mg_obj;
df0003d4 2768 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2769 }
2770 else {
2771 STRLEN len;
2772 char *t = SvPV(ret, len);
2773 PMOP pm;
2774 char *oprecomp = PL_regprecomp;
2775 I32 osize = PL_regsize;
2776 I32 onpar = PL_regnpar;
2777
5fcd1c1b 2778 Zero(&pm, 1, PMOP);
cea2e8a9 2779 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2780 if (!(SvFLAGS(ret)
0f5d15d6 2781 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2782 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2783 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2784 PL_regprecomp = oprecomp;
2785 PL_regsize = osize;
2786 PL_regnpar = onpar;
2787 }
2788 DEBUG_r(
9041c2e3 2789 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2790 "Entering embedded `%s%.60s%s%s'\n",
2791 PL_colors[0],
2792 re->precomp,
2793 PL_colors[1],
2794 (strlen(re->precomp) > 60 ? "..." : ""))
2795 );
2796 state.node = next;
2797 state.prev = PL_reg_call_cc;
2798 state.cc = PL_regcc;
2799 state.re = PL_reg_re;
2800
2ab05381 2801 PL_regcc = 0;
9041c2e3 2802
0f5d15d6 2803 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2804 REGCP_SET(lastcp);
0f5d15d6
IZ
2805 cache_re(re);
2806 state.ss = PL_savestack_ix;
2807 *PL_reglastparen = 0;
a01268b5 2808 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2809 PL_reg_call_cc = &state;
2810 PL_reginput = locinput;
2c2d71f5
JH
2811
2812 /* XXXX This is too dramatic a measure... */
2813 PL_reg_maxiter = 0;
2814
0f5d15d6 2815 if (regmatch(re->program + 1)) {
2c914db6
IZ
2816 /* Even though we succeeded, we need to restore
2817 global variables, since we may be wrapped inside
2818 SUSPEND, thus the match may be not finished yet. */
2819
2820 /* XXXX Do this only if SUSPENDed? */
2821 PL_reg_call_cc = state.prev;
2822 PL_regcc = state.cc;
2823 PL_reg_re = state.re;
2824 cache_re(PL_reg_re);
2825
2826 /* XXXX This is too dramatic a measure... */
2827 PL_reg_maxiter = 0;
2828
2829 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2830 ReREFCNT_dec(re);
2831 regcpblow(cp);
2832 sayYES;
2833 }
0f5d15d6 2834 ReREFCNT_dec(re);
02db2b7b 2835 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2836 regcppop();
2837 PL_reg_call_cc = state.prev;
2838 PL_regcc = state.cc;
2839 PL_reg_re = state.re;
d3790889 2840 cache_re(PL_reg_re);
2c2d71f5
JH
2841
2842 /* XXXX This is too dramatic a measure... */
2843 PL_reg_maxiter = 0;
2844
8e514ae6 2845 logical = 0;
0f5d15d6
IZ
2846 sayNO;
2847 }
c277df42 2848 sw = SvTRUE(ret);
0f5d15d6 2849 logical = 0;
a0ed51b3
LW
2850 }
2851 else
3280af22 2852 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2853 break;
2854 }
a0d0e21e 2855 case OPEN:
c277df42 2856 n = ARG(scan); /* which paren pair */
3280af22
NIS
2857 PL_reg_start_tmp[n] = locinput;
2858 if (n > PL_regsize)
2859 PL_regsize = n;
a0d0e21e
LW
2860 break;
2861 case CLOSE:
c277df42 2862 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2863 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2864 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2865 if (n > *PL_reglastparen)
2866 *PL_reglastparen = n;
a01268b5 2867 *PL_reglastcloseparen = n;
a0d0e21e 2868 break;
c277df42
IZ
2869 case GROUPP:
2870 n = ARG(scan); /* which paren pair */
cf93c79d 2871 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2872 break;
2873 case IFTHEN:
2c2d71f5 2874 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2875 if (sw)
2876 next = NEXTOPER(NEXTOPER(scan));
2877 else {
2878 next = scan + ARG(scan);
2879 if (OP(next) == IFTHEN) /* Fake one. */
2880 next = NEXTOPER(NEXTOPER(next));
2881 }
2882 break;
2883 case LOGICAL:
0f5d15d6 2884 logical = scan->flags;
c277df42 2885 break;
2ab05381
IZ
2886/*******************************************************************
2887 PL_regcc contains infoblock about the innermost (...)* loop, and
2888 a pointer to the next outer infoblock.
2889
2890 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2891
2892 1) After matching X, regnode for CURLYX is processed;
2893
9041c2e3 2894 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2895 regmatch() recursively with the starting point at WHILEM node;
2896
2897 3) Each hit of WHILEM node tries to match A and Z (in the order
2898 depending on the current iteration, min/max of {min,max} and
2899 greediness). The information about where are nodes for "A"
2900 and "Z" is read from the infoblock, as is info on how many times "A"
2901 was already matched, and greediness.
2902
2903 4) After A matches, the same WHILEM node is hit again.
2904
2905 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2906 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2907 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2908 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2909 of the external loop.
2910
2911 Currently present infoblocks form a tree with a stem formed by PL_curcc
2912 and whatever it mentions via ->next, and additional attached trees
2913 corresponding to temporarily unset infoblocks as in "5" above.
2914
9041c2e3 2915 In the following picture infoblocks for outer loop of
2ab05381
IZ
2916 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2917 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2918 infoblocks are drawn below the "reset" infoblock.
2919
2920 In fact in the picture below we do not show failed matches for Z and T
2921 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2922 more obvious *why* one needs to *temporary* unset infoblocks.]
2923
2924 Matched REx position InfoBlocks Comment
2925 (Y(A)*?Z)*?T x
2926 Y(A)*?Z)*?T x <- O
2927 Y (A)*?Z)*?T x <- O
2928 Y A)*?Z)*?T x <- O <- I
2929 YA )*?Z)*?T x <- O <- I
2930 YA A)*?Z)*?T x <- O <- I
2931 YAA )*?Z)*?T x <- O <- I
2932 YAA Z)*?T x <- O # Temporary unset I
2933 I
2934
2935 YAAZ Y(A)*?Z)*?T x <- O
2936 I
2937
2938 YAAZY (A)*?Z)*?T x <- O
2939 I
2940
2941 YAAZY A)*?Z)*?T x <- O <- I
2942 I
2943
2944 YAAZYA )*?Z)*?T x <- O <- I
2945 I
2946
2947 YAAZYA Z)*?T x <- O # Temporary unset I
2948 I,I
2949
2950 YAAZYAZ )*?T x <- O
2951 I,I
2952
2953 YAAZYAZ T x # Temporary unset O
2954 O
2955 I,I
2956
2957 YAAZYAZT x
2958 O
2959 I,I
2960 *******************************************************************/
a0d0e21e
LW
2961 case CURLYX: {
2962 CURCUR cc;
3280af22 2963 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2964 /* No need to save/restore up to this paren */
2965 I32 parenfloor = scan->flags;
c277df42
IZ
2966
2967 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2968 next += ARG(next);
3280af22
NIS
2969 cc.oldcc = PL_regcc;
2970 PL_regcc = &cc;
cb434fcc
IZ
2971 /* XXXX Probably it is better to teach regpush to support
2972 parenfloor > PL_regsize... */
2973 if (parenfloor > *PL_reglastparen)
2974 parenfloor = *PL_reglastparen; /* Pessimization... */
2975 cc.parenfloor = parenfloor;
a0d0e21e
LW
2976 cc.cur = -1;
2977 cc.min = ARG1(scan);
2978 cc.max = ARG2(scan);
c277df42 2979 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2980 cc.next = next;
2981 cc.minmod = minmod;
2982 cc.lastloc = 0;
3280af22 2983 PL_reginput = locinput;
a0d0e21e
LW
2984 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2985 regcpblow(cp);
3280af22 2986 PL_regcc = cc.oldcc;
4633a7c4 2987 saySAME(n);
a0d0e21e
LW
2988 }
2989 /* NOT REACHED */
2990 case WHILEM: {
2991 /*
2992 * This is really hard to understand, because after we match
2993 * what we're trying to match, we must make sure the rest of
2c2d71f5 2994 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2995 * to go back UP the parse tree by recursing ever deeper. And
2996 * if it fails, we have to reset our parent's current state
2997 * that we can try again after backing off.
2998 */
2999
c277df42 3000 CHECKPOINT cp, lastcp;
3280af22 3001 CURCUR* cc = PL_regcc;
c277df42
IZ
3002 char *lastloc = cc->lastloc; /* Detection of 0-len. */
3003
4633a7c4 3004 n = cc->cur + 1; /* how many we know we matched */
3280af22 3005 PL_reginput = locinput;
a0d0e21e 3006
c277df42 3007 DEBUG_r(
9041c2e3
NIS
3008 PerlIO_printf(Perl_debug_log,
3009 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 3010 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 3011 (long)n, (long)cc->min,
c277df42
IZ
3012 (long)cc->max, (long)cc)
3013 );
4633a7c4 3014
a0d0e21e
LW
3015 /* If degenerate scan matches "", assume scan done. */
3016
579cf2c3 3017 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 3018 PL_regcc = cc->oldcc;
2ab05381
IZ
3019 if (PL_regcc)
3020 ln = PL_regcc->cur;
c277df42 3021 DEBUG_r(
c3464db5
DD
3022 PerlIO_printf(Perl_debug_log,
3023 "%*s empty match detected, try continuation...\n",
3280af22 3024 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3025 );
a0d0e21e 3026 if (regmatch(cc->next))
4633a7c4 3027 sayYES;
2ab05381
IZ
3028 if (PL_regcc)
3029 PL_regcc->cur = ln;
3280af22 3030 PL_regcc = cc;
4633a7c4 3031 sayNO;
a0d0e21e
LW
3032 }
3033
3034 /* First just match a string of min scans. */
3035
3036 if (n < cc->min) {
3037 cc->cur = n;
3038 cc->lastloc = locinput;
4633a7c4
LW
3039 if (regmatch(cc->scan))
3040 sayYES;
3041 cc->cur = n - 1;
c277df42 3042 cc->lastloc = lastloc;
4633a7c4 3043 sayNO;
a0d0e21e
LW
3044 }
3045
2c2d71f5
JH
3046 if (scan->flags) {
3047 /* Check whether we already were at this position.
3048 Postpone detection until we know the match is not
3049 *that* much linear. */
3050 if (!PL_reg_maxiter) {
3051 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3052 PL_reg_leftiter = PL_reg_maxiter;
3053 }
3054 if (PL_reg_leftiter-- == 0) {
3055 I32 size = (PL_reg_maxiter + 7)/8;
3056 if (PL_reg_poscache) {
3057 if (PL_reg_poscache_size < size) {
3058 Renew(PL_reg_poscache, size, char);
3059 PL_reg_poscache_size = size;
3060 }
3061 Zero(PL_reg_poscache, size, char);
3062 }
3063 else {
3064 PL_reg_poscache_size = size;
3065 Newz(29, PL_reg_poscache, size, char);
3066 }
3067 DEBUG_r(
3068 PerlIO_printf(Perl_debug_log,
3069 "%sDetected a super-linear match, switching on caching%s...\n",
3070 PL_colors[4], PL_colors[5])
3071 );
3072 }
3073 if (PL_reg_leftiter < 0) {
3074 I32 o = locinput - PL_bostr, b;
3075
3076 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
3077 b = o % 8;
3078 o /= 8;
3079 if (PL_reg_poscache[o] & (1<<b)) {
3080 DEBUG_r(
3081 PerlIO_printf(Perl_debug_log,
3082 "%*s already tried at this position...\n",
3083 REPORT_CODE_OFF+PL_regindent*2, "")
3084 );
7821416a 3085 sayNO_SILENT;
2c2d71f5
JH
3086 }
3087 PL_reg_poscache[o] |= (1<<b);
3088 }
3089 }
3090
a0d0e21e
LW
3091 /* Prefer next over scan for minimal matching. */
3092
3093 if (cc->minmod) {
3280af22 3094 PL_regcc = cc->oldcc;
2ab05381
IZ
3095 if (PL_regcc)
3096 ln = PL_regcc->cur;
5f05dabc 3097 cp = regcppush(cc->parenfloor);
02db2b7b 3098 REGCP_SET(lastcp);
5f05dabc 3099 if (regmatch(cc->next)) {
c277df42 3100 regcpblow(cp);
4633a7c4 3101 sayYES; /* All done. */
5f05dabc 3102 }
02db2b7b 3103 REGCP_UNWIND(lastcp);
5f05dabc 3104 regcppop();
2ab05381
IZ
3105 if (PL_regcc)
3106 PL_regcc->cur = ln;
3280af22 3107 PL_regcc = cc;
a0d0e21e 3108
c277df42 3109 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 3110 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3111 && !(PL_reg_flags & RF_warned)) {
3112 PL_reg_flags |= RF_warned;
e476b1b5 3113 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
3114 "Complex regular subexpression recursion",
3115 REG_INFTY - 1);
c277df42 3116 }
4633a7c4 3117 sayNO;
c277df42 3118 }
a687059c 3119
c277df42 3120 DEBUG_r(
c3464db5
DD
3121 PerlIO_printf(Perl_debug_log,
3122 "%*s trying longer...\n",
3280af22 3123 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3124 );
a0d0e21e 3125 /* Try scanning more and see if it helps. */
3280af22 3126 PL_reginput = locinput;
a0d0e21e
LW
3127 cc->cur = n;
3128 cc->lastloc = locinput;
5f05dabc 3129 cp = regcppush(cc->parenfloor);
02db2b7b 3130 REGCP_SET(lastcp);
5f05dabc 3131 if (regmatch(cc->scan)) {
c277df42 3132 regcpblow(cp);
4633a7c4 3133 sayYES;
5f05dabc 3134 }
02db2b7b 3135 REGCP_UNWIND(lastcp);
5f05dabc 3136 regcppop();
4633a7c4 3137 cc->cur = n - 1;
c277df42 3138 cc->lastloc = lastloc;
4633a7c4 3139 sayNO;
a0d0e21e
LW
3140 }
3141
3142 /* Prefer scan over next for maximal matching. */
3143
3144 if (n < cc->max) { /* More greed allowed? */
5f05dabc 3145 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
3146 cc->cur = n;
3147 cc->lastloc = locinput;
02db2b7b 3148 REGCP_SET(lastcp);
5f05dabc 3149 if (regmatch(cc->scan)) {
c277df42 3150 regcpblow(cp);
4633a7c4 3151 sayYES;
5f05dabc 3152 }
02db2b7b 3153 REGCP_UNWIND(lastcp);
a0d0e21e 3154 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3155 PL_reginput = locinput;
c277df42 3156 DEBUG_r(
c3464db5
DD
3157 PerlIO_printf(Perl_debug_log,
3158 "%*s failed, try continuation...\n",
3280af22 3159 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3160 );
3161 }
9041c2e3 3162 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3163 && !(PL_reg_flags & RF_warned)) {
3280af22 3164 PL_reg_flags |= RF_warned;
e476b1b5 3165 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
3166 "Complex regular subexpression recursion",
3167 REG_INFTY - 1);
a0d0e21e
LW
3168 }
3169
3170 /* Failed deeper matches of scan, so see if this one works. */
3280af22 3171 PL_regcc = cc->oldcc;
2ab05381
IZ
3172 if (PL_regcc)
3173 ln = PL_regcc->cur;
a0d0e21e 3174 if (regmatch(cc->next))
4633a7c4 3175 sayYES;
2ab05381
IZ
3176 if (PL_regcc)
3177 PL_regcc->cur = ln;
3280af22 3178 PL_regcc = cc;
4633a7c4 3179 cc->cur = n - 1;
c277df42 3180 cc->lastloc = lastloc;
4633a7c4 3181 sayNO;
a0d0e21e
LW
3182 }
3183 /* NOT REACHED */
9041c2e3 3184 case BRANCHJ:
c277df42
IZ
3185 next = scan + ARG(scan);
3186 if (next == scan)
3187 next = NULL;
3188 inner = NEXTOPER(NEXTOPER(scan));
3189 goto do_branch;
9041c2e3 3190 case BRANCH:
c277df42
IZ
3191 inner = NEXTOPER(scan);
3192 do_branch:
3193 {
c277df42
IZ
3194 c1 = OP(scan);
3195 if (OP(next) != c1) /* No choice. */
3196 next = inner; /* Avoid recursion. */
a0d0e21e 3197 else {
02db2b7b
IZ
3198 I32 lastparen = *PL_reglastparen;
3199 I32 unwind1;
3200 re_unwind_branch_t *uw;
3201
3202 /* Put unwinding data on stack */
3203 unwind1 = SSNEWt(1,re_unwind_branch_t);
3204 uw = SSPTRt(unwind1,re_unwind_branch_t);
3205 uw->prev = unwind;
3206 unwind = unwind1;
3207 uw->type = ((c1 == BRANCH)
3208 ? RE_UNWIND_BRANCH
3209 : RE_UNWIND_BRANCHJ);
3210 uw->lastparen = lastparen;
3211 uw->next = next;
3212 uw->locinput = locinput;
3213 uw->nextchr = nextchr;
3214#ifdef DEBUGGING
3215 uw->regindent = ++PL_regindent;
3216#endif
c277df42 3217
02db2b7b
IZ
3218 REGCP_SET(uw->lastcp);
3219
3220 /* Now go into the first branch */
3221 next = inner;
a687059c 3222 }
a0d0e21e
LW
3223 }
3224 break;
3225 case MINMOD:
3226 minmod = 1;
3227 break;
c277df42
IZ
3228 case CURLYM:
3229 {
00db4c45 3230 I32 l = 0;
c277df42 3231 CHECKPOINT lastcp;
9041c2e3 3232
c277df42
IZ
3233 /* We suppose that the next guy does not need
3234 backtracking: in particular, it is of constant length,
3235 and has no parenths to influence future backrefs. */
3236 ln = ARG1(scan); /* min to match */
3237 n = ARG2(scan); /* max to match */
c277df42
IZ
3238 paren = scan->flags;
3239 if (paren) {
3280af22
NIS
3240 if (paren > PL_regsize)
3241 PL_regsize = paren;
3242 if (paren > *PL_reglastparen)
3243 *PL_reglastparen = paren;
c277df42 3244 }
dc45a647 3245 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3246 if (paren)
3247 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3248 PL_reginput = locinput;
c277df42
IZ
3249 if (minmod) {
3250 minmod = 0;
3251 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3252 sayNO;
f31a99c8
HS
3253 /* if we matched something zero-length we don't need to
3254 backtrack - capturing parens are already defined, so
3255 the caveat in the maximal case doesn't apply
3256
3257 XXXX if ln == 0, we can redo this check first time
3258 through the following loop
3259 */
3260 if (ln && l == 0)
3261 n = ln; /* don't backtrack */
3280af22 3262 locinput = PL_reginput;
cca55fe3 3263 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3264 regnode *text_node = next;
3265
cca55fe3 3266 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3267
cca55fe3 3268 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3269 else {
cca55fe3
JP
3270 if (PL_regkind[(U8)OP(text_node)] == REF) {
3271 I32 n, ln;
3272 n = ARG(text_node); /* which paren pair */
3273 ln = PL_regstartp[n];
3274 /* assume yes if we haven't seen CLOSEn */
3275 if (
3276 *PL_reglastparen < n ||
3277 ln == -1 ||
3278 ln == PL_regendp[n]
3279 ) {
3280 c1 = c2 = -1000;
3281 goto assume_ok_MM;
3282 }
3283 c1 = *(PL_bostr + ln);
3284 }
3285 else { c1 = (U8)*STRING(text_node); }
af5decee 3286 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3287 c2 = PL_fold[c1];
af5decee 3288 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3289 c2 = PL_fold_locale[c1];
3290 else
3291 c2 = c1;
3292 }
a0ed51b3
LW
3293 }
3294 else
c277df42 3295 c1 = c2 = -1000;
cca55fe3 3296 assume_ok_MM:
02db2b7b 3297 REGCP_SET(lastcp);
5f4b28b2 3298 /* This may be improved if l == 0. */
c277df42
IZ
3299 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3300 /* If it could work, try it. */
3301 if (c1 == -1000 ||
3280af22
NIS
3302 UCHARAT(PL_reginput) == c1 ||
3303 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3304 {
3305 if (paren) {
f31a99c8 3306 if (ln) {
cf93c79d
IZ
3307 PL_regstartp[paren] =
3308 HOPc(PL_reginput, -l) - PL_bostr;
3309 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3310 }
3311 else
cf93c79d 3312 PL_regendp[paren] = -1;
c277df42
IZ
3313 }
3314 if (regmatch(next))
3315 sayYES;
02db2b7b 3316 REGCP_UNWIND(lastcp);
c277df42
IZ
3317 }
3318 /* Couldn't or didn't -- move forward. */
3280af22 3319 PL_reginput = locinput;
c277df42
IZ
3320 if (regrepeat_hard(scan, 1, &l)) {
3321 ln++;
3280af22 3322 locinput = PL_reginput;
c277df42
IZ
3323 }
3324 else
3325 sayNO;
3326 }
a0ed51b3
LW
3327 }
3328 else {
c277df42 3329 n = regrepeat_hard(scan, n, &l);
f31a99c8
HS
3330 /* if we matched something zero-length we don't need to
3331 backtrack, unless the minimum count is zero and we
3332 are capturing the result - in that case the capture
3333 being defined or not may affect later execution
3334 */
3335 if (n != 0 && l == 0 && !(paren && ln == 0))
3336 ln = n; /* don't backtrack */
3280af22 3337 locinput = PL_reginput;
c277df42 3338 DEBUG_r(
5c0ca799 3339 PerlIO_printf(Perl_debug_log,
faccc32b 3340 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3341 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3342 (IV) n, (IV)l)
c277df42
IZ
3343 );
3344 if (n >= ln) {
cca55fe3 3345 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3346 regnode *text_node = next;
3347
cca55fe3 3348 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3349
cca55fe3 3350 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3351 else {
cca55fe3
JP
3352 if (PL_regkind[(U8)OP(text_node)] == REF) {
3353 I32 n, ln;
3354 n = ARG(text_node); /* which paren pair */
3355 ln = PL_regstartp[n];
3356 /* assume yes if we haven't seen CLOSEn */
3357 if (
3358 *PL_reglastparen < n ||
3359 ln == -1 ||
3360 ln == PL_regendp[n]
3361 ) {
3362 c1 = c2 = -1000;
3363 goto assume_ok_REG;
3364 }
3365 c1 = *(PL_bostr + ln);
3366 }
3367 else { c1 = (U8)*STRING(text_node); }
3368
af5decee 3369 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3370 c2 = PL_fold[c1];
af5decee 3371 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3372 c2 = PL_fold_locale[c1];
3373 else
3374 c2 = c1;
3375 }
a0ed51b3
LW
3376 }
3377 else
c277df42
IZ
3378 c1 = c2 = -1000;
3379 }
cca55fe3 3380 assume_ok_REG:
02db2b7b 3381 REGCP_SET(lastcp);
c277df42
IZ
3382 while (n >= ln) {
3383 /* If it could work, try it. */
3384 if (c1 == -1000 ||
3280af22
NIS
3385 UCHARAT(PL_reginput) == c1 ||
3386 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3387 {
3388 DEBUG_r(
c3464db5 3389 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3390 "%*s trying tail with n=%"IVdf"...\n",
3391 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3392 );
3393 if (paren) {
3394 if (n) {
cf93c79d
IZ
3395 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3396 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3397 }
a0ed51b3 3398 else
cf93c79d 3399 PL_regendp[paren] = -1;
c277df42 3400 }
a0ed51b3
LW
3401 if (regmatch(next))
3402 sayYES;
02db2b7b 3403 REGCP_UNWIND(lastcp);
a0ed51b3 3404 }
c277df42
IZ
3405 /* Couldn't or didn't -- back up. */
3406 n--;
dfe13c55 3407 locinput = HOPc(locinput, -l);
3280af22 3408 PL_reginput = locinput;
c277df42
IZ
3409 }
3410 }
3411 sayNO;
3412 break;
3413 }
3414 case CURLYN:
3415 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3416 if (paren > PL_regsize)
3417 PL_regsize = paren;
3418 if (paren > *PL_reglastparen)
3419 *PL_reglastparen = paren;
c277df42
IZ
3420 ln = ARG1(scan); /* min to match */
3421 n = ARG2(scan); /* max to match */
dc45a647 3422 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3423 goto repeat;
a0d0e21e 3424 case CURLY:
c277df42 3425 paren = 0;
a0d0e21e
LW
3426 ln = ARG1(scan); /* min to match */
3427 n = ARG2(scan); /* max to match */
dc45a647 3428 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3429 goto repeat;
3430 case STAR:
3431 ln = 0;
c277df42 3432 n = REG_INFTY;
a0d0e21e 3433 scan = NEXTOPER(scan);
c277df42 3434 paren = 0;
a0d0e21e
LW
3435 goto repeat;
3436 case PLUS:
c277df42
IZ
3437 ln = 1;
3438 n = REG_INFTY;
3439 scan = NEXTOPER(scan);
3440 paren = 0;
3441 repeat:
a0d0e21e
LW
3442 /*
3443 * Lookahead to avoid useless match attempts
3444 * when we know what character comes next.
3445 */
5f80c4cf
JP
3446
3447 /*
3448 * Used to only do .*x and .*?x, but now it allows
3449 * for )'s, ('s and (?{ ... })'s to be in the way
3450 * of the quantifier and the EXACT-like node. -- japhy
3451 */
3452
cca55fe3 3453 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3454 U8 *s;
3455 regnode *text_node = next;
3456
cca55fe3 3457 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3458
cca55fe3 3459 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3460 else {
cca55fe3
JP
3461 if (PL_regkind[(U8)OP(text_node)] == REF) {
3462 I32 n, ln;
3463 n = ARG(text_node); /* which paren pair */
3464 ln = PL_regstartp[n];
3465 /* assume yes if we haven't seen CLOSEn */
3466 if (
3467 *PL_reglastparen < n ||
3468 ln == -1 ||
3469 ln == PL_regendp[n]
3470 ) {
3471 c1 = c2 = -1000;
3472 goto assume_ok_easy;
3473 }
9246c65e 3474 s = (U8*)PL_bostr + ln;
cca55fe3
JP
3475 }
3476 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
3477
3478 if (!UTF) {
3479 c2 = c1 = *s;
f65d3ee7 3480 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3481 c2 = PL_fold[c1];
f65d3ee7 3482 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf 3483 c2 = PL_fold_locale[c1];
1aa99e6b 3484 }
5f80c4cf 3485 else { /* UTF */
f65d3ee7 3486 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 3487 STRLEN ulen1, ulen2;
e7ae6809
JH
3488 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
3489 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
3490
3491 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3492 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3493
3494 c1 = utf8_to_uvuni(tmpbuf1, 0);
3495 c2 = utf8_to_uvuni(tmpbuf2, 0);
5f80c4cf
JP
3496 }
3497 else {
3498 c2 = c1 = utf8_to_uvchr(s, NULL);
3499 }
1aa99e6b
IH
3500 }
3501 }
bbce6d69 3502 }
a0d0e21e 3503 else
bbce6d69 3504 c1 = c2 = -1000;
cca55fe3 3505 assume_ok_easy:
3280af22 3506 PL_reginput = locinput;
a0d0e21e 3507 if (minmod) {
c277df42 3508 CHECKPOINT lastcp;
a0d0e21e
LW
3509 minmod = 0;
3510 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3511 sayNO;
a0ed51b3 3512 locinput = PL_reginput;
02db2b7b 3513 REGCP_SET(lastcp);
0fe9bf95 3514 if (c1 != -1000) {
1aa99e6b 3515 char *e; /* Should not check after this */
0fe9bf95
IZ
3516 char *old = locinput;
3517
1aa99e6b 3518 if (n == REG_INFTY) {
0fe9bf95 3519 e = PL_regeol - 1;
1aa99e6b
IH
3520 if (do_utf8)
3521 while (UTF8_IS_CONTINUATION(*(U8*)e))
3522 e--;
3523 }
3524 else if (do_utf8) {
3525 int m = n - ln;
3526 for (e = locinput;
3527 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3528 e += UTF8SKIP(e);
3529 }
3530 else {
3531 e = locinput + n - ln;
3532 if (e >= PL_regeol)
3533 e = PL_regeol - 1;
3534 }
0fe9bf95 3535 while (1) {
1aa99e6b 3536 int count;
0fe9bf95 3537 /* Find place 'next' could work */
1aa99e6b
IH
3538 if (!do_utf8) {
3539 if (c1 == c2) {
a8e8ab15
JH
3540 while (locinput <= e &&
3541 UCHARAT(locinput) != c1)
1aa99e6b
IH
3542 locinput++;
3543 } else {
9041c2e3 3544 while (locinput <= e
a8e8ab15
JH
3545 && UCHARAT(locinput) != c1
3546 && UCHARAT(locinput) != c2)
1aa99e6b
IH
3547 locinput++;
3548 }
3549 count = locinput - old;
3550 }
3551 else {
3552 STRLEN len;
3553 if (c1 == c2) {
3554 for (count = 0;
3555 locinput <= e &&
9041c2e3 3556 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3557 count++)
3558 locinput += len;
3559
3560 } else {
3561 for (count = 0; locinput <= e; count++) {
9041c2e3 3562 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3563 if (c == c1 || c == c2)
3564 break;
9041c2e3 3565 locinput += len;
1aa99e6b
IH
3566 }
3567 }
0fe9bf95 3568 }
9041c2e3 3569 if (locinput > e)
0fe9bf95
IZ
3570 sayNO;
3571 /* PL_reginput == old now */
3572 if (locinput != old) {
3573 ln = 1; /* Did some */
1aa99e6b 3574 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3575 sayNO;
3576 }
3577 /* PL_reginput == locinput now */
29d1e993 3578 TRYPAREN(paren, ln, locinput);
0fe9bf95 3579 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3580 REGCP_UNWIND(lastcp);
0fe9bf95 3581 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3582 old = locinput;
3583 if (do_utf8)
3584 locinput += UTF8SKIP(locinput);
3585 else
3586 locinput++;
0fe9bf95
IZ
3587 }
3588 }
3589 else
c277df42 3590 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3591 UV c;
3592 if (c1 != -1000) {
3593 if (do_utf8)
9041c2e3 3594 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3595 else
9041c2e3 3596 c = UCHARAT(PL_reginput);
2390ecbc
PP
3597 /* If it could work, try it. */
3598 if (c == c1 || c == c2)
3599 {
3600 TRYPAREN(paren, n, PL_reginput);
3601 REGCP_UNWIND(lastcp);
3602 }
1aa99e6b 3603 }
a0d0e21e 3604 /* If it could work, try it. */
2390ecbc 3605 else if (c1 == -1000)
bbce6d69 3606 {
29d1e993 3607 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3608 REGCP_UNWIND(lastcp);
bbce6d69 3609 }
c277df42 3610 /* Couldn't or didn't -- move forward. */
a0ed51b3 3611 PL_reginput = locinput;
a0d0e21e
LW
3612 if (regrepeat(scan, 1)) {
3613 ln++;
a0ed51b3
LW
3614 locinput = PL_reginput;
3615 }
3616 else
4633a7c4 3617 sayNO;
a0d0e21e
LW
3618 }
3619 }
3620 else {
c277df42 3621 CHECKPOINT lastcp;
a0d0e21e 3622 n = regrepeat(scan, n);
a0ed51b3 3623 locinput = PL_reginput;
22c35a8c 3624 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
15272685
HS
3625 ((!PL_multiline && OP(next) != MEOL) ||
3626 OP(next) == SEOL || OP(next) == EOS))
3627 {
a0d0e21e 3628 ln = n; /* why back off? */
1aeab75a
GS
3629 /* ...because $ and \Z can match before *and* after
3630 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3631 We should back off by one in this case. */
3632 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3633 ln--;
3634 }
02db2b7b 3635 REGCP_SET(lastcp);
c277df42 3636 if (paren) {
8fa7f367 3637 UV c = 0;
c277df42 3638 while (n >= ln) {
1aa99e6b
IH
3639 if (c1 != -1000) {
3640 if (do_utf8)
9041c2e3 3641 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3642 else
9041c2e3 3643 c = UCHARAT(PL_reginput);
1aa99e6b 3644 }
c277df42 3645 /* If it could work, try it. */
1aa99e6b 3646 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3647 {
29d1e993 3648 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3649 REGCP_UNWIND(lastcp);
c277df42
IZ
3650 }
3651 /* Couldn't or didn't -- back up. */
3652 n--;
dfe13c55 3653 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3654 }
a0ed51b3
LW
3655 }
3656 else {
8fa7f367 3657 UV c = 0;
c277df42 3658 while (n >= ln) {
1aa99e6b
IH
3659 if (c1 != -1000) {
3660 if (do_utf8)
9041c2e3 3661 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3662 else
9041c2e3 3663 c = UCHARAT(PL_reginput);
1aa99e6b 3664 }
c277df42 3665 /* If it could work, try it. */
1aa99e6b 3666 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3667 {
29d1e993 3668 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3669 REGCP_UNWIND(lastcp);
c277df42
IZ
3670 }
3671 /* Couldn't or didn't -- back up. */
3672 n--;
dfe13c55 3673 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3674 }
a0d0e21e
LW
3675 }
3676 }
4633a7c4 3677 sayNO;
c277df42 3678 break;
a0d0e21e 3679 case END:
0f5d15d6
IZ
3680 if (PL_reg_call_cc) {
3681 re_cc_state *cur_call_cc = PL_reg_call_cc;
3682 CURCUR *cctmp = PL_regcc;
3683 regexp *re = PL_reg_re;
3684 CHECKPOINT cp, lastcp;
3685
3686 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3687 REGCP_SET(lastcp);
0f5d15d6
IZ
3688 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3689 the caller. */
3690 PL_reginput = locinput; /* Make position available to
3691 the callcc. */
3692 cache_re(PL_reg_call_cc->re);
3693 PL_regcc = PL_reg_call_cc->cc;
3694 PL_reg_call_cc = PL_reg_call_cc->prev;
3695 if (regmatch(cur_call_cc->node)) {
3696 PL_reg_call_cc = cur_call_cc;
3697 regcpblow(cp);
3698 sayYES;
3699 }
02db2b7b 3700 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3701 regcppop();
3702 PL_reg_call_cc = cur_call_cc;
3703 PL_regcc = cctmp;
3704 PL_reg_re = re;
3705 cache_re(re);
3706
3707 DEBUG_r(
3708 PerlIO_printf(Perl_debug_log,
3709 "%*s continuation failed...\n",
3710 REPORT_CODE_OFF+PL_regindent*2, "")
3711 );
7821416a 3712 sayNO_SILENT;
0f5d15d6 3713 }
7821416a
IZ
3714 if (locinput < PL_regtill) {
3715 DEBUG_r(PerlIO_printf(Perl_debug_log,
3716 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3717 PL_colors[4],
3718 (long)(locinput - PL_reg_starttry),
3719 (long)(PL_regtill - PL_reg_starttry),
3720 PL_colors[5]));
3721 sayNO_FINAL; /* Cannot match: too short. */
3722 }
3723 PL_reginput = locinput; /* put where regtry can find it */
3724 sayYES_FINAL; /* Success! */
7e5428c5 3725 case SUCCEED:
3280af22 3726 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3727 sayYES_LOUD; /* Success! */
c277df42
IZ
3728 case SUSPEND:
3729 n = 1;
9fe1d20c 3730 PL_reginput = locinput;
9041c2e3 3731 goto do_ifmatch;
a0d0e21e 3732 case UNLESSM:
c277df42 3733 n = 0;
a0ed51b3 3734 if (scan->flags) {
efb30f32
HS
3735 s = HOPBACKc(locinput, scan->flags);
3736 if (!s)
3737 goto say_yes;
3738 PL_reginput = s;
a0ed51b3
LW
3739 }
3740 else
3741 PL_reginput = locinput;
c277df42
IZ
3742 goto do_ifmatch;
3743 case IFMATCH:
3744 n = 1;
a0ed51b3 3745 if (scan->flags) {
efb30f32
HS
3746 s = HOPBACKc(locinput, scan->flags);
3747 if (!s)
3748 goto say_no;
3749 PL_reginput = s;
a0ed51b3
LW
3750 }
3751 else
3752 PL_reginput = locinput;
3753
c277df42 3754 do_ifmatch:
c277df42
IZ
3755 inner = NEXTOPER(NEXTOPER(scan));
3756 if (regmatch(inner) != n) {
3757 say_no:
3758 if (logical) {
3759 logical = 0;
3760 sw = 0;
3761 goto do_longjump;
a0ed51b3
LW
3762 }
3763 else
c277df42
IZ
3764 sayNO;
3765 }
3766 say_yes:
3767 if (logical) {
3768 logical = 0;
3769 sw = 1;
3770 }
fe44a5e8 3771 if (OP(scan) == SUSPEND) {
3280af22 3772 locinput = PL_reginput;
565764a8 3773 nextchr = UCHARAT(locinput);
fe44a5e8 3774 }
c277df42
IZ
3775 /* FALL THROUGH. */
3776 case LONGJMP:
3777 do_longjump:
3778 next = scan + ARG(scan);
3779 if (next == scan)
3780 next = NULL;
a0d0e21e
LW
3781 break;
3782 default:
b900a521 3783 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3784 PTR2UV(scan), OP(scan));
cea2e8a9 3785 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3786 }
02db2b7b 3787 reenter:
a0d0e21e
LW
3788 scan = next;
3789 }
a687059c 3790
a0d0e21e
LW
3791 /*
3792 * We get here only if there's trouble -- normally "case END" is
3793 * the terminating point.
3794 */
cea2e8a9 3795 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3796 /*NOTREACHED*/
4633a7c4
LW
3797 sayNO;
3798
7821416a
IZ
3799yes_loud:
3800 DEBUG_r(
3801 PerlIO_printf(Perl_debug_log,
3802 "%*s %scould match...%s\n",
3803 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3804 );
3805 goto yes;
3806yes_final:
3807 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3808 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3809yes:
3810#ifdef DEBUGGING
3280af22 3811 PL_regindent--;
4633a7c4 3812#endif
02db2b7b
IZ
3813
3814#if 0 /* Breaks $^R */
3815 if (unwind)
3816 regcpblow(firstcp);
3817#endif
4633a7c4
LW
3818 return 1;
3819
3820no:
7821416a
IZ
3821 DEBUG_r(
3822 PerlIO_printf(Perl_debug_log,
3823 "%*s %sfailed...%s\n",
3824 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3825 );
3826 goto do_no;
3827no_final:
3828do_no:
02db2b7b
IZ
3829 if (unwind) {
3830 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3831
3832 switch (uw->type) {
3833 case RE_UNWIND_BRANCH:
3834 case RE_UNWIND_BRANCHJ:
3835 {
3836 re_unwind_branch_t *uwb = &(uw->branch);
3837 I32 lastparen = uwb->lastparen;
9041c2e3 3838
02db2b7b
IZ
3839 REGCP_UNWIND(uwb->lastcp);
3840 for (n = *PL_reglastparen; n > lastparen; n--)
3841 PL_regendp[n] = -1;
3842 *PL_reglastparen = n;
3843 scan = next = uwb->next;
9041c2e3
NIS
3844 if ( !scan ||
3845 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
3846 ? BRANCH : BRANCHJ) ) { /* Failure */
3847 unwind = uwb->prev;
3848#ifdef DEBUGGING
3849 PL_regindent--;
3850#endif
3851 goto do_no;
3852 }
3853 /* Have more choice yet. Reuse the same uwb. */
3854 /*SUPPRESS 560*/
3855 if ((n = (uwb->type == RE_UNWIND_BRANCH
3856 ? NEXT_OFF(next) : ARG(next))))
3857 next += n;
3858 else
3859 next = NULL; /* XXXX Needn't unwinding in this case... */
3860 uwb->next = next;
3861 next = NEXTOPER(scan);
3862 if (uwb->type == RE_UNWIND_BRANCHJ)
3863 next = NEXTOPER(next);
3864 locinput = uwb->locinput;
3865 nextchr = uwb->nextchr;
3866#ifdef DEBUGGING
3867 PL_regindent = uwb->regindent;
3868#endif
3869
3870 goto reenter;
3871 }
3872 /* NOT REACHED */
3873 default:
3874 Perl_croak(aTHX_ "regexp unwind memory corruption");
3875 }
3876 /* NOT REACHED */
3877 }
4633a7c4 3878#ifdef DEBUGGING
3280af22 3879 PL_regindent--;
4633a7c4 3880#endif
a0d0e21e 3881 return 0;
a687059c
LW
3882}
3883
3884/*
3885 - regrepeat - repeatedly match something simple, report how many
3886 */
3887/*
3888 * [This routine now assumes that it will only match on things of length 1.
3889 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3890 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3891 */
76e3520e 3892STATIC I32
cea2e8a9 3893S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3894{
a0d0e21e 3895 register char *scan;
a0d0e21e 3896 register I32 c;
3280af22 3897 register char *loceol = PL_regeol;
a0ed51b3 3898 register I32 hardcount = 0;
53c4c00c 3899 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 3900
3280af22 3901 scan = PL_reginput;
c277df42 3902 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3903 loceol = scan + max;
a0d0e21e 3904 switch (OP(p)) {
22c35a8c 3905 case REG_ANY:
1aa99e6b 3906 if (do_utf8) {
ffc61ed2 3907 loceol = PL_regeol;
1aa99e6b 3908 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3909 scan += UTF8SKIP(scan);
3910 hardcount++;
3911 }
3912 } else {
3913 while (scan < loceol && *scan != '\n')
3914 scan++;
a0ed51b3
LW
3915 }
3916 break;
ffc61ed2 3917 case SANY:
def8e4ea
JH
3918 if (do_utf8) {
3919 loceol = PL_regeol;
a0804c9e 3920 while (scan < loceol && hardcount < max) {
def8e4ea
JH
3921 scan += UTF8SKIP(scan);
3922 hardcount++;
3923 }
3924 }
3925 else
3926 scan = loceol;
a0ed51b3 3927 break;
f33976b4
DB
3928 case CANY:
3929 scan = loceol;
3930 break;
bbce6d69 3931 case EXACT: /* length of string is 1 */
cd439c50 3932 c = (U8)*STRING(p);
bbce6d69 3933 while (scan < loceol && UCHARAT(scan) == c)
3934 scan++;
3935 break;
3936 case EXACTF: /* length of string is 1 */
cd439c50 3937 c = (U8)*STRING(p);
bbce6d69 3938 while (scan < loceol &&
22c35a8c 3939 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3940 scan++;
3941 break;
3942 case EXACTFL: /* length of string is 1 */
3280af22 3943 PL_reg_flags |= RF_tainted;
cd439c50 3944 c = (U8)*STRING(p);
bbce6d69 3945 while (scan < loceol &&
22c35a8c 3946 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3947 scan++;
3948 break;
3949 case ANYOF:
ffc61ed2
JH
3950 if (do_utf8) {
3951 loceol = PL_regeol;
1aa99e6b
IH
3952 while (hardcount < max && scan < loceol &&
3953 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3954 scan += UTF8SKIP(scan);
3955 hardcount++;
3956 }
3957 } else {
3958 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3959 scan++;
3960 }
a0d0e21e
LW
3961 break;
3962 case ALNUM:
1aa99e6b 3963 if (do_utf8) {
ffc61ed2 3964 loceol = PL_regeol;
8269fa76 3965 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3966 while (hardcount < max && scan < loceol &&
3568d838 3967 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3968 scan += UTF8SKIP(scan);
3969 hardcount++;
3970 }
3971 } else {
3972 while (scan < loceol && isALNUM(*scan))
3973 scan++;
a0ed51b3
LW
3974 }
3975 break;
bbce6d69 3976 case ALNUML:
3280af22 3977 PL_reg_flags |= RF_tainted;
1aa99e6b 3978 if (do_utf8) {
ffc61ed2 3979 loceol = PL_regeol;
1aa99e6b
IH
3980 while (hardcount < max && scan < loceol &&
3981 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3982 scan += UTF8SKIP(scan);
3983 hardcount++;
3984 }
3985 } else {
3986 while (scan < loceol && isALNUM_LC(*scan))
3987 scan++;
a0ed51b3
LW
3988 }
3989 break;
a0d0e21e 3990 case NALNUM:
1aa99e6b 3991 if (do_utf8) {
ffc61ed2 3992 loceol = PL_regeol;
8269fa76 3993 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3994 while (hardcount < max && scan < loceol &&
3568d838 3995 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3996 scan += UTF8SKIP(scan);
3997 hardcount++;
3998 }
3999 } else {
4000 while (scan < loceol && !isALNUM(*scan))
4001 scan++;
a0ed51b3
LW
4002 }
4003 break;
bbce6d69 4004 case NALNUML:
3280af22 4005 PL_reg_flags |= RF_tainted;
1aa99e6b 4006 if (do_utf8) {
ffc61ed2 4007 loceol = PL_regeol;
1aa99e6b
IH
4008 while (hardcount < max && scan < loceol &&
4009 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4010 scan += UTF8SKIP(scan);
4011 hardcount++;
4012 }
4013 } else {
4014 while (scan < loceol && !isALNUM_LC(*scan))
4015 scan++;
a0ed51b3
LW
4016 }
4017 break;
a0d0e21e 4018 case SPACE:
1aa99e6b 4019 if (do_utf8) {
ffc61ed2 4020 loceol = PL_regeol;
8269fa76 4021 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 4022 while (hardcount < max && scan < loceol &&
3568d838
JH
4023 (*scan == ' ' ||
4024 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4025 scan += UTF8SKIP(scan);
4026 hardcount++;
4027 }
4028 } else {
4029 while (scan < loceol && isSPACE(*scan))
4030 scan++;
a0ed51b3
LW
4031 }
4032 break;
bbce6d69 4033 case SPACEL:
3280af22 4034 PL_reg_flags |= RF_tainted;
1aa99e6b 4035 if (do_utf8) {
ffc61ed2 4036 loceol = PL_regeol;
1aa99e6b 4037 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4038 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4039 scan += UTF8SKIP(scan);
4040 hardcount++;
4041 }
4042 } else {
4043 while (scan < loceol && isSPACE_LC(*scan))
4044 scan++;
a0ed51b3
LW
4045 }
4046 break;
a0d0e21e 4047 case NSPACE:
1aa99e6b 4048 if (do_utf8) {
ffc61ed2 4049 loceol = PL_regeol;
8269fa76 4050 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 4051 while (hardcount < max && scan < loceol &&
3568d838
JH
4052 !(*scan == ' ' ||
4053 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4054 scan += UTF8SKIP(scan);
4055 hardcount++;
4056 }
4057 } else {
4058 while (scan < loceol && !isSPACE(*scan))
4059 scan++;
4060 break;
a0ed51b3 4061 }
bbce6d69 4062 case NSPACEL:
3280af22 4063 PL_reg_flags |= RF_tainted;
1aa99e6b 4064 if (do_utf8) {
ffc61ed2 4065 loceol = PL_regeol;
1aa99e6b 4066 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4067 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4068 scan += UTF8SKIP(scan);
4069 hardcount++;
4070 }
4071 } else {
4072 while (scan < loceol && !isSPACE_LC(*scan))
4073 scan++;
a0ed51b3
LW
4074 }
4075 break;
a0d0e21e 4076 case DIGIT:
1aa99e6b 4077 if (do_utf8) {
ffc61ed2 4078 loceol = PL_regeol;
8269fa76 4079 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 4080 while (hardcount < max && scan < loceol &&
3568d838 4081 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4082 scan += UTF8SKIP(scan);
4083 hardcount++;
4084 }
4085 } else {
4086 while (scan < loceol && isDIGIT(*scan))
4087 scan++;
a0ed51b3
LW
4088 }
4089 break;
a0d0e21e 4090 case NDIGIT:
1aa99e6b 4091 if (do_utf8) {
ffc61ed2 4092 loceol = PL_regeol;
8269fa76 4093 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 4094 while (hardcount < max && scan < loceol &&
3568d838 4095 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4096 scan += UTF8SKIP(scan);
4097 hardcount++;
4098 }
4099 } else {
4100 while (scan < loceol && !isDIGIT(*scan))
4101 scan++;
a0ed51b3
LW
4102 }
4103 break;
a0d0e21e
LW
4104 default: /* Called on something of 0 width. */
4105 break; /* So match right here or not at all. */
4106 }
a687059c 4107
a0ed51b3
LW
4108 if (hardcount)
4109 c = hardcount;
4110 else
4111 c = scan - PL_reginput;
3280af22 4112 PL_reginput = scan;
a687059c 4113
9041c2e3 4114 DEBUG_r(
c277df42
IZ
4115 {
4116 SV *prop = sv_newmortal();
4117
4118 regprop(prop, p);
9041c2e3
NIS
4119 PerlIO_printf(Perl_debug_log,
4120 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
7b0972df 4121 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42 4122 });
9041c2e3 4123
a0d0e21e 4124 return(c);
a687059c
LW
4125}
4126
4127/*
c277df42 4128 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 4129 *
c277df42
IZ
4130 * The repeater is supposed to have constant length.
4131 */
4132
76e3520e 4133STATIC I32
cea2e8a9 4134S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 4135{
b7953727 4136 register char *scan = Nullch;
c277df42 4137 register char *start;
3280af22 4138 register char *loceol = PL_regeol;
a0ed51b3 4139 I32 l = 0;
708e3b05 4140 I32 count = 0, res = 1;
a0ed51b3
LW
4141
4142 if (!max)
4143 return 0;
c277df42 4144
3280af22 4145 start = PL_reginput;
53c4c00c 4146 if (PL_reg_match_utf8) {
708e3b05 4147 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4148 if (!count++) {
4149 l = 0;
4150 while (start < PL_reginput) {
4151 l++;
4152 start += UTF8SKIP(start);
4153 }
4154 *lp = l;
4155 if (l == 0)
4156 return max;
4157 }
4158 if (count == max)
4159 return count;
4160 }
4161 }
4162 else {
708e3b05 4163 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4164 if (!count++) {
4165 *lp = l = PL_reginput - start;
4166 if (max != REG_INFTY && l*max < loceol - scan)
4167 loceol = scan + l*max;
4168 if (l == 0)
4169 return max;
c277df42
IZ
4170 }
4171 }
4172 }
708e3b05 4173 if (!res)
3280af22 4174 PL_reginput = scan;
9041c2e3 4175
a0ed51b3 4176 return count;
c277df42
IZ
4177}
4178
4179/*
ffc61ed2
JH
4180- regclass_swash - prepare the utf8 swash
4181*/
4182
4183SV *
9e55ce06 4184Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV **altsvp)
ffc61ed2 4185{
9e55ce06
JH
4186 SV *sw = NULL;
4187 SV *si = NULL;
4188 SV *alt = NULL;
ffc61ed2
JH
4189
4190 if (PL_regdata && PL_regdata->count) {
4191 U32 n = ARG(node);
4192
4193 if (PL_regdata->what[n] == 's') {
4194 SV *rv = (SV*)PL_regdata->data[n];
4195 AV *av = (AV*)SvRV((SV*)rv);
9e55ce06 4196 SV **a, **b;
9041c2e3 4197
9e55ce06
JH
4198 /* See the end of regcomp.c:S_reglass() for
4199 * documentation of these array elements. */
4200
4201 si = *av_fetch(av, 0, FALSE);
4202 a = av_fetch(av, 1, FALSE);
4203 b = av_fetch(av, 2, FALSE);
9041c2e3 4204
ffc61ed2
JH
4205 if (a)
4206 sw = *a;
4207 else if (si && doinit) {
4208 sw = swash_init("utf8", "", si, 1, 0);
4209 (void)av_store(av, 1, sw);
4210 }
9e55ce06
JH
4211 if (b)
4212 alt = *b;
ffc61ed2
JH
4213 }
4214 }
4215
9e55ce06
JH
4216 if (listsvp)
4217 *listsvp = si;
4218 if (altsvp)
4219 *altsvp = alt;
ffc61ed2
JH
4220
4221 return sw;
4222}
4223
4224/*
832705d4
JH
4225 - reginclasslen - determine if a character falls into a character class
4226
4227 The n is the ANYOF regnode, the p is the target string, lenp
4228 is pointer to the maximum length of how far to go in the p
4229 (if the lenp is zero, UTF8SKIP(p) is used),
4230 do_utf8 tells whether the target string is in UTF-8.
4231
bbce6d69 4232 */
4233
76e3520e 4234STATIC bool
9e55ce06 4235S_reginclasslen(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, register bool do_utf8)
bbce6d69 4236{
ffc61ed2 4237 char flags = ANYOF_FLAGS(n);
bbce6d69 4238 bool match = FALSE;
1aa99e6b 4239 UV c;
3568d838 4240 STRLEN len = 0;
9e55ce06 4241 STRLEN plen;
1aa99e6b 4242
3568d838 4243 c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
bbce6d69 4244
9e55ce06 4245 plen = lenp ? *lenp : UNISKIP(c);
ffc61ed2 4246 if (do_utf8 || (flags & ANYOF_UNICODE)) {
9e55ce06
JH
4247 if (lenp)
4248 *lenp = 0;
ffc61ed2 4249 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
4250 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4251 match = TRUE;
bbce6d69 4252 }
3568d838 4253 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 4254 match = TRUE;
ffc61ed2 4255 if (!match) {
9e55ce06
JH
4256 AV *av;
4257 SV *sw = regclass_swash(n, TRUE, 0, (SV**)&av);
ffc61ed2
JH
4258
4259 if (sw) {
3568d838 4260 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
4261 match = TRUE;
4262 else if (flags & ANYOF_FOLD) {
9e55ce06
JH
4263 if (!match && lenp && av) {
4264 I32 i;
4265
4266 for (i = 0; i <= av_len(av); i++) {
4267 SV* sv = *av_fetch(av, i, FALSE);
4268 STRLEN len;
4269 char *s = SvPV(sv, len);
4270
4271 if (len <= plen && memEQ(s, p, len)) {
4272 *lenp = len;
4273 match = TRUE;
4274 break;
4275 }
4276 }
4277 }
4278 if (!match) {
4a623e43
JH
4279 U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
4280 STRLEN tmplen;
4281
9e55ce06
JH
4282 to_utf8_fold(p, tmpbuf, &tmplen);
4283 if (swash_fetch(sw, tmpbuf, do_utf8))
4284 match = TRUE;
4285 }
ffc61ed2
JH
4286 }
4287 }
bbce6d69 4288 }
9e55ce06
JH
4289 if (match && lenp && *lenp == 0)
4290 *lenp = UNISKIP(c);
bbce6d69 4291 }
1aa99e6b 4292 if (!match && c < 256) {
ffc61ed2
JH
4293 if (ANYOF_BITMAP_TEST(n, c))
4294 match = TRUE;
4295 else if (flags & ANYOF_FOLD) {
3568d838 4296 I32 f;
a0ed51b3 4297
ffc61ed2
JH
4298 if (flags & ANYOF_LOCALE) {
4299 PL_reg_flags |= RF_tainted;
4300 f = PL_fold_locale[c];
4301 }
4302 else
4303 f = PL_fold[c];
4304 if (f != c && ANYOF_BITMAP_TEST(n, f))
4305 match = TRUE;
4306 }
4307
4308 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 4309 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
4310 if (
4311 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4312 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4313 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
4314 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4315 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4316 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4317 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4318 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4319 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4320 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4321 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
4322 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
4323 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4324 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4325 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4326 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4327 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
4328 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4329 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
4330 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4331 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4332 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4333 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
4334 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4335 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4336 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
4337 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
4338 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
4339 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
4340 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
4341 ) /* How's that for a conditional? */
4342 {
4343 match = TRUE;
4344 }
a0ed51b3 4345 }
a0ed51b3
LW
4346 }
4347
a0ed51b3
LW
4348 return (flags & ANYOF_INVERT) ? !match : match;
4349}
161b471a 4350
832705d4
JH
4351/*
4352 - reginclass - determine if a character falls into a character class
4353
4354 The n is the ANYOF regnode, the p is the target string, do_utf8 tells
4355 whether the target string is in UTF-8.
4356
4357 */
4358
9e55ce06
JH
4359STATIC bool
4360S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
4361{
4362 return S_reginclasslen(aTHX_ n, p, 0, do_utf8);
4363}
4364
dfe13c55 4365STATIC U8 *
cea2e8a9 4366S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 4367{
1aa99e6b
IH
4368 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4369}
4370
4371STATIC U8 *
4372S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 4373{
a0ed51b3 4374 if (off >= 0) {
1aa99e6b 4375 while (off-- && s < lim) {
ffc61ed2 4376 /* XXX could check well-formedness here */
a0ed51b3 4377 s += UTF8SKIP(s);
ffc61ed2 4378 }
a0ed51b3
LW
4379 }
4380 else {
4381 while (off++) {
1aa99e6b 4382 if (s > lim) {
a0ed51b3 4383 s--;
ffc61ed2 4384 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4385 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4386 s--;
ffc61ed2
JH
4387 }
4388 /* XXX could check well-formedness here */
a0ed51b3
LW
4389 }
4390 }
4391 }
4392 return s;
4393}
161b471a 4394
dfe13c55 4395STATIC U8 *
1aa99e6b 4396S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 4397{
1aa99e6b
IH
4398 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4399}
4400
4401STATIC U8 *
4402S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
4403{
4404 if (off >= 0) {
1aa99e6b 4405 while (off-- && s < lim) {
ffc61ed2 4406 /* XXX could check well-formedness here */
a0ed51b3 4407 s += UTF8SKIP(s);
ffc61ed2 4408 }
a0ed51b3
LW
4409 if (off >= 0)
4410 return 0;
4411 }
4412 else {
4413 while (off++) {
1aa99e6b 4414 if (s > lim) {
a0ed51b3 4415 s--;
ffc61ed2 4416 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4417 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4418 s--;
ffc61ed2
JH
4419 }
4420 /* XXX could check well-formedness here */
a0ed51b3
LW
4421 }
4422 else
4423 break;
4424 }
4425 if (off <= 0)
4426 return 0;
4427 }
4428 return s;
4429}
51371543 4430
51371543 4431static void
acfe0abc 4432restore_pos(pTHX_ void *arg)
51371543 4433{
51371543
GS
4434 if (PL_reg_eval_set) {
4435 if (PL_reg_oldsaved) {
4436 PL_reg_re->subbeg = PL_reg_oldsaved;
4437 PL_reg_re->sublen = PL_reg_oldsavedlen;
4438 RX_MATCH_COPIED_on(PL_reg_re);
4439 }
4440 PL_reg_magic->mg_len = PL_reg_oldpos;
4441 PL_reg_eval_set = 0;
4442 PL_curpm = PL_reg_oldcurpm;
4443 }
4444}