This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
subsume CURCUR (current curly) struct within the main regmatch_state
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
61296642
DM
8/* This file contains functions for executing a regular expression. See
9 * also regcomp.c which funnily enough, contains functions for compiling
166f8a29 10 * a regular expression.
e4a054ea
DM
11 *
12 * This file is also copied at build time to ext/re/re_exec.c, where
13 * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14 * This causes the main functions to be compiled under new names and with
15 * debugging support added, which makes "use re 'debug'" work.
16
166f8a29
DM
17 */
18
a687059c
LW
19/* NOTE: this is derived from Henry Spencer's regexp code, and should not
20 * confused with the original package (see point 3 below). Thanks, Henry!
21 */
22
23/* Additional note: this code is very heavily munged from Henry's version
24 * in places. In some spots I've traded clarity for efficiency, so don't
25 * blame Henry for some of the lack of readability.
26 */
27
e50aee73
AD
28/* The names of the functions have been changed from regcomp and
29 * regexec to pregcomp and pregexec in order to avoid conflicts
30 * with the POSIX routines of the same names.
31*/
32
b9d5759e
AD
33#ifdef PERL_EXT_RE_BUILD
34/* need to replace pregcomp et al, so enable that */
35# ifndef PERL_IN_XSUB_RE
36# define PERL_IN_XSUB_RE
37# endif
38/* need access to debugger hooks */
cad2e5aa 39# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
40# define DEBUGGING
41# endif
42#endif
43
44#ifdef PERL_IN_XSUB_RE
d06ea78c 45/* We *really* need to overwrite these symbols: */
56953603
IZ
46# define Perl_regexec_flags my_regexec
47# define Perl_regdump my_regdump
48# define Perl_regprop my_regprop
cad2e5aa 49# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
50/* *These* symbols are masked to allow static link. */
51# define Perl_pregexec my_pregexec
9041c2e3 52# define Perl_reginitcolors my_reginitcolors
490a3f88 53# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
54
55# define PERL_NO_GET_CONTEXT
9041c2e3 56#endif
56953603 57
a687059c 58/*
e50aee73 59 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
60 *
61 * Copyright (c) 1986 by University of Toronto.
62 * Written by Henry Spencer. Not derived from licensed software.
63 *
64 * Permission is granted to anyone to use this software for any
65 * purpose on any computer system, and to redistribute it freely,
66 * subject to the following restrictions:
67 *
68 * 1. The author is not responsible for the consequences of use of
69 * this software, no matter how awful, even if they arise
70 * from defects in it.
71 *
72 * 2. The origin of this software must not be misrepresented, either
73 * by explicit claim or by omission.
74 *
75 * 3. Altered versions must be plainly marked as such, and must not
76 * be misrepresented as being the original software.
77 *
78 **** Alterations to Henry's code are...
79 ****
4bb101f2 80 **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 81 **** 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 82 ****
9ef589d8
LW
83 **** You may distribute under the terms of either the GNU General Public
84 **** License or the Artistic License, as specified in the README file.
a687059c
LW
85 *
86 * Beware that some of this code is subtly aware of the way operator
87 * precedence is structured in regular expressions. Serious changes in
88 * regular-expression syntax might require a total rethink.
89 */
90#include "EXTERN.h"
864dbfa3 91#define PERL_IN_REGEXEC_C
a687059c 92#include "perl.h"
0f5d15d6 93
a687059c
LW
94#include "regcomp.h"
95
c277df42
IZ
96#define RF_tainted 1 /* tainted information used? */
97#define RF_warned 2 /* warned about big count? */
ce862d02 98#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
99#define RF_utf8 8 /* String contains multibyte chars? */
100
eb160463 101#define UTF ((PL_reg_flags & RF_utf8) != 0)
ce862d02
IZ
102
103#define RS_init 1 /* eval environment created */
104#define RS_set 2 /* replsv value is set */
c277df42 105
a687059c
LW
106#ifndef STATIC
107#define STATIC static
108#endif
109
ba7b4546 110#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
7d3e948e 111
c277df42
IZ
112/*
113 * Forwards.
114 */
115
33b8afdf 116#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 117#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 118
52657f30
AL
119#define HOPc(pos,off) ((char *)(PL_reg_match_utf8 \
120 ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
121 : (U8*)(pos + off)))
122#define HOPBACKc(pos, off) ((char*) \
123 ((PL_reg_match_utf8) \
124 ? reghopmaybe3((U8*)pos, -off, ((U8*)(off < 0 ? PL_regeol : PL_bostr))) \
efb30f32
HS
125 : (pos - off >= PL_bostr) \
126 ? (U8*)(pos - off) \
52657f30 127 : (U8*)NULL) \
efb30f32 128)
efb30f32 129
1aa99e6b 130#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c 131#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b 132#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
1aa99e6b 133
1a4fad37
AL
134#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
135 if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
136#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
137#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
138#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
139#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
51371543 140
5f80c4cf 141/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
142#define JUMPABLE(rn) ( \
143 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
144 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
145 OP(rn) == PLUS || OP(rn) == MINMOD || \
146 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
147)
148
cca55fe3
JP
149#define HAS_TEXT(rn) ( \
150 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
151)
e2d8ce26 152
a84d97b6
HS
153/*
154 Search for mandatory following text node; for lookahead, the text must
155 follow but for lookbehind (rn->flags != 0) we skip to the next step.
156*/
cca55fe3 157#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 158 while (JUMPABLE(rn)) \
a84d97b6 159 if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 160 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
161 else if (OP(rn) == PLUS) \
162 rn = NEXTOPER(rn); \
a84d97b6
HS
163 else if (OP(rn) == IFMATCH) \
164 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
e2d8ce26 165 else rn += NEXT_OFF(rn); \
5f80c4cf 166} STMT_END
74750237 167
acfe0abc 168static void restore_pos(pTHX_ void *arg);
51371543 169
76e3520e 170STATIC CHECKPOINT
cea2e8a9 171S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 172{
97aff369 173 dVAR;
a3b680e6 174 const int retval = PL_savestack_ix;
b1ce53c5 175#define REGCP_PAREN_ELEMS 4
a3b680e6 176 const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
177 int p;
178
e49a9654
IH
179 if (paren_elems_to_push < 0)
180 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
181
a01268b5 182#define REGCP_OTHER_ELEMS 6
4b3c1a47 183 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 184 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 185/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
186 SSPUSHINT(PL_regendp[p]);
187 SSPUSHINT(PL_regstartp[p]);
3280af22 188 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
189 SSPUSHINT(p);
190 }
b1ce53c5 191/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
192 SSPUSHINT(PL_regsize);
193 SSPUSHINT(*PL_reglastparen);
a01268b5 194 SSPUSHINT(*PL_reglastcloseparen);
3280af22 195 SSPUSHPTR(PL_reginput);
41123dfd
JH
196#define REGCP_FRAME_ELEMS 2
197/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
198 * are needed for the regexp context stack bookkeeping. */
199 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 200 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 201
a0d0e21e
LW
202 return retval;
203}
204
c277df42 205/* These are needed since we do not localize EVAL nodes: */
a3621e74 206# define REGCP_SET(cp) DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, \
faccc32b 207 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 208 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 209
a3621e74 210# define REGCP_UNWIND(cp) DEBUG_EXECUTE_r(cp != PL_savestack_ix ? \
c3464db5 211 PerlIO_printf(Perl_debug_log, \
faccc32b 212 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 213 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 214
76e3520e 215STATIC char *
cea2e8a9 216S_regcppop(pTHX)
a0d0e21e 217{
97aff369 218 dVAR;
b1ce53c5 219 I32 i;
a0d0e21e
LW
220 U32 paren = 0;
221 char *input;
b1ce53c5 222
a3621e74
YO
223 GET_RE_DEBUG_FLAGS_DECL;
224
b1ce53c5 225 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 226 i = SSPOPINT;
b1ce53c5
JH
227 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
228 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 229 input = (char *) SSPOPPTR;
a01268b5 230 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
231 *PL_reglastparen = SSPOPINT;
232 PL_regsize = SSPOPINT;
b1ce53c5
JH
233
234 /* Now restore the parentheses context. */
41123dfd
JH
235 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
236 i > 0; i -= REGCP_PAREN_ELEMS) {
1df70142 237 I32 tmps;
a0d0e21e 238 paren = (U32)SSPOPINT;
3280af22 239 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
240 PL_regstartp[paren] = SSPOPINT;
241 tmps = SSPOPINT;
3280af22
NIS
242 if (paren <= *PL_reglastparen)
243 PL_regendp[paren] = tmps;
a3621e74 244 DEBUG_EXECUTE_r(
c3464db5 245 PerlIO_printf(Perl_debug_log,
b900a521 246 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 247 (UV)paren, (IV)PL_regstartp[paren],
b900a521 248 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 249 (IV)PL_regendp[paren],
3280af22 250 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 251 );
a0d0e21e 252 }
a3621e74 253 DEBUG_EXECUTE_r(
eb160463 254 if ((I32)(*PL_reglastparen + 1) <= PL_regnpar) {
c3464db5 255 PerlIO_printf(Perl_debug_log,
faccc32b
JH
256 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
257 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
258 }
259 );
daf18116 260#if 1
dafc8851
JH
261 /* It would seem that the similar code in regtry()
262 * already takes care of this, and in fact it is in
263 * a better location to since this code can #if 0-ed out
264 * but the code in regtry() is needed or otherwise tests
265 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
266 * (as of patchlevel 7877) will fail. Then again,
267 * this code seems to be necessary or otherwise
268 * building DynaLoader will fail:
269 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
270 * --jhi */
eb160463
GS
271 for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) {
272 if ((I32)paren > PL_regsize)
cf93c79d
IZ
273 PL_regstartp[paren] = -1;
274 PL_regendp[paren] = -1;
a0d0e21e 275 }
dafc8851 276#endif
a0d0e21e
LW
277 return input;
278}
279
02db2b7b 280#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 281
95b24440 282#define TRYPAREN(paren, n, input, where) { \
29d1e993
HS
283 if (paren) { \
284 if (n) { \
285 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
286 PL_regendp[paren] = input - PL_bostr; \
287 } \
288 else \
289 PL_regendp[paren] = -1; \
290 } \
95b24440
DM
291 REGMATCH(next, where); \
292 if (result) \
29d1e993
HS
293 sayYES; \
294 if (paren && n) \
295 PL_regendp[paren] = -1; \
296}
297
298
a687059c 299/*
e50aee73 300 * pregexec and friends
a687059c
LW
301 */
302
303/*
c277df42 304 - pregexec - match a regexp against a string
a687059c 305 */
c277df42 306I32
864dbfa3 307Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 308 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
309/* strend: pointer to null at end of string */
310/* strbeg: real beginning of string */
311/* minend: end of match must be >=minend after stringarg. */
312/* nosave: For optimizations. */
313{
314 return
9041c2e3 315 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
316 nosave ? 0 : REXEC_COPY_STR);
317}
0f5d15d6
IZ
318
319STATIC void
cea2e8a9 320S_cache_re(pTHX_ regexp *prog)
0f5d15d6 321{
97aff369 322 dVAR;
0f5d15d6
IZ
323 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
324#ifdef DEBUGGING
325 PL_regprogram = prog->program;
326#endif
327 PL_regnpar = prog->nparens;
9041c2e3
NIS
328 PL_regdata = prog->data;
329 PL_reg_re = prog;
0f5d15d6 330}
22e551b9 331
9041c2e3 332/*
cad2e5aa
JH
333 * Need to implement the following flags for reg_anch:
334 *
335 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
336 * USE_INTUIT_ML
337 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
338 * INTUIT_AUTORITATIVE_ML
339 * INTUIT_ONCE_NOML - Intuit can match in one location only.
340 * INTUIT_ONCE_ML
341 *
342 * Another flag for this function: SECOND_TIME (so that float substrs
343 * with giant delta may be not rechecked).
344 */
345
346/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
347
3f7c398e 348/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
cad2e5aa
JH
349 Otherwise, only SvCUR(sv) is used to get strbeg. */
350
351/* XXXX We assume that strpos is strbeg unless sv. */
352
6eb5f6b9
JH
353/* XXXX Some places assume that there is a fixed substring.
354 An update may be needed if optimizer marks as "INTUITable"
355 RExen without fixed substrings. Similarly, it is assumed that
356 lengths of all the strings are no more than minlen, thus they
357 cannot come from lookahead.
358 (Or minlen should take into account lookahead.) */
359
2c2d71f5
JH
360/* A failure to find a constant substring means that there is no need to make
361 an expensive call to REx engine, thus we celebrate a failure. Similarly,
362 finding a substring too deep into the string means that less calls to
30944b6d
IZ
363 regtry() should be needed.
364
365 REx compiler's optimizer found 4 possible hints:
366 a) Anchored substring;
367 b) Fixed substring;
368 c) Whether we are anchored (beginning-of-line or \G);
369 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 370 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
371 string which does not contradict any of them.
372 */
2c2d71f5 373
6eb5f6b9
JH
374/* Most of decisions we do here should have been done at compile time.
375 The nodes of the REx which we used for the search should have been
376 deleted from the finite automaton. */
377
cad2e5aa
JH
378char *
379Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
380 char *strend, U32 flags, re_scream_pos_data *data)
381{
97aff369 382 dVAR;
b7953727 383 register I32 start_shift = 0;
cad2e5aa 384 /* Should be nonnegative! */
b7953727 385 register I32 end_shift = 0;
2c2d71f5
JH
386 register char *s;
387 register SV *check;
a1933d95 388 char *strbeg;
cad2e5aa 389 char *t;
a3b680e6 390 const int do_utf8 = sv ? SvUTF8(sv) : 0; /* if no sv we have to assume bytes */
cad2e5aa 391 I32 ml_anch;
bd61b366
SS
392 register char *other_last = NULL; /* other substr checked before this */
393 char *check_at = NULL; /* check substr found at this pos */
1df70142 394 const I32 multiline = prog->reganch & PMf_MULTILINE;
30944b6d 395#ifdef DEBUGGING
890ce7af
AL
396 const char * const i_strpos = strpos;
397 SV * const dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 398#endif
a3621e74
YO
399
400 GET_RE_DEBUG_FLAGS_DECL;
401
a30b2f1f 402 RX_MATCH_UTF8_set(prog,do_utf8);
cad2e5aa 403
b8d68ded 404 if (prog->reganch & ROPT_UTF8) {
a3621e74 405 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded
JH
406 "UTF-8 regex...\n"));
407 PL_reg_flags |= RF_utf8;
408 }
409
a3621e74 410 DEBUG_EXECUTE_r({
1df70142 411 const char *s = PL_reg_match_utf8 ?
c728cb41
JH
412 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
413 strpos;
1df70142 414 const int len = PL_reg_match_utf8 ?
b8d68ded 415 strlen(s) : strend - strpos;
2a782b5b
JH
416 if (!PL_colorset)
417 reginitcolors();
b8d68ded 418 if (PL_reg_match_utf8)
a3621e74 419 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded 420 "UTF-8 target...\n"));
2a782b5b 421 PerlIO_printf(Perl_debug_log,
a0288114 422 "%sGuessing start of match, REx%s \"%s%.60s%s%s\" against \"%s%.*s%s%s\"...\n",
e4584336 423 PL_colors[4], PL_colors[5], PL_colors[0],
2a782b5b
JH
424 prog->precomp,
425 PL_colors[1],
426 (strlen(prog->precomp) > 60 ? "..." : ""),
427 PL_colors[0],
428 (int)(len > 60 ? 60 : len),
429 s, PL_colors[1],
430 (len > 60 ? "..." : "")
431 );
432 });
cad2e5aa 433
c344f387
JH
434 /* CHR_DIST() would be more correct here but it makes things slow. */
435 if (prog->minlen > strend - strpos) {
a3621e74 436 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584 437 "String too short... [re_intuit_start]\n"));
cad2e5aa 438 goto fail;
2c2d71f5 439 }
a1933d95 440 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 441 PL_regeol = strend;
33b8afdf
JH
442 if (do_utf8) {
443 if (!prog->check_utf8 && prog->check_substr)
444 to_utf8_substr(prog);
445 check = prog->check_utf8;
446 } else {
447 if (!prog->check_substr && prog->check_utf8)
448 to_byte_substr(prog);
449 check = prog->check_substr;
450 }
451 if (check == &PL_sv_undef) {
a3621e74 452 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
453 "Non-utf string cannot match utf check string\n"));
454 goto fail;
455 }
2c2d71f5 456 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
457 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
458 || ( (prog->reganch & ROPT_ANCH_BOL)
7fba1cd6 459 && !multiline ) ); /* Check after \n? */
cad2e5aa 460
7e25d62c
JH
461 if (!ml_anch) {
462 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
463 | ROPT_IMPLICIT)) /* not a real BOL */
3f7c398e 464 /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
7e25d62c
JH
465 && sv && !SvROK(sv)
466 && (strpos != strbeg)) {
a3621e74 467 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
7e25d62c
JH
468 goto fail;
469 }
470 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 471 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 472 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
473 I32 slen;
474
1aa99e6b 475 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
476 if (SvTAIL(check)) {
477 slen = SvCUR(check); /* >= 1 */
cad2e5aa 478
9041c2e3 479 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5 480 || (strend - s == slen && strend[-1] != '\n')) {
a3621e74 481 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
2c2d71f5 482 goto fail_finish;
cad2e5aa
JH
483 }
484 /* Now should match s[0..slen-2] */
485 slen--;
3f7c398e 486 if (slen && (*SvPVX_const(check) != *s
cad2e5aa 487 || (slen > 1
3f7c398e 488 && memNE(SvPVX_const(check), s, slen)))) {
2c2d71f5 489 report_neq:
a3621e74 490 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
2c2d71f5
JH
491 goto fail_finish;
492 }
cad2e5aa 493 }
3f7c398e 494 else if (*SvPVX_const(check) != *s
653099ff 495 || ((slen = SvCUR(check)) > 1
3f7c398e 496 && memNE(SvPVX_const(check), s, slen)))
2c2d71f5 497 goto report_neq;
c315bfe8 498 check_at = s;
2c2d71f5 499 goto success_at_start;
7e25d62c 500 }
cad2e5aa 501 }
2c2d71f5 502 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 503 s = strpos;
2c2d71f5 504 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 505 end_shift = prog->minlen - start_shift -
653099ff 506 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 507 if (!ml_anch) {
a3b680e6 508 const I32 end = prog->check_offset_max + CHR_SVLEN(check)
653099ff 509 - (SvTAIL(check) != 0);
a3b680e6 510 const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
511
512 if (end_shift < eshift)
513 end_shift = eshift;
514 }
cad2e5aa 515 }
2c2d71f5 516 else { /* Can match at random position */
cad2e5aa
JH
517 ml_anch = 0;
518 s = strpos;
2c2d71f5
JH
519 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
520 /* Should be nonnegative! */
521 end_shift = prog->minlen - start_shift -
653099ff 522 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
523 }
524
2c2d71f5 525#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 526 if (end_shift < 0)
6bbae5e6 527 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
528#endif
529
2c2d71f5
JH
530 restart:
531 /* Find a possible match in the region s..strend by looking for
532 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 533 if (flags & REXEC_SCREAM) {
cad2e5aa 534 I32 p = -1; /* Internal iterator of scream. */
a3b680e6 535 I32 * const pp = data ? data->scream_pos : &p;
cad2e5aa 536
2c2d71f5
JH
537 if (PL_screamfirst[BmRARE(check)] >= 0
538 || ( BmRARE(check) == '\n'
539 && (BmPREVIOUS(check) == SvCUR(check) - 1)
540 && SvTAIL(check) ))
9041c2e3 541 s = screaminstr(sv, check,
2c2d71f5 542 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 543 else
2c2d71f5 544 goto fail_finish;
4addbd3b
HS
545 /* we may be pointing at the wrong string */
546 if (s && RX_MATCH_COPIED(prog))
3f7c398e 547 s = strbeg + (s - SvPVX_const(sv));
cad2e5aa
JH
548 if (data)
549 *data->scream_olds = s;
550 }
f33976b4 551 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
552 s = fbm_instr((U8*)(s + start_shift),
553 (U8*)(strend - end_shift),
7fba1cd6 554 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 555 else
1aa99e6b
IH
556 s = fbm_instr(HOP3(s, start_shift, strend),
557 HOP3(strend, -end_shift, strbeg),
7fba1cd6 558 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
559
560 /* Update the count-of-usability, remove useless subpatterns,
561 unshift s. */
2c2d71f5 562
a0288114 563 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s %s substr \"%s%.*s%s\"%s%s",
2c2d71f5 564 (s ? "Found" : "Did not find"),
33b8afdf 565 (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
2c2d71f5 566 PL_colors[0],
7b0972df 567 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
3f7c398e 568 SvPVX_const(check),
2c2d71f5
JH
569 PL_colors[1], (SvTAIL(check) ? "$" : ""),
570 (s ? " at offset " : "...\n") ) );
571
572 if (!s)
573 goto fail_finish;
574
6eb5f6b9
JH
575 check_at = s;
576
2c2d71f5 577 /* Finish the diagnostic message */
a3621e74 578 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
579
580 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
581 Start with the other substr.
582 XXXX no SCREAM optimization yet - and a very coarse implementation
a0288114 583 XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
2c2d71f5
JH
584 *always* match. Probably should be marked during compile...
585 Probably it is right to do no SCREAM here...
586 */
587
33b8afdf 588 if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
30944b6d 589 /* Take into account the "other" substring. */
2c2d71f5
JH
590 /* XXXX May be hopelessly wrong for UTF... */
591 if (!other_last)
6eb5f6b9 592 other_last = strpos;
33b8afdf 593 if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
30944b6d
IZ
594 do_other_anchored:
595 {
890ce7af
AL
596 char * const last = HOP3c(s, -start_shift, strbeg);
597 char *last1, *last2;
2c2d71f5 598 char *s1 = s;
33b8afdf 599 SV* must;
2c2d71f5 600
2c2d71f5
JH
601 t = s - prog->check_offset_max;
602 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 603 && (!do_utf8
1aa99e6b 604 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 605 && t > strpos)))
30944b6d 606 /* EMPTY */;
2c2d71f5
JH
607 else
608 t = strpos;
1aa99e6b 609 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
610 if (t < other_last) /* These positions already checked */
611 t = other_last;
1aa99e6b 612 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
613 if (last < last1)
614 last1 = last;
615 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
616 /* On end-of-str: see comment below. */
33b8afdf
JH
617 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
618 if (must == &PL_sv_undef) {
619 s = (char*)NULL;
a3621e74 620 DEBUG_EXECUTE_r(must = prog->anchored_utf8); /* for debug */
33b8afdf
JH
621 }
622 else
623 s = fbm_instr(
624 (unsigned char*)t,
625 HOP3(HOP3(last1, prog->anchored_offset, strend)
626 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
627 must,
7fba1cd6 628 multiline ? FBMrf_MULTILINE : 0
33b8afdf 629 );
a3621e74 630 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a0288114 631 "%s anchored substr \"%s%.*s%s\"%s",
2c2d71f5
JH
632 (s ? "Found" : "Contradicts"),
633 PL_colors[0],
33b8afdf
JH
634 (int)(SvCUR(must)
635 - (SvTAIL(must)!=0)),
3f7c398e 636 SvPVX_const(must),
33b8afdf 637 PL_colors[1], (SvTAIL(must) ? "$" : "")));
2c2d71f5
JH
638 if (!s) {
639 if (last1 >= last2) {
a3621e74 640 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5
JH
641 ", giving up...\n"));
642 goto fail_finish;
643 }
a3621e74 644 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 645 ", trying floating at offset %ld...\n",
1aa99e6b
IH
646 (long)(HOP3c(s1, 1, strend) - i_strpos)));
647 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
648 s = HOP3c(last, 1, strend);
2c2d71f5
JH
649 goto restart;
650 }
651 else {
a3621e74 652 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 653 (long)(s - i_strpos)));
1aa99e6b
IH
654 t = HOP3c(s, -prog->anchored_offset, strbeg);
655 other_last = HOP3c(s, 1, strend);
30944b6d 656 s = s1;
2c2d71f5
JH
657 if (t == strpos)
658 goto try_at_start;
2c2d71f5
JH
659 goto try_at_offset;
660 }
30944b6d 661 }
2c2d71f5
JH
662 }
663 else { /* Take into account the floating substring. */
33b8afdf
JH
664 char *last, *last1;
665 char *s1 = s;
666 SV* must;
667
668 t = HOP3c(s, -start_shift, strbeg);
669 last1 = last =
670 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
671 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
672 last = HOP3c(t, prog->float_max_offset, strend);
673 s = HOP3c(t, prog->float_min_offset, strend);
674 if (s < other_last)
675 s = other_last;
2c2d71f5 676 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
33b8afdf
JH
677 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
678 /* fbm_instr() takes into account exact value of end-of-str
679 if the check is SvTAIL(ed). Since false positives are OK,
680 and end-of-str is not later than strend we are OK. */
681 if (must == &PL_sv_undef) {
682 s = (char*)NULL;
a3621e74 683 DEBUG_EXECUTE_r(must = prog->float_utf8); /* for debug message */
33b8afdf
JH
684 }
685 else
2c2d71f5 686 s = fbm_instr((unsigned char*)s,
33b8afdf
JH
687 (unsigned char*)last + SvCUR(must)
688 - (SvTAIL(must)!=0),
7fba1cd6 689 must, multiline ? FBMrf_MULTILINE : 0);
a0288114 690 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s floating substr \"%s%.*s%s\"%s",
33b8afdf
JH
691 (s ? "Found" : "Contradicts"),
692 PL_colors[0],
693 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 694 SvPVX_const(must),
33b8afdf
JH
695 PL_colors[1], (SvTAIL(must) ? "$" : "")));
696 if (!s) {
697 if (last1 == last) {
a3621e74 698 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
699 ", giving up...\n"));
700 goto fail_finish;
2c2d71f5 701 }
a3621e74 702 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
703 ", trying anchored starting at offset %ld...\n",
704 (long)(s1 + 1 - i_strpos)));
705 other_last = last;
706 s = HOP3c(t, 1, strend);
707 goto restart;
708 }
709 else {
a3621e74 710 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
33b8afdf
JH
711 (long)(s - i_strpos)));
712 other_last = s; /* Fix this later. --Hugo */
713 s = s1;
714 if (t == strpos)
715 goto try_at_start;
716 goto try_at_offset;
717 }
2c2d71f5 718 }
cad2e5aa 719 }
2c2d71f5
JH
720
721 t = s - prog->check_offset_max;
2c2d71f5 722 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 723 && (!do_utf8
1aa99e6b
IH
724 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
725 && t > strpos))) {
2c2d71f5
JH
726 /* Fixed substring is found far enough so that the match
727 cannot start at strpos. */
728 try_at_offset:
cad2e5aa 729 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
730 /* Eventually fbm_*() should handle this, but often
731 anchored_offset is not 0, so this check will not be wasted. */
732 /* XXXX In the code below we prefer to look for "^" even in
733 presence of anchored substrings. And we search even
734 beyond the found float position. These pessimizations
735 are historical artefacts only. */
736 find_anchor:
2c2d71f5 737 while (t < strend - prog->minlen) {
cad2e5aa 738 if (*t == '\n') {
4ee3650e 739 if (t < check_at - prog->check_offset_min) {
33b8afdf 740 if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
4ee3650e
GS
741 /* Since we moved from the found position,
742 we definitely contradict the found anchored
30944b6d
IZ
743 substr. Due to the above check we do not
744 contradict "check" substr.
745 Thus we can arrive here only if check substr
746 is float. Redo checking for "other"=="fixed".
747 */
9041c2e3 748 strpos = t + 1;
a3621e74 749 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
e4584336 750 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
30944b6d
IZ
751 goto do_other_anchored;
752 }
4ee3650e
GS
753 /* We don't contradict the found floating substring. */
754 /* XXXX Why not check for STCLASS? */
cad2e5aa 755 s = t + 1;
a3621e74 756 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
e4584336 757 PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
758 goto set_useful;
759 }
4ee3650e
GS
760 /* Position contradicts check-string */
761 /* XXXX probably better to look for check-string
762 than for "\n", so one should lower the limit for t? */
a3621e74 763 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
e4584336 764 PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 765 other_last = strpos = s = t + 1;
cad2e5aa
JH
766 goto restart;
767 }
768 t++;
769 }
a3621e74 770 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
e4584336 771 PL_colors[0], PL_colors[1]));
2c2d71f5 772 goto fail_finish;
cad2e5aa 773 }
f5952150 774 else {
a3621e74 775 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
e4584336 776 PL_colors[0], PL_colors[1]));
f5952150 777 }
cad2e5aa
JH
778 s = t;
779 set_useful:
33b8afdf 780 ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
cad2e5aa
JH
781 }
782 else {
f5952150 783 /* The found string does not prohibit matching at strpos,
2c2d71f5 784 - no optimization of calling REx engine can be performed,
f5952150
GS
785 unless it was an MBOL and we are not after MBOL,
786 or a future STCLASS check will fail this. */
2c2d71f5
JH
787 try_at_start:
788 /* Even in this situation we may use MBOL flag if strpos is offset
789 wrt the start of the string. */
05b4157f 790 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 791 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
792 /* May be due to an implicit anchor of m{.*foo} */
793 && !(prog->reganch & ROPT_IMPLICIT))
794 {
cad2e5aa
JH
795 t = strpos;
796 goto find_anchor;
797 }
a3621e74 798 DEBUG_EXECUTE_r( if (ml_anch)
f5952150 799 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
e4584336 800 (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
30944b6d 801 );
2c2d71f5 802 success_at_start:
30944b6d 803 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
33b8afdf
JH
804 && (do_utf8 ? (
805 prog->check_utf8 /* Could be deleted already */
806 && --BmUSEFUL(prog->check_utf8) < 0
807 && (prog->check_utf8 == prog->float_utf8)
808 ) : (
809 prog->check_substr /* Could be deleted already */
810 && --BmUSEFUL(prog->check_substr) < 0
811 && (prog->check_substr == prog->float_substr)
812 )))
66e933ab 813 {
cad2e5aa 814 /* If flags & SOMETHING - do not do it many times on the same match */
a3621e74 815 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
33b8afdf
JH
816 SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
817 if (do_utf8 ? prog->check_substr : prog->check_utf8)
818 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
a0714e2c
SS
819 prog->check_substr = prog->check_utf8 = NULL; /* disable */
820 prog->float_substr = prog->float_utf8 = NULL; /* clear */
821 check = NULL; /* abort */
cad2e5aa 822 s = strpos;
3cf5c195
IZ
823 /* XXXX This is a remnant of the old implementation. It
824 looks wasteful, since now INTUIT can use many
6eb5f6b9 825 other heuristics. */
cad2e5aa
JH
826 prog->reganch &= ~RE_USE_INTUIT;
827 }
828 else
829 s = strpos;
830 }
831
6eb5f6b9
JH
832 /* Last resort... */
833 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
834 if (prog->regstclass) {
835 /* minlen == 0 is possible if regstclass is \b or \B,
836 and the fixed substr is ''$.
837 Since minlen is already taken into account, s+1 is before strend;
838 accidentally, minlen >= 1 guaranties no false positives at s + 1
839 even for \b or \B. But (minlen? 1 : 0) below assumes that
840 regstclass does not come from lookahead... */
841 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
842 This leaves EXACTF only, which is dealt with in find_byclass(). */
890ce7af 843 const U8* const str = (U8*)STRING(prog->regstclass);
06b5626a 844 const int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 845 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 846 : 1);
a3b680e6 847 const char * const endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
1aa99e6b 848 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
33b8afdf 849 : (prog->float_substr || prog->float_utf8
1aa99e6b
IH
850 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
851 cl_l, strend)
852 : strend);
6eb5f6b9
JH
853
854 t = s;
9926ca43 855 cache_re(prog);
06b5626a 856 s = find_byclass(prog, prog->regstclass, s, endpos, 1);
6eb5f6b9
JH
857 if (!s) {
858#ifdef DEBUGGING
cbbf8932 859 const char *what = NULL;
6eb5f6b9
JH
860#endif
861 if (endpos == strend) {
a3621e74 862 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
863 "Could not match STCLASS...\n") );
864 goto fail;
865 }
a3621e74 866 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab 867 "This position contradicts STCLASS...\n") );
653099ff
GS
868 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
869 goto fail;
6eb5f6b9 870 /* Contradict one of substrings */
33b8afdf
JH
871 if (prog->anchored_substr || prog->anchored_utf8) {
872 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
a3621e74 873 DEBUG_EXECUTE_r( what = "anchored" );
6eb5f6b9 874 hop_and_restart:
1aa99e6b 875 s = HOP3c(t, 1, strend);
66e933ab
GS
876 if (s + start_shift + end_shift > strend) {
877 /* XXXX Should be taken into account earlier? */
a3621e74 878 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab
GS
879 "Could not match STCLASS...\n") );
880 goto fail;
881 }
5e39e1e5
HS
882 if (!check)
883 goto giveup;
a3621e74 884 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 885 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
886 what, (long)(s + start_shift - i_strpos)) );
887 goto restart;
888 }
66e933ab 889 /* Have both, check_string is floating */
6eb5f6b9
JH
890 if (t + start_shift >= check_at) /* Contradicts floating=check */
891 goto retry_floating_check;
892 /* Recheck anchored substring, but not floating... */
9041c2e3 893 s = check_at;
5e39e1e5
HS
894 if (!check)
895 goto giveup;
a3621e74 896 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 897 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
898 (long)(other_last - i_strpos)) );
899 goto do_other_anchored;
900 }
60e71179
GS
901 /* Another way we could have checked stclass at the
902 current position only: */
903 if (ml_anch) {
904 s = t = t + 1;
5e39e1e5
HS
905 if (!check)
906 goto giveup;
a3621e74 907 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 908 "Looking for /%s^%s/m starting at offset %ld...\n",
e4584336 909 PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
60e71179 910 goto try_at_offset;
66e933ab 911 }
33b8afdf 912 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
60e71179 913 goto fail;
6eb5f6b9
JH
914 /* Check is floating subtring. */
915 retry_floating_check:
916 t = check_at - start_shift;
a3621e74 917 DEBUG_EXECUTE_r( what = "floating" );
6eb5f6b9
JH
918 goto hop_and_restart;
919 }
b7953727 920 if (t != s) {
a3621e74 921 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 922 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
923 (long)(t - i_strpos), (long)(s - i_strpos))
924 );
925 }
926 else {
a3621e74 927 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b7953727
JH
928 "Does not contradict STCLASS...\n");
929 );
930 }
6eb5f6b9 931 }
5e39e1e5 932 giveup:
a3621e74 933 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
5e39e1e5
HS
934 PL_colors[4], (check ? "Guessed" : "Giving up"),
935 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 936 return s;
2c2d71f5
JH
937
938 fail_finish: /* Substring not found */
33b8afdf
JH
939 if (prog->check_substr || prog->check_utf8) /* could be removed already */
940 BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
cad2e5aa 941 fail:
a3621e74 942 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
e4584336 943 PL_colors[4], PL_colors[5]));
bd61b366 944 return NULL;
cad2e5aa 945}
9661b544 946
6eb5f6b9 947/* We know what class REx starts with. Try to find this position... */
3c3eec57 948STATIC char *
a3b680e6 949S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32 norun)
a687059c 950{
27da23d5 951 dVAR;
1df70142 952 const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
6eb5f6b9 953 char *m;
d8093b23 954 STRLEN ln;
5dab1207 955 STRLEN lnc;
078c425b 956 register STRLEN uskip;
d8093b23
G
957 unsigned int c1;
958 unsigned int c2;
6eb5f6b9
JH
959 char *e;
960 register I32 tmp = 1; /* Scratch variable? */
a3b680e6 961 register const bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 962
6eb5f6b9
JH
963 /* We know what class it must start with. */
964 switch (OP(c)) {
6eb5f6b9 965 case ANYOF:
388cc4de 966 if (do_utf8) {
078c425b 967 while (s + (uskip = UTF8SKIP(s)) <= strend) {
388cc4de
HS
968 if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
969 !UTF8_IS_INVARIANT((U8)s[0]) ?
970 reginclass(c, (U8*)s, 0, do_utf8) :
971 REGINCLASS(c, (U8*)s)) {
972 if (tmp && (norun || regtry(prog, s)))
973 goto got_it;
974 else
975 tmp = doevery;
976 }
977 else
978 tmp = 1;
078c425b 979 s += uskip;
388cc4de
HS
980 }
981 }
982 else {
983 while (s < strend) {
984 STRLEN skip = 1;
985
986 if (REGINCLASS(c, (U8*)s) ||
987 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
988 /* The assignment of 2 is intentional:
989 * for the folded sharp s, the skip is 2. */
990 (skip = SHARP_S_SKIP))) {
991 if (tmp && (norun || regtry(prog, s)))
992 goto got_it;
993 else
994 tmp = doevery;
995 }
996 else
997 tmp = 1;
998 s += skip;
999 }
a0d0e21e 1000 }
6eb5f6b9 1001 break;
f33976b4
DB
1002 case CANY:
1003 while (s < strend) {
1004 if (tmp && (norun || regtry(prog, s)))
1005 goto got_it;
1006 else
1007 tmp = doevery;
1008 s++;
1009 }
1010 break;
6eb5f6b9 1011 case EXACTF:
5dab1207
NIS
1012 m = STRING(c);
1013 ln = STR_LEN(c); /* length to match in octets/bytes */
1014 lnc = (I32) ln; /* length to match in characters */
1aa99e6b 1015 if (UTF) {
a2a2844f 1016 STRLEN ulen1, ulen2;
5dab1207 1017 U8 *sm = (U8 *) m;
89ebb4a3
JH
1018 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1019 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
a0374537 1020 const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
a2a2844f
JH
1021
1022 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1023 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1024
89ebb4a3 1025 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
041457d9 1026 0, uniflags);
89ebb4a3 1027 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
041457d9 1028 0, uniflags);
5dab1207
NIS
1029 lnc = 0;
1030 while (sm < ((U8 *) m + ln)) {
1031 lnc++;
1032 sm += UTF8SKIP(sm);
1033 }
1aa99e6b
IH
1034 }
1035 else {
1036 c1 = *(U8*)m;
1037 c2 = PL_fold[c1];
1038 }
6eb5f6b9
JH
1039 goto do_exactf;
1040 case EXACTFL:
5dab1207
NIS
1041 m = STRING(c);
1042 ln = STR_LEN(c);
1043 lnc = (I32) ln;
d8093b23 1044 c1 = *(U8*)m;
6eb5f6b9
JH
1045 c2 = PL_fold_locale[c1];
1046 do_exactf:
db12adc6 1047 e = HOP3c(strend, -((I32)lnc), s);
b3c9acc1 1048
6eb5f6b9
JH
1049 if (norun && e < s)
1050 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 1051
60a8b682
JH
1052 /* The idea in the EXACTF* cases is to first find the
1053 * first character of the EXACTF* node and then, if
1054 * necessary, case-insensitively compare the full
1055 * text of the node. The c1 and c2 are the first
1056 * characters (though in Unicode it gets a bit
1057 * more complicated because there are more cases
7f16dd3d
JH
1058 * than just upper and lower: one needs to use
1059 * the so-called folding case for case-insensitive
1060 * matching (called "loose matching" in Unicode).
1061 * ibcmp_utf8() will do just that. */
60a8b682 1062
1aa99e6b 1063 if (do_utf8) {
575cac57 1064 UV c, f;
89ebb4a3 1065 U8 tmpbuf [UTF8_MAXBYTES+1];
575cac57 1066 STRLEN len, foldlen;
a0374537 1067 const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
09091399 1068 if (c1 == c2) {
5dab1207
NIS
1069 /* Upper and lower of 1st char are equal -
1070 * probably not a "letter". */
1aa99e6b 1071 while (s <= e) {
89ebb4a3 1072 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1073 uniflags);
80aecb99
JH
1074 if ( c == c1
1075 && (ln == len ||
66423254 1076 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1077 m, (char **)0, ln, (bool)UTF))
55da9344 1078 && (norun || regtry(prog, s)) )
1aa99e6b 1079 goto got_it;
80aecb99 1080 else {
1df70142 1081 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1082 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1083 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1084 if ( f != c
1085 && (f == c1 || f == c2)
1086 && (ln == foldlen ||
66423254
JH
1087 !ibcmp_utf8((char *) foldbuf,
1088 (char **)0, foldlen, do_utf8,
d07ddd77 1089 m,
eb160463 1090 (char **)0, ln, (bool)UTF))
80aecb99
JH
1091 && (norun || regtry(prog, s)) )
1092 goto got_it;
1093 }
1aa99e6b
IH
1094 s += len;
1095 }
09091399
JH
1096 }
1097 else {
1aa99e6b 1098 while (s <= e) {
89ebb4a3 1099 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1100 uniflags);
80aecb99 1101
60a8b682 1102 /* Handle some of the three Greek sigmas cases.
8c01da3c
JH
1103 * Note that not all the possible combinations
1104 * are handled here: some of them are handled
1105 * by the standard folding rules, and some of
1106 * them (the character class or ANYOF cases)
1107 * are handled during compiletime in
1108 * regexec.c:S_regclass(). */
880bd946
JH
1109 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1110 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1111 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99
JH
1112
1113 if ( (c == c1 || c == c2)
1114 && (ln == len ||
66423254 1115 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1116 m, (char **)0, ln, (bool)UTF))
55da9344 1117 && (norun || regtry(prog, s)) )
1aa99e6b 1118 goto got_it;
80aecb99 1119 else {
1df70142 1120 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1121 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1122 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1123 if ( f != c
1124 && (f == c1 || f == c2)
1125 && (ln == foldlen ||
a6872d42 1126 !ibcmp_utf8((char *) foldbuf,
66423254 1127 (char **)0, foldlen, do_utf8,
d07ddd77 1128 m,
eb160463 1129 (char **)0, ln, (bool)UTF))
80aecb99
JH
1130 && (norun || regtry(prog, s)) )
1131 goto got_it;
1132 }
1aa99e6b
IH
1133 s += len;
1134 }
09091399 1135 }
1aa99e6b
IH
1136 }
1137 else {
1138 if (c1 == c2)
1139 while (s <= e) {
1140 if ( *(U8*)s == c1
1141 && (ln == 1 || !(OP(c) == EXACTF
1142 ? ibcmp(s, m, ln)
1143 : ibcmp_locale(s, m, ln)))
1144 && (norun || regtry(prog, s)) )
1145 goto got_it;
1146 s++;
1147 }
1148 else
1149 while (s <= e) {
1150 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1151 && (ln == 1 || !(OP(c) == EXACTF
1152 ? ibcmp(s, m, ln)
1153 : ibcmp_locale(s, m, ln)))
1154 && (norun || regtry(prog, s)) )
1155 goto got_it;
1156 s++;
1157 }
b3c9acc1
IZ
1158 }
1159 break;
bbce6d69 1160 case BOUNDL:
3280af22 1161 PL_reg_flags |= RF_tainted;
bbce6d69 1162 /* FALL THROUGH */
a0d0e21e 1163 case BOUND:
ffc61ed2 1164 if (do_utf8) {
12d33761 1165 if (s == PL_bostr)
ffc61ed2
JH
1166 tmp = '\n';
1167 else {
6136c704 1168 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
a0374537 1169 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
ffc61ed2
JH
1170 }
1171 tmp = ((OP(c) == BOUND ?
9041c2e3 1172 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1173 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1174 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1175 if (tmp == !(OP(c) == BOUND ?
3568d838 1176 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1177 isALNUM_LC_utf8((U8*)s)))
1178 {
1179 tmp = !tmp;
1180 if ((norun || regtry(prog, s)))
1181 goto got_it;
1182 }
078c425b 1183 s += uskip;
a687059c 1184 }
a0d0e21e 1185 }
667bb95a 1186 else {
12d33761 1187 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1188 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1189 while (s < strend) {
1190 if (tmp ==
1191 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1192 tmp = !tmp;
1193 if ((norun || regtry(prog, s)))
1194 goto got_it;
1195 }
1196 s++;
a0ed51b3 1197 }
a0ed51b3 1198 }
6eb5f6b9 1199 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1200 goto got_it;
1201 break;
bbce6d69 1202 case NBOUNDL:
3280af22 1203 PL_reg_flags |= RF_tainted;
bbce6d69 1204 /* FALL THROUGH */
a0d0e21e 1205 case NBOUND:
ffc61ed2 1206 if (do_utf8) {
12d33761 1207 if (s == PL_bostr)
ffc61ed2
JH
1208 tmp = '\n';
1209 else {
6136c704 1210 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
a0374537 1211 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
ffc61ed2
JH
1212 }
1213 tmp = ((OP(c) == NBOUND ?
9041c2e3 1214 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1215 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1216 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1217 if (tmp == !(OP(c) == NBOUND ?
3568d838 1218 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1219 isALNUM_LC_utf8((U8*)s)))
1220 tmp = !tmp;
1221 else if ((norun || regtry(prog, s)))
1222 goto got_it;
078c425b 1223 s += uskip;
ffc61ed2 1224 }
a0d0e21e 1225 }
667bb95a 1226 else {
12d33761 1227 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1228 tmp = ((OP(c) == NBOUND ?
1229 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1230 while (s < strend) {
1231 if (tmp ==
1232 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1233 tmp = !tmp;
1234 else if ((norun || regtry(prog, s)))
1235 goto got_it;
1236 s++;
1237 }
a0ed51b3 1238 }
6eb5f6b9 1239 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1240 goto got_it;
1241 break;
a0d0e21e 1242 case ALNUM:
ffc61ed2 1243 if (do_utf8) {
1a4fad37 1244 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1245 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1246 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1247 if (tmp && (norun || regtry(prog, s)))
1248 goto got_it;
1249 else
1250 tmp = doevery;
1251 }
bbce6d69 1252 else
ffc61ed2 1253 tmp = 1;
078c425b 1254 s += uskip;
bbce6d69 1255 }
bbce6d69 1256 }
ffc61ed2
JH
1257 else {
1258 while (s < strend) {
1259 if (isALNUM(*s)) {
1260 if (tmp && (norun || regtry(prog, s)))
1261 goto got_it;
1262 else
1263 tmp = doevery;
1264 }
a0ed51b3 1265 else
ffc61ed2
JH
1266 tmp = 1;
1267 s++;
a0ed51b3 1268 }
a0ed51b3
LW
1269 }
1270 break;
bbce6d69 1271 case ALNUML:
3280af22 1272 PL_reg_flags |= RF_tainted;
ffc61ed2 1273 if (do_utf8) {
078c425b 1274 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1275 if (isALNUM_LC_utf8((U8*)s)) {
1276 if (tmp && (norun || regtry(prog, s)))
1277 goto got_it;
1278 else
1279 tmp = doevery;
1280 }
a687059c 1281 else
ffc61ed2 1282 tmp = 1;
078c425b 1283 s += uskip;
a0d0e21e 1284 }
a0d0e21e 1285 }
ffc61ed2
JH
1286 else {
1287 while (s < strend) {
1288 if (isALNUM_LC(*s)) {
1289 if (tmp && (norun || regtry(prog, s)))
1290 goto got_it;
1291 else
1292 tmp = doevery;
1293 }
a0ed51b3 1294 else
ffc61ed2
JH
1295 tmp = 1;
1296 s++;
a0ed51b3 1297 }
a0ed51b3
LW
1298 }
1299 break;
a0d0e21e 1300 case NALNUM:
ffc61ed2 1301 if (do_utf8) {
1a4fad37 1302 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1303 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1304 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1305 if (tmp && (norun || regtry(prog, s)))
1306 goto got_it;
1307 else
1308 tmp = doevery;
1309 }
bbce6d69 1310 else
ffc61ed2 1311 tmp = 1;
078c425b 1312 s += uskip;
bbce6d69 1313 }
bbce6d69 1314 }
ffc61ed2
JH
1315 else {
1316 while (s < strend) {
1317 if (!isALNUM(*s)) {
1318 if (tmp && (norun || regtry(prog, s)))
1319 goto got_it;
1320 else
1321 tmp = doevery;
1322 }
a0ed51b3 1323 else
ffc61ed2
JH
1324 tmp = 1;
1325 s++;
a0ed51b3 1326 }
a0ed51b3
LW
1327 }
1328 break;
bbce6d69 1329 case NALNUML:
3280af22 1330 PL_reg_flags |= RF_tainted;
ffc61ed2 1331 if (do_utf8) {
078c425b 1332 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1333 if (!isALNUM_LC_utf8((U8*)s)) {
1334 if (tmp && (norun || regtry(prog, s)))
1335 goto got_it;
1336 else
1337 tmp = doevery;
1338 }
a687059c 1339 else
ffc61ed2 1340 tmp = 1;
078c425b 1341 s += uskip;
a687059c 1342 }
a0d0e21e 1343 }
ffc61ed2
JH
1344 else {
1345 while (s < strend) {
1346 if (!isALNUM_LC(*s)) {
1347 if (tmp && (norun || regtry(prog, s)))
1348 goto got_it;
1349 else
1350 tmp = doevery;
1351 }
a0ed51b3 1352 else
ffc61ed2
JH
1353 tmp = 1;
1354 s++;
a0ed51b3 1355 }
a0ed51b3
LW
1356 }
1357 break;
a0d0e21e 1358 case SPACE:
ffc61ed2 1359 if (do_utf8) {
1a4fad37 1360 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1361 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1362 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1363 if (tmp && (norun || regtry(prog, s)))
1364 goto got_it;
1365 else
1366 tmp = doevery;
1367 }
a0d0e21e 1368 else
ffc61ed2 1369 tmp = 1;
078c425b 1370 s += uskip;
2304df62 1371 }
a0d0e21e 1372 }
ffc61ed2
JH
1373 else {
1374 while (s < strend) {
1375 if (isSPACE(*s)) {
1376 if (tmp && (norun || regtry(prog, s)))
1377 goto got_it;
1378 else
1379 tmp = doevery;
1380 }
a0ed51b3 1381 else
ffc61ed2
JH
1382 tmp = 1;
1383 s++;
a0ed51b3 1384 }
a0ed51b3
LW
1385 }
1386 break;
bbce6d69 1387 case SPACEL:
3280af22 1388 PL_reg_flags |= RF_tainted;
ffc61ed2 1389 if (do_utf8) {
078c425b 1390 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1391 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1392 if (tmp && (norun || regtry(prog, s)))
1393 goto got_it;
1394 else
1395 tmp = doevery;
1396 }
bbce6d69 1397 else
ffc61ed2 1398 tmp = 1;
078c425b 1399 s += uskip;
bbce6d69 1400 }
bbce6d69 1401 }
ffc61ed2
JH
1402 else {
1403 while (s < strend) {
1404 if (isSPACE_LC(*s)) {
1405 if (tmp && (norun || regtry(prog, s)))
1406 goto got_it;
1407 else
1408 tmp = doevery;
1409 }
a0ed51b3 1410 else
ffc61ed2
JH
1411 tmp = 1;
1412 s++;
a0ed51b3 1413 }
a0ed51b3
LW
1414 }
1415 break;
a0d0e21e 1416 case NSPACE:
ffc61ed2 1417 if (do_utf8) {
1a4fad37 1418 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1419 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1420 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1421 if (tmp && (norun || regtry(prog, s)))
1422 goto got_it;
1423 else
1424 tmp = doevery;
1425 }
a0d0e21e 1426 else
ffc61ed2 1427 tmp = 1;
078c425b 1428 s += uskip;
a687059c 1429 }
a0d0e21e 1430 }
ffc61ed2
JH
1431 else {
1432 while (s < strend) {
1433 if (!isSPACE(*s)) {
1434 if (tmp && (norun || regtry(prog, s)))
1435 goto got_it;
1436 else
1437 tmp = doevery;
1438 }
a0ed51b3 1439 else
ffc61ed2
JH
1440 tmp = 1;
1441 s++;
a0ed51b3 1442 }
a0ed51b3
LW
1443 }
1444 break;
bbce6d69 1445 case NSPACEL:
3280af22 1446 PL_reg_flags |= RF_tainted;
ffc61ed2 1447 if (do_utf8) {
078c425b 1448 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1449 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1450 if (tmp && (norun || regtry(prog, s)))
1451 goto got_it;
1452 else
1453 tmp = doevery;
1454 }
bbce6d69 1455 else
ffc61ed2 1456 tmp = 1;
078c425b 1457 s += uskip;
bbce6d69 1458 }
bbce6d69 1459 }
ffc61ed2
JH
1460 else {
1461 while (s < strend) {
1462 if (!isSPACE_LC(*s)) {
1463 if (tmp && (norun || regtry(prog, s)))
1464 goto got_it;
1465 else
1466 tmp = doevery;
1467 }
a0ed51b3 1468 else
ffc61ed2
JH
1469 tmp = 1;
1470 s++;
a0ed51b3 1471 }
a0ed51b3
LW
1472 }
1473 break;
a0d0e21e 1474 case DIGIT:
ffc61ed2 1475 if (do_utf8) {
1a4fad37 1476 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1477 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1478 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1479 if (tmp && (norun || regtry(prog, s)))
1480 goto got_it;
1481 else
1482 tmp = doevery;
1483 }
a0d0e21e 1484 else
ffc61ed2 1485 tmp = 1;
078c425b 1486 s += uskip;
2b69d0c2 1487 }
a0d0e21e 1488 }
ffc61ed2
JH
1489 else {
1490 while (s < strend) {
1491 if (isDIGIT(*s)) {
1492 if (tmp && (norun || regtry(prog, s)))
1493 goto got_it;
1494 else
1495 tmp = doevery;
1496 }
a0ed51b3 1497 else
ffc61ed2
JH
1498 tmp = 1;
1499 s++;
a0ed51b3 1500 }
a0ed51b3
LW
1501 }
1502 break;
b8c5462f
JH
1503 case DIGITL:
1504 PL_reg_flags |= RF_tainted;
ffc61ed2 1505 if (do_utf8) {
078c425b 1506 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1507 if (isDIGIT_LC_utf8((U8*)s)) {
1508 if (tmp && (norun || regtry(prog, s)))
1509 goto got_it;
1510 else
1511 tmp = doevery;
1512 }
b8c5462f 1513 else
ffc61ed2 1514 tmp = 1;
078c425b 1515 s += uskip;
b8c5462f 1516 }
b8c5462f 1517 }
ffc61ed2
JH
1518 else {
1519 while (s < strend) {
1520 if (isDIGIT_LC(*s)) {
1521 if (tmp && (norun || regtry(prog, s)))
1522 goto got_it;
1523 else
1524 tmp = doevery;
1525 }
b8c5462f 1526 else
ffc61ed2
JH
1527 tmp = 1;
1528 s++;
b8c5462f 1529 }
b8c5462f
JH
1530 }
1531 break;
a0d0e21e 1532 case NDIGIT:
ffc61ed2 1533 if (do_utf8) {
1a4fad37 1534 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1535 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1536 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1537 if (tmp && (norun || regtry(prog, s)))
1538 goto got_it;
1539 else
1540 tmp = doevery;
1541 }
a0d0e21e 1542 else
ffc61ed2 1543 tmp = 1;
078c425b 1544 s += uskip;
a687059c 1545 }
a0d0e21e 1546 }
ffc61ed2
JH
1547 else {
1548 while (s < strend) {
1549 if (!isDIGIT(*s)) {
1550 if (tmp && (norun || regtry(prog, s)))
1551 goto got_it;
1552 else
1553 tmp = doevery;
1554 }
a0ed51b3 1555 else
ffc61ed2
JH
1556 tmp = 1;
1557 s++;
a0ed51b3 1558 }
a0ed51b3
LW
1559 }
1560 break;
b8c5462f
JH
1561 case NDIGITL:
1562 PL_reg_flags |= RF_tainted;
ffc61ed2 1563 if (do_utf8) {
078c425b 1564 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1565 if (!isDIGIT_LC_utf8((U8*)s)) {
1566 if (tmp && (norun || regtry(prog, s)))
1567 goto got_it;
1568 else
1569 tmp = doevery;
1570 }
b8c5462f 1571 else
ffc61ed2 1572 tmp = 1;
078c425b 1573 s += uskip;
b8c5462f 1574 }
a0ed51b3 1575 }
ffc61ed2
JH
1576 else {
1577 while (s < strend) {
1578 if (!isDIGIT_LC(*s)) {
1579 if (tmp && (norun || regtry(prog, s)))
1580 goto got_it;
1581 else
1582 tmp = doevery;
1583 }
cf93c79d 1584 else
ffc61ed2
JH
1585 tmp = 1;
1586 s++;
b8c5462f 1587 }
b8c5462f
JH
1588 }
1589 break;
b3c9acc1 1590 default:
3c3eec57
GS
1591 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1592 break;
d6a28714 1593 }
6eb5f6b9
JH
1594 return 0;
1595 got_it:
1596 return s;
1597}
1598
1599/*
1600 - regexec_flags - match a regexp against a string
1601 */
1602I32
1603Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1604 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1605/* strend: pointer to null at end of string */
1606/* strbeg: real beginning of string */
1607/* minend: end of match must be >=minend after stringarg. */
1608/* data: May be used for some additional optimizations. */
1609/* nosave: For optimizations. */
1610{
97aff369 1611 dVAR;
6eb5f6b9
JH
1612 register char *s;
1613 register regnode *c;
1614 register char *startpos = stringarg;
6eb5f6b9
JH
1615 I32 minlen; /* must match at least this many chars */
1616 I32 dontbother = 0; /* how many characters not to try at end */
6eb5f6b9
JH
1617 I32 end_shift = 0; /* Same for the end. */ /* CC */
1618 I32 scream_pos = -1; /* Internal iterator of scream. */
ccac19ea 1619 char *scream_olds = NULL;
6eb5f6b9 1620 SV* oreplsv = GvSV(PL_replgv);
1df70142 1621 const bool do_utf8 = DO_UTF8(sv);
a3b680e6 1622 const I32 multiline = prog->reganch & PMf_MULTILINE;
2a782b5b 1623#ifdef DEBUGGING
6136c704
AL
1624 SV * const dsv0 = PERL_DEBUG_PAD_ZERO(0);
1625 SV * const dsv1 = PERL_DEBUG_PAD_ZERO(1);
2a782b5b 1626#endif
a3621e74
YO
1627
1628 GET_RE_DEBUG_FLAGS_DECL;
1629
9d4ba2ae 1630 PERL_UNUSED_ARG(data);
a30b2f1f 1631 RX_MATCH_UTF8_set(prog,do_utf8);
6eb5f6b9 1632
6eb5f6b9
JH
1633 cache_re(prog);
1634#ifdef DEBUGGING
aea4f609 1635 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1636#endif
1637
1638 /* Be paranoid... */
1639 if (prog == NULL || startpos == NULL) {
1640 Perl_croak(aTHX_ "NULL regexp parameter");
1641 return 0;
1642 }
1643
1644 minlen = prog->minlen;
61a36c01 1645 if (strend - startpos < minlen) {
a3621e74 1646 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584
JH
1647 "String too short [regexec_flags]...\n"));
1648 goto phooey;
1aa99e6b 1649 }
6eb5f6b9 1650
6eb5f6b9
JH
1651 /* Check validity of program. */
1652 if (UCHARAT(prog->program) != REG_MAGIC) {
1653 Perl_croak(aTHX_ "corrupted regexp program");
1654 }
1655
1656 PL_reg_flags = 0;
1657 PL_reg_eval_set = 0;
1658 PL_reg_maxiter = 0;
1659
1660 if (prog->reganch & ROPT_UTF8)
1661 PL_reg_flags |= RF_utf8;
1662
1663 /* Mark beginning of line for ^ and lookbehind. */
1664 PL_regbol = startpos;
1665 PL_bostr = strbeg;
1666 PL_reg_sv = sv;
1667
1668 /* Mark end of line for $ (and such) */
1669 PL_regeol = strend;
1670
1671 /* see how far we have to get to not match where we matched before */
1672 PL_regtill = startpos+minend;
1673
1674 /* We start without call_cc context. */
1675 PL_reg_call_cc = 0;
1676
1677 /* If there is a "must appear" string, look for it. */
1678 s = startpos;
1679
1680 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1681 MAGIC *mg;
1682
1683 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1684 PL_reg_ganch = startpos;
1685 else if (sv && SvTYPE(sv) >= SVt_PVMG
1686 && SvMAGIC(sv)
14befaf4
DM
1687 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1688 && mg->mg_len >= 0) {
6eb5f6b9
JH
1689 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1690 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1691 if (s > PL_reg_ganch)
6eb5f6b9
JH
1692 goto phooey;
1693 s = PL_reg_ganch;
1694 }
1695 }
1696 else /* pos() not defined */
1697 PL_reg_ganch = strbeg;
1698 }
1699
a0714e2c 1700 if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
6eb5f6b9
JH
1701 re_scream_pos_data d;
1702
1703 d.scream_olds = &scream_olds;
1704 d.scream_pos = &scream_pos;
1705 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7 1706 if (!s) {
a3621e74 1707 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1708 goto phooey; /* not present */
3fa9c3d7 1709 }
6eb5f6b9
JH
1710 }
1711
a3621e74 1712 DEBUG_EXECUTE_r({
1df70142
AL
1713 const char * const s0 = UTF
1714 ? pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1715 UNI_DISPLAY_REGEX)
1716 : prog->precomp;
1717 const int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1718 const char * const s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
c728cb41 1719 UNI_DISPLAY_REGEX) : startpos;
1df70142 1720 const int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
2a782b5b
JH
1721 if (!PL_colorset)
1722 reginitcolors();
1723 PerlIO_printf(Perl_debug_log,
a0288114 1724 "%sMatching REx%s \"%s%*.*s%s%s\" against \"%s%.*s%s%s\"\n",
e4584336 1725 PL_colors[4], PL_colors[5], PL_colors[0],
9e55ce06 1726 len0, len0, s0,
2a782b5b 1727 PL_colors[1],
9e55ce06 1728 len0 > 60 ? "..." : "",
2a782b5b 1729 PL_colors[0],
9e55ce06
JH
1730 (int)(len1 > 60 ? 60 : len1),
1731 s1, PL_colors[1],
1732 (len1 > 60 ? "..." : "")
2a782b5b
JH
1733 );
1734 });
6eb5f6b9
JH
1735
1736 /* Simplest case: anchored match need be tried only once. */
1737 /* [unless only anchor is BOL and multiline is set] */
1738 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1739 if (s == startpos && regtry(prog, startpos))
1740 goto got_it;
7fba1cd6 1741 else if (multiline || (prog->reganch & ROPT_IMPLICIT)
6eb5f6b9
JH
1742 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1743 {
1744 char *end;
1745
1746 if (minlen)
1747 dontbother = minlen - 1;
1aa99e6b 1748 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9 1749 /* for multiline we only have to try after newlines */
33b8afdf 1750 if (prog->check_substr || prog->check_utf8) {
6eb5f6b9
JH
1751 if (s == startpos)
1752 goto after_try;
1753 while (1) {
1754 if (regtry(prog, s))
1755 goto got_it;
1756 after_try:
1757 if (s >= end)
1758 goto phooey;
1759 if (prog->reganch & RE_USE_INTUIT) {
1760 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1761 if (!s)
1762 goto phooey;
1763 }
1764 else
1765 s++;
1766 }
1767 } else {
1768 if (s > startpos)
1769 s--;
1770 while (s < end) {
1771 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1772 if (regtry(prog, s))
1773 goto got_it;
1774 }
1775 }
1776 }
1777 }
1778 goto phooey;
1779 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1780 if (regtry(prog, PL_reg_ganch))
1781 goto got_it;
1782 goto phooey;
1783 }
1784
1785 /* Messy cases: unanchored match. */
33b8afdf 1786 if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1787 /* we have /x+whatever/ */
1788 /* it must be a one character string (XXXX Except UTF?) */
33b8afdf 1789 char ch;
bf93d4cc
GS
1790#ifdef DEBUGGING
1791 int did_match = 0;
1792#endif
33b8afdf
JH
1793 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1794 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
3f7c398e 1795 ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
bf93d4cc 1796
1aa99e6b 1797 if (do_utf8) {
6eb5f6b9
JH
1798 while (s < strend) {
1799 if (*s == ch) {
a3621e74 1800 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1801 if (regtry(prog, s)) goto got_it;
1802 s += UTF8SKIP(s);
1803 while (s < strend && *s == ch)
1804 s += UTF8SKIP(s);
1805 }
1806 s += UTF8SKIP(s);
1807 }
1808 }
1809 else {
1810 while (s < strend) {
1811 if (*s == ch) {
a3621e74 1812 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1813 if (regtry(prog, s)) goto got_it;
1814 s++;
1815 while (s < strend && *s == ch)
1816 s++;
1817 }
1818 s++;
1819 }
1820 }
a3621e74 1821 DEBUG_EXECUTE_r(if (!did_match)
bf93d4cc 1822 PerlIO_printf(Perl_debug_log,
b7953727
JH
1823 "Did not find anchored character...\n")
1824 );
6eb5f6b9 1825 }
a0714e2c
SS
1826 else if (prog->anchored_substr != NULL
1827 || prog->anchored_utf8 != NULL
1828 || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
33b8afdf
JH
1829 && prog->float_max_offset < strend - s)) {
1830 SV *must;
1831 I32 back_max;
1832 I32 back_min;
1833 char *last;
6eb5f6b9 1834 char *last1; /* Last position checked before */
bf93d4cc
GS
1835#ifdef DEBUGGING
1836 int did_match = 0;
1837#endif
33b8afdf
JH
1838 if (prog->anchored_substr || prog->anchored_utf8) {
1839 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1840 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1841 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1842 back_max = back_min = prog->anchored_offset;
1843 } else {
1844 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1845 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1846 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1847 back_max = prog->float_max_offset;
1848 back_min = prog->float_min_offset;
1849 }
1850 if (must == &PL_sv_undef)
1851 /* could not downgrade utf8 check substring, so must fail */
1852 goto phooey;
1853
1854 last = HOP3c(strend, /* Cannot start after this */
1855 -(I32)(CHR_SVLEN(must)
1856 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9
JH
1857
1858 if (s > PL_bostr)
1859 last1 = HOPc(s, -1);
1860 else
1861 last1 = s - 1; /* bogus */
1862
a0288114 1863 /* XXXX check_substr already used to find "s", can optimize if
6eb5f6b9
JH
1864 check_substr==must. */
1865 scream_pos = -1;
1866 dontbother = end_shift;
1867 strend = HOPc(strend, -dontbother);
1868 while ( (s <= last) &&
9041c2e3 1869 ((flags & REXEC_SCREAM)
1aa99e6b 1870 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1871 end_shift, &scream_pos, 0))
1aa99e6b 1872 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1873 (unsigned char*)strend, must,
7fba1cd6 1874 multiline ? FBMrf_MULTILINE : 0))) ) {
4addbd3b
HS
1875 /* we may be pointing at the wrong string */
1876 if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
3f7c398e 1877 s = strbeg + (s - SvPVX_const(sv));
a3621e74 1878 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1879 if (HOPc(s, -back_max) > last1) {
1880 last1 = HOPc(s, -back_min);
1881 s = HOPc(s, -back_max);
1882 }
1883 else {
52657f30 1884 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
6eb5f6b9
JH
1885
1886 last1 = HOPc(s, -back_min);
52657f30 1887 s = t;
6eb5f6b9 1888 }
1aa99e6b 1889 if (do_utf8) {
6eb5f6b9
JH
1890 while (s <= last1) {
1891 if (regtry(prog, s))
1892 goto got_it;
1893 s += UTF8SKIP(s);
1894 }
1895 }
1896 else {
1897 while (s <= last1) {
1898 if (regtry(prog, s))
1899 goto got_it;
1900 s++;
1901 }
1902 }
1903 }
a3621e74 1904 DEBUG_EXECUTE_r(if (!did_match)
b7953727 1905 PerlIO_printf(Perl_debug_log,
a0288114 1906 "Did not find %s substr \"%s%.*s%s\"%s...\n",
33b8afdf 1907 ((must == prog->anchored_substr || must == prog->anchored_utf8)
bf93d4cc
GS
1908 ? "anchored" : "floating"),
1909 PL_colors[0],
1910 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 1911 SvPVX_const(must),
b7953727
JH
1912 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1913 );
6eb5f6b9
JH
1914 goto phooey;
1915 }
155aba94 1916 else if ((c = prog->regstclass)) {
f14c76ed
RGS
1917 if (minlen) {
1918 I32 op = (U8)OP(prog->regstclass);
66e933ab 1919 /* don't bother with what can't match */
f14c76ed
RGS
1920 if (PL_regkind[op] != EXACT && op != CANY)
1921 strend = HOPc(strend, -(minlen - 1));
1922 }
a3621e74 1923 DEBUG_EXECUTE_r({
ffc61ed2 1924 SV *prop = sv_newmortal();
cfd0369c
NC
1925 const char *s0;
1926 const char *s1;
9e55ce06
JH
1927 int len0;
1928 int len1;
1929
ffc61ed2 1930 regprop(prop, c);
9e55ce06 1931 s0 = UTF ?
3f7c398e 1932 pv_uni_display(dsv0, (U8*)SvPVX_const(prop), SvCUR(prop), 60,
c728cb41 1933 UNI_DISPLAY_REGEX) :
cfd0369c 1934 SvPVX_const(prop);
9e55ce06
JH
1935 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1936 s1 = UTF ?
c728cb41 1937 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
9e55ce06
JH
1938 len1 = UTF ? SvCUR(dsv1) : strend - s;
1939 PerlIO_printf(Perl_debug_log,
a0288114 1940 "Matching stclass \"%*.*s\" against \"%*.*s\"\n",
9e55ce06
JH
1941 len0, len0, s0,
1942 len1, len1, s1);
ffc61ed2 1943 });
06b5626a 1944 if (find_byclass(prog, c, s, strend, 0))
6eb5f6b9 1945 goto got_it;
a3621e74 1946 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1947 }
1948 else {
1949 dontbother = 0;
a0714e2c 1950 if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
33b8afdf 1951 /* Trim the end. */
d6a28714 1952 char *last;
33b8afdf
JH
1953 SV* float_real;
1954
1955 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1956 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1957 float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
d6a28714
JH
1958
1959 if (flags & REXEC_SCREAM) {
33b8afdf 1960 last = screaminstr(sv, float_real, s - strbeg,
d6a28714
JH
1961 end_shift, &scream_pos, 1); /* last one */
1962 if (!last)
ffc61ed2 1963 last = scream_olds; /* Only one occurrence. */
4addbd3b
HS
1964 /* we may be pointing at the wrong string */
1965 else if (RX_MATCH_COPIED(prog))
3f7c398e 1966 s = strbeg + (s - SvPVX_const(sv));
b8c5462f 1967 }
d6a28714
JH
1968 else {
1969 STRLEN len;
cfd0369c 1970 const char * const little = SvPV_const(float_real, len);
d6a28714 1971
33b8afdf 1972 if (SvTAIL(float_real)) {
d6a28714
JH
1973 if (memEQ(strend - len + 1, little, len - 1))
1974 last = strend - len + 1;
7fba1cd6 1975 else if (!multiline)
9041c2e3 1976 last = memEQ(strend - len, little, len)
bd61b366 1977 ? strend - len : NULL;
b8c5462f 1978 else
d6a28714
JH
1979 goto find_last;
1980 } else {
1981 find_last:
9041c2e3 1982 if (len)
d6a28714 1983 last = rninstr(s, strend, little, little + len);
b8c5462f 1984 else
a0288114 1985 last = strend; /* matching "$" */
b8c5462f 1986 }
b8c5462f 1987 }
bf93d4cc 1988 if (last == NULL) {
a3621e74 1989 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
bf93d4cc 1990 "%sCan't trim the tail, match fails (should not happen)%s\n",
e4584336 1991 PL_colors[4], PL_colors[5]));
bf93d4cc
GS
1992 goto phooey; /* Should not happen! */
1993 }
d6a28714
JH
1994 dontbother = strend - last + prog->float_min_offset;
1995 }
1996 if (minlen && (dontbother < minlen))
1997 dontbother = minlen - 1;
1998 strend -= dontbother; /* this one's always in bytes! */
1999 /* We don't know much -- general case. */
1aa99e6b 2000 if (do_utf8) {
d6a28714
JH
2001 for (;;) {
2002 if (regtry(prog, s))
2003 goto got_it;
2004 if (s >= strend)
2005 break;
b8c5462f 2006 s += UTF8SKIP(s);
d6a28714
JH
2007 };
2008 }
2009 else {
2010 do {
2011 if (regtry(prog, s))
2012 goto got_it;
2013 } while (s++ < strend);
2014 }
2015 }
2016
2017 /* Failure. */
2018 goto phooey;
2019
2020got_it:
2021 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2022
2023 if (PL_reg_eval_set) {
2024 /* Preserve the current value of $^R */
2025 if (oreplsv != GvSV(PL_replgv))
2026 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2027 restored, the value remains
2028 the same. */
acfe0abc 2029 restore_pos(aTHX_ 0);
d6a28714
JH
2030 }
2031
2032 /* make sure $`, $&, $', and $digit will work later */
2033 if ( !(flags & REXEC_NOT_FIRST) ) {
ed252734 2034 RX_MATCH_COPY_FREE(prog);
d6a28714
JH
2035 if (flags & REXEC_COPY_STR) {
2036 I32 i = PL_regeol - startpos + (stringarg - strbeg);
f8c7b90f 2037#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2038 if ((SvIsCOW(sv)
2039 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2040 if (DEBUG_C_TEST) {
2041 PerlIO_printf(Perl_debug_log,
2042 "Copy on write: regexp capture, type %d\n",
2043 (int) SvTYPE(sv));
2044 }
2045 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
d5263905 2046 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
ed252734
NC
2047 assert (SvPOKp(prog->saved_copy));
2048 } else
2049#endif
2050 {
2051 RX_MATCH_COPIED_on(prog);
2052 s = savepvn(strbeg, i);
2053 prog->subbeg = s;
2054 }
d6a28714 2055 prog->sublen = i;
d6a28714
JH
2056 }
2057 else {
2058 prog->subbeg = strbeg;
2059 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2060 }
2061 }
9041c2e3 2062
d6a28714
JH
2063 return 1;
2064
2065phooey:
a3621e74 2066 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
e4584336 2067 PL_colors[4], PL_colors[5]));
d6a28714 2068 if (PL_reg_eval_set)
acfe0abc 2069 restore_pos(aTHX_ 0);
d6a28714
JH
2070 return 0;
2071}
2072
2073/*
2074 - regtry - try match at specific point
2075 */
2076STATIC I32 /* 0 failure, 1 success */
2077S_regtry(pTHX_ regexp *prog, char *startpos)
2078{
97aff369 2079 dVAR;
d6a28714
JH
2080 register I32 i;
2081 register I32 *sp;
2082 register I32 *ep;
2083 CHECKPOINT lastcp;
a3621e74 2084 GET_RE_DEBUG_FLAGS_DECL;
d6a28714 2085
02db2b7b
IZ
2086#ifdef DEBUGGING
2087 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
2088#endif
d6a28714
JH
2089 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2090 MAGIC *mg;
2091
2092 PL_reg_eval_set = RS_init;
a3621e74 2093 DEBUG_EXECUTE_r(DEBUG_s(
b900a521
JH
2094 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2095 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 2096 ));
e8347627 2097 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
2098 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2099 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2100 SAVETMPS;
2101 /* Apparently this is not needed, judging by wantarray. */
e8347627 2102 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
2103 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2104
2105 if (PL_reg_sv) {
2106 /* Make $_ available to executed code. */
2107 if (PL_reg_sv != DEFSV) {
59f00321 2108 SAVE_DEFSV;
d6a28714 2109 DEFSV = PL_reg_sv;
b8c5462f 2110 }
d6a28714 2111
9041c2e3 2112 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 2113 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 2114 /* prepare for quick setting of pos */
d300d9fa
NC
2115#ifdef PERL_OLD_COPY_ON_WRITE
2116 if (SvIsCOW(sv))
2117 sv_force_normal_flags(sv, 0);
2118#endif
2119 mg = sv_magicext(PL_reg_sv, (SV*)0, PERL_MAGIC_regex_global,
2120 &PL_vtbl_mglob, NULL, 0);
d6a28714 2121 mg->mg_len = -1;
b8c5462f 2122 }
d6a28714
JH
2123 PL_reg_magic = mg;
2124 PL_reg_oldpos = mg->mg_len;
c76ac1ee 2125 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 2126 }
09687e5a 2127 if (!PL_reg_curpm) {
a02a5408 2128 Newxz(PL_reg_curpm, 1, PMOP);
09687e5a
AB
2129#ifdef USE_ITHREADS
2130 {
2131 SV* repointer = newSViv(0);
577e12cc 2132 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2133 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2134 av_push(PL_regex_padav,repointer);
2135 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2136 PL_regex_pad = AvARRAY(PL_regex_padav);
2137 }
2138#endif
2139 }
aaa362c4 2140 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2141 PL_reg_oldcurpm = PL_curpm;
2142 PL_curpm = PL_reg_curpm;
2143 if (RX_MATCH_COPIED(prog)) {
2144 /* Here is a serious problem: we cannot rewrite subbeg,
2145 since it may be needed if this match fails. Thus
2146 $` inside (?{}) could fail... */
2147 PL_reg_oldsaved = prog->subbeg;
2148 PL_reg_oldsavedlen = prog->sublen;
f8c7b90f 2149#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2150 PL_nrs = prog->saved_copy;
2151#endif
d6a28714
JH
2152 RX_MATCH_COPIED_off(prog);
2153 }
2154 else
bd61b366 2155 PL_reg_oldsaved = NULL;
d6a28714
JH
2156 prog->subbeg = PL_bostr;
2157 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2158 }
973dddac 2159 prog->startp[0] = startpos - PL_bostr;
d6a28714
JH
2160 PL_reginput = startpos;
2161 PL_regstartp = prog->startp;
2162 PL_regendp = prog->endp;
2163 PL_reglastparen = &prog->lastparen;
a01268b5 2164 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714 2165 prog->lastparen = 0;
03994de8 2166 prog->lastcloseparen = 0;
d6a28714 2167 PL_regsize = 0;
a3621e74 2168 DEBUG_EXECUTE_r(PL_reg_starttry = startpos);
d6a28714
JH
2169 if (PL_reg_start_tmpl <= prog->nparens) {
2170 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2171 if(PL_reg_start_tmp)
2172 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2173 else
a02a5408 2174 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
d6a28714
JH
2175 }
2176
2177 /* XXXX What this code is doing here?!!! There should be no need
2178 to do this again and again, PL_reglastparen should take care of
3dd2943c 2179 this! --ilya*/
dafc8851
JH
2180
2181 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2182 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2183 * PL_reglastparen), is not needed at all by the test suite
2184 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2185 * enough, for building DynaLoader, or otherwise this
2186 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2187 * will happen. Meanwhile, this code *is* needed for the
2188 * above-mentioned test suite tests to succeed. The common theme
2189 * on those tests seems to be returning null fields from matches.
2190 * --jhi */
dafc8851 2191#if 1
d6a28714
JH
2192 sp = prog->startp;
2193 ep = prog->endp;
2194 if (prog->nparens) {
eb160463 2195 for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
d6a28714
JH
2196 *++sp = -1;
2197 *++ep = -1;
2198 }
2199 }
dafc8851 2200#endif
02db2b7b 2201 REGCP_SET(lastcp);
d6a28714
JH
2202 if (regmatch(prog->program + 1)) {
2203 prog->endp[0] = PL_reginput - PL_bostr;
2204 return 1;
2205 }
02db2b7b 2206 REGCP_UNWIND(lastcp);
d6a28714
JH
2207 return 0;
2208}
2209
02db2b7b
IZ
2210#define RE_UNWIND_BRANCH 1
2211#define RE_UNWIND_BRANCHJ 2
2212
2213union re_unwind_t;
2214
2215typedef struct { /* XX: makes sense to enlarge it... */
2216 I32 type;
2217 I32 prev;
2218 CHECKPOINT lastcp;
2219} re_unwind_generic_t;
2220
2221typedef struct {
2222 I32 type;
2223 I32 prev;
2224 CHECKPOINT lastcp;
2225 I32 lastparen;
2226 regnode *next;
2227 char *locinput;
2228 I32 nextchr;
2229#ifdef DEBUGGING
2230 int regindent;
2231#endif
2232} re_unwind_branch_t;
2233
2234typedef union re_unwind_t {
2235 I32 type;
2236 re_unwind_generic_t generic;
2237 re_unwind_branch_t branch;
2238} re_unwind_t;
2239
8ba1375e
MJD
2240#define sayYES goto yes
2241#define sayNO goto no
e0f9d4a8 2242#define sayNO_ANYOF goto no_anyof
8ba1375e
MJD
2243#define sayYES_FINAL goto yes_final
2244#define sayYES_LOUD goto yes_loud
2245#define sayNO_FINAL goto no_final
2246#define sayNO_SILENT goto do_no
2247#define saySAME(x) if (x) goto yes; else goto no
2248
3ab3c9b4
HS
2249#define POSCACHE_SUCCESS 0 /* caching success rather than failure */
2250#define POSCACHE_SEEN 1 /* we know what we're caching */
2251#define POSCACHE_START 2 /* the real cache: this bit maps to pos 0 */
2252#define CACHEsayYES STMT_START { \
e822a8b4 2253 if (st->whilem.cache_offset | st->whilem.cache_bit) { \
3ab3c9b4
HS
2254 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2255 PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) || (1<<POSCACHE_SEEN); \
2256 else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2257 /* cache records failure, but this is success */ \
2258 DEBUG_r( \
2259 PerlIO_printf(Perl_debug_log, \
2260 "%*s (remove success from failure cache)\n", \
2261 REPORT_CODE_OFF+PL_regindent*2, "") \
2262 ); \
e822a8b4 2263 PL_reg_poscache[st->whilem.cache_offset] &= ~(1<<st->whilem.cache_bit); \
3ab3c9b4
HS
2264 } \
2265 } \
2266 sayYES; \
2267} STMT_END
2268#define CACHEsayNO STMT_START { \
e822a8b4 2269 if (st->whilem.cache_offset | st->whilem.cache_bit) { \
3ab3c9b4
HS
2270 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2271 PL_reg_poscache[0] |= (1<<POSCACHE_SEEN); \
2272 else if ((PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2273 /* cache records success, but this is failure */ \
2274 DEBUG_r( \
2275 PerlIO_printf(Perl_debug_log, \
2276 "%*s (remove failure from success cache)\n", \
2277 REPORT_CODE_OFF+PL_regindent*2, "") \
2278 ); \
e822a8b4 2279 PL_reg_poscache[st->whilem.cache_offset] &= ~(1<<st->whilem.cache_bit); \
3ab3c9b4
HS
2280 } \
2281 } \
2282 sayNO; \
2283} STMT_END
2284
a3621e74
YO
2285/* this is used to determine how far from the left messages like
2286 'failed...' are printed. Currently 29 makes these messages line
2287 up with the opcode they refer to. Earlier perls used 25 which
2288 left these messages outdented making reviewing a debug output
2289 quite difficult.
2290*/
2291#define REPORT_CODE_OFF 29
2292
2293
2294/* Make sure there is a test for this +1 options in re_tests */
2295#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2296
5d9a96ca
DM
2297/* grab a new slab and return the first slot in it */
2298
2299STATIC regmatch_state *
2300S_push_slab(pTHX)
2301{
2302 regmatch_slab *s = PL_regmatch_slab->next;
2303 if (!s) {
2304 Newx(s, 1, regmatch_slab);
2305 s->prev = PL_regmatch_slab;
2306 s->next = NULL;
2307 PL_regmatch_slab->next = s;
2308 }
2309 PL_regmatch_slab = s;
2310 return &s->states[0];
2311}
5b47454d 2312
95b24440
DM
2313/* simulate a recursive call to regmatch */
2314
2315#define REGMATCH(ns, where) \
5d9a96ca
DM
2316 st->scan = scan; \
2317 scan = (ns); \
2318 st->resume_state = resume_##where; \
95b24440
DM
2319 goto start_recurse; \
2320 resume_point_##where:
2321
d6a28714
JH
2322/*
2323 - regmatch - main matching routine
2324 *
2325 * Conceptually the strategy is simple: check to see whether the current
2326 * node matches, call self recursively to see whether the rest matches,
2327 * and then act accordingly. In practice we make some effort to avoid
2328 * recursion, in particular by going through "ordinary" nodes (that don't
2329 * need to know whether the rest of the match failed) by a loop instead of
2330 * by recursion.
2331 */
2332/* [lwall] I've hoisted the register declarations to the outer block in order to
2333 * maybe save a little bit of pushing and popping on the stack. It also takes
2334 * advantage of machines that use a register save mask on subroutine entry.
95b24440
DM
2335 *
2336 * This function used to be heavily recursive, but since this had the
2337 * effect of blowing the CPU stack on complex regexes, it has been
2338 * restructured to be iterative, and to save state onto the heap rather
2339 * than the stack. Essentially whereever regmatch() used to be called, it
2340 * pushes the current state, notes where to return, then jumps back into
2341 * the main loop.
2342 *
2343 * Originally the structure of this function used to look something like
2344
2345 S_regmatch() {
2346 int a = 1, b = 2;
2347 ...
2348 while (scan != NULL) {
5d9a96ca 2349 a++; // do stuff with a and b
95b24440
DM
2350 ...
2351 switch (OP(scan)) {
2352 case FOO: {
2353 int local = 3;
2354 ...
2355 if (regmatch(...)) // recurse
2356 goto yes;
2357 }
2358 ...
2359 }
2360 }
2361 yes:
2362 return 1;
2363 }
2364
2365 * Now it looks something like this:
2366
5d9a96ca 2367 typedef struct {
95b24440
DM
2368 int a, b, local;
2369 int resume_state;
5d9a96ca 2370 } regmatch_state;
95b24440
DM
2371
2372 S_regmatch() {
5d9a96ca
DM
2373 regmatch_state *st = new();
2374 int depth=0;
2375 st->a++; // do stuff with a and b
95b24440
DM
2376 ...
2377 while (scan != NULL) {
2378 ...
2379 switch (OP(scan)) {
2380 case FOO: {
5d9a96ca 2381 st->local = 3;
95b24440 2382 ...
5d9a96ca
DM
2383 st->scan = scan;
2384 scan = ...;
2385 st->resume_state = resume_FOO;
2386 goto start_recurse; // recurse
95b24440 2387
5d9a96ca
DM
2388 resume_point_FOO:
2389 if (result)
95b24440
DM
2390 goto yes;
2391 }
2392 ...
2393 }
5d9a96ca
DM
2394 start_recurse:
2395 st = new(); push a new state
2396 st->a = 1; st->b = 2;
2397 depth++;
95b24440 2398 }
5d9a96ca 2399 yes:
95b24440 2400 result = 1;
5d9a96ca
DM
2401 if (depth--) {
2402 st = pop();
95b24440
DM
2403 switch (resume_state) {
2404 case resume_FOO:
2405 goto resume_point_FOO;
2406 ...
2407 }
2408 }
2409 return result
2410 }
2411
2412 * WARNING: this means that any line in this function that contains a
2413 * REGMATCH() or TRYPAREN() is actually simulating a recursive call to
2414 * regmatch() using gotos instead. Thus the values of any local variables
2415 * not saved in the regmatch_state structure will have been lost when
2416 * execution resumes on the next line .
5d9a96ca
DM
2417 *
2418 * States (ie the st pointer) are allocated in slabs of about 4K in size.
2419 * PL_regmatch_state always points to the currently active state, and
2420 * PL_regmatch_slab points to the slab currently containing PL_regmatch_state.
2421 * The first time regmatch is called, the first slab is allocated, and is
2422 * never freed until interpreter desctruction. When the slab is full,
2423 * a new one is allocated chained to the end. At exit from regmatch, slabs
2424 * allocated since entry are freed.
d6a28714 2425 */
95b24440
DM
2426
2427
d6a28714
JH
2428STATIC I32 /* 0 failure, 1 success */
2429S_regmatch(pTHX_ regnode *prog)
2430{
27da23d5 2431 dVAR;
95b24440 2432 register const bool do_utf8 = PL_reg_match_utf8;
a0374537 2433 const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
95b24440 2434
5d9a96ca
DM
2435 regmatch_slab *orig_slab;
2436 regmatch_state *orig_state;
a3621e74 2437
5d9a96ca
DM
2438 /* the current state. This is a cached copy of PL_regmatch_state */
2439 register regmatch_state *st;
95b24440 2440
5d9a96ca
DM
2441 /* cache heavy used fields of st in registers */
2442 register regnode *scan;
2443 register regnode *next;
2444 register I32 n = 0; /* initialize to shut up compiler warning */
2445 register char *locinput = PL_reginput;
95b24440 2446
5d9a96ca
DM
2447 /* these variables are NOT saved during a recusive RFEGMATCH: */
2448 register I32 nextchr; /* is always set to UCHARAT(locinput) */
2449 bool result; /* return value of S_regmatch */
2450 regnode *inner; /* Next node in internal branch. */
2451 int depth = 0; /* depth of recursion */
95b24440
DM
2452
2453#ifdef DEBUGGING
ab74612d 2454 SV *re_debug_flags = NULL;
a3621e74 2455 GET_RE_DEBUG_FLAGS;
d6a28714
JH
2456 PL_regindent++;
2457#endif
2458
5d9a96ca
DM
2459 /* on first ever call to regmatch, allocate first slab */
2460 if (!PL_regmatch_slab) {
2461 Newx(PL_regmatch_slab, 1, regmatch_slab);
2462 PL_regmatch_slab->prev = NULL;
2463 PL_regmatch_slab->next = NULL;
2464 PL_regmatch_state = &PL_regmatch_slab->states[0] - 1;
2465 }
2466
2467 /* remember current high-water mark for exit */
2468 /* XXX this should be done with SAVE* instead */
2469 orig_slab = PL_regmatch_slab;
2470 orig_state = PL_regmatch_state;
2471
2472 /* grab next free state slot */
2473 st = ++PL_regmatch_state;
2474 if (st > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]))
2475 st = PL_regmatch_state = S_push_slab(aTHX);
2476
2477 st->minmod = 0;
2478 st->sw = 0;
2479 st->logical = 0;
2480 st->unwind = 0;
2481 st->cc = NULL;
d6a28714
JH
2482 /* Note that nextchr is a byte even in UTF */
2483 nextchr = UCHARAT(locinput);
2484 scan = prog;
2485 while (scan != NULL) {
8ba1375e 2486
a3621e74 2487 DEBUG_EXECUTE_r( {
6136c704 2488 SV * const prop = sv_newmortal();
1df70142
AL
2489 const int docolor = *PL_colors[0];
2490 const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2491 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2492 /* The part of the string before starttry has one color
2493 (pref0_len chars), between starttry and current
2494 position another one (pref_len - pref0_len chars),
2495 after the current position the third one.
2496 We assume that pref0_len <= pref_len, otherwise we
2497 decrease pref0_len. */
9041c2e3 2498 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2499 ? (5 + taill) - l : locinput - PL_bostr;
2500 int pref0_len;
d6a28714 2501
df1ffd02 2502 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2503 pref_len++;
2504 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2505 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2506 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2507 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2508 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2509 l--;
d6a28714
JH
2510 if (pref0_len < 0)
2511 pref0_len = 0;
2512 if (pref0_len > pref_len)
2513 pref0_len = pref_len;
2514 regprop(prop, scan);
2a782b5b 2515 {
1df70142 2516 const char * const s0 =
f14c76ed 2517 do_utf8 && OP(scan) != CANY ?
95b24440 2518 pv_uni_display(PERL_DEBUG_PAD(0), (U8*)(locinput - pref_len),
c728cb41 2519 pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2520 locinput - pref_len;
1df70142
AL
2521 const int len0 = do_utf8 ? strlen(s0) : pref0_len;
2522 const char * const s1 = do_utf8 && OP(scan) != CANY ?
95b24440
DM
2523 pv_uni_display(PERL_DEBUG_PAD(1),
2524 (U8*)(locinput - pref_len + pref0_len),
c728cb41 2525 pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2526 locinput - pref_len + pref0_len;
1df70142
AL
2527 const int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2528 const char * const s2 = do_utf8 && OP(scan) != CANY ?
95b24440 2529 pv_uni_display(PERL_DEBUG_PAD(2), (U8*)locinput,
c728cb41 2530 PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2531 locinput;
1df70142 2532 const int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2533 PerlIO_printf(Perl_debug_log,
2534 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2535 (IV)(locinput - PL_bostr),
2536 PL_colors[4],
2537 len0, s0,
2538 PL_colors[5],
2539 PL_colors[2],
2540 len1, s1,
2541 PL_colors[3],
2542 (docolor ? "" : "> <"),
2543 PL_colors[0],
2544 len2, s2,
2545 PL_colors[1],
2546 15 - l - pref_len + 1,
2547 "",
2548 (IV)(scan - PL_regprogram), PL_regindent*2, "",
3f7c398e 2549 SvPVX_const(prop));
2a782b5b
JH
2550 }
2551 });
d6a28714
JH
2552
2553 next = scan + NEXT_OFF(scan);
2554 if (next == scan)
2555 next = NULL;
2556
2557 switch (OP(scan)) {
2558 case BOL:
7fba1cd6 2559 if (locinput == PL_bostr)
d6a28714
JH
2560 {
2561 /* regtill = regbol; */
b8c5462f
JH
2562 break;
2563 }
d6a28714
JH
2564 sayNO;
2565 case MBOL:
12d33761
HS
2566 if (locinput == PL_bostr ||
2567 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2568 {
b8c5462f
JH
2569 break;
2570 }
d6a28714
JH
2571 sayNO;
2572 case SBOL:
c2a73568 2573 if (locinput == PL_bostr)
b8c5462f 2574 break;
d6a28714
JH
2575 sayNO;
2576 case GPOS:
2577 if (locinput == PL_reg_ganch)
2578 break;
2579 sayNO;
2580 case EOL:
d6a28714
JH
2581 goto seol;
2582 case MEOL:
d6a28714 2583 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2584 sayNO;
b8c5462f 2585 break;
d6a28714
JH
2586 case SEOL:
2587 seol:
2588 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2589 sayNO;
d6a28714 2590 if (PL_regeol - locinput > 1)
b8c5462f 2591 sayNO;
b8c5462f 2592 break;
d6a28714
JH
2593 case EOS:
2594 if (PL_regeol != locinput)
b8c5462f 2595 sayNO;
d6a28714 2596 break;
ffc61ed2 2597 case SANY:
d6a28714 2598 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2599 sayNO;
f33976b4
DB
2600 if (do_utf8) {
2601 locinput += PL_utf8skip[nextchr];
2602 if (locinput > PL_regeol)
2603 sayNO;
2604 nextchr = UCHARAT(locinput);
2605 }
2606 else
2607 nextchr = UCHARAT(++locinput);
2608 break;
2609 case CANY:
2610 if (!nextchr && locinput >= PL_regeol)
2611 sayNO;
b8c5462f 2612 nextchr = UCHARAT(++locinput);
a0d0e21e 2613 break;
ffc61ed2 2614 case REG_ANY:
1aa99e6b
IH
2615 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2616 sayNO;
2617 if (do_utf8) {
b8c5462f 2618 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2619 if (locinput > PL_regeol)
2620 sayNO;
a0ed51b3 2621 nextchr = UCHARAT(locinput);
a0ed51b3 2622 }
1aa99e6b
IH
2623 else
2624 nextchr = UCHARAT(++locinput);
a0ed51b3 2625 break;
a3621e74
YO
2626
2627
2628
2629 /*
2630 traverse the TRIE keeping track of all accepting states
2631 we transition through until we get to a failing node.
2632
a3621e74
YO
2633
2634 */
5b47454d 2635 case TRIE:
a3621e74
YO
2636 case TRIEF:
2637 case TRIEFL:
2638 {
a3621e74
YO
2639 U8 *uc = ( U8* )locinput;
2640 U32 state = 1;
2641 U16 charid = 0;
2642 U32 base = 0;
2643 UV uvc = 0;
2644 STRLEN len = 0;
2645 STRLEN foldlen = 0;
a3621e74
YO
2646 U8 *uscan = (U8*)NULL;
2647 STRLEN bufflen=0;
95b24440 2648 SV *sv_accept_buff = NULL;
5b47454d
DM
2649 const enum { trie_plain, trie_utf8, trie_uft8_fold }
2650 trie_type = do_utf8 ?
2651 (OP(scan) == TRIE ? trie_utf8 : trie_uft8_fold)
2652 : trie_plain;
2653
7087a21c
NC
2654 /* what trie are we using right now */
2655 reg_trie_data *trie
2656 = (reg_trie_data*)PL_regdata->data[ ARG( scan ) ];
e822a8b4 2657 st->trie.accepted = 0; /* how many accepting states we have seen */
95b24440 2658 result = 0;
a3621e74
YO
2659
2660 while ( state && uc <= (U8*)PL_regeol ) {
2661
5b47454d 2662 if (trie->states[ state ].wordnum) {
e822a8b4 2663 if (!st->trie.accepted ) {
5b47454d
DM
2664 ENTER;
2665 SAVETMPS;
2666 bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
2667 sv_accept_buff=newSV(bufflen *
2668 sizeof(reg_trie_accepted) - 1);
2669 SvCUR_set(sv_accept_buff,
2670 sizeof(reg_trie_accepted));
2671 SvPOK_on(sv_accept_buff);
2672 sv_2mortal(sv_accept_buff);
e822a8b4 2673 st->trie.accept_buff =
5b47454d
DM
2674 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
2675 }
2676 else {
e822a8b4 2677 if (st->trie.accepted >= bufflen) {
5b47454d 2678 bufflen *= 2;
e822a8b4 2679 st->trie.accept_buff =(reg_trie_accepted*)
5b47454d
DM
2680 SvGROW(sv_accept_buff,
2681 bufflen * sizeof(reg_trie_accepted));
2682 }
2683 SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
2684 + sizeof(reg_trie_accepted));
2685 }
e822a8b4
DM
2686 st->trie.accept_buff[st->trie.accepted].wordnum = trie->states[state].wordnum;
2687 st->trie.accept_buff[st->trie.accepted].endpos = uc;
2688 ++st->trie.accepted;
5b47454d 2689 }
a3621e74
YO
2690
2691 base = trie->states[ state ].trans.base;
2692
2693 DEBUG_TRIE_EXECUTE_r(
2694 PerlIO_printf( Perl_debug_log,
e4584336 2695 "%*s %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ",
a3621e74 2696 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
e822a8b4 2697 (UV)state, (UV)base, (UV)st->trie.accepted );
a3621e74
YO
2698 );
2699
2700 if ( base ) {
5b47454d
DM
2701 switch (trie_type) {
2702 case trie_uft8_fold:
a3621e74
YO
2703 if ( foldlen>0 ) {
2704 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
2705 foldlen -= len;
2706 uscan += len;
2707 len=0;
2708 } else {
1df70142 2709 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
a3621e74
YO
2710 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2711 uvc = to_uni_fold( uvc, foldbuf, &foldlen );
2712 foldlen -= UNISKIP( uvc );
2713 uscan = foldbuf + UNISKIP( uvc );
2714 }
5b47454d
DM
2715 break;
2716 case trie_utf8:
2717 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN,
2718 &len, uniflags );
2719 break;
2720 case trie_plain:
e4584336 2721 uvc = (UV)*uc;
a3621e74
YO
2722 len = 1;
2723 }
2724
5b47454d
DM
2725 if (uvc < 256) {
2726 charid = trie->charmap[ uvc ];
2727 }
2728 else {
2729 charid = 0;
2730 if (trie->widecharmap) {
2731 SV** svpp = (SV**)NULL;
2732 svpp = hv_fetch(trie->widecharmap,
2733 (char*)&uvc, sizeof(UV), 0);
2734 if (svpp)
2735 charid = (U16)SvIV(*svpp);
2736 }
2737 }
a3621e74 2738
5b47454d
DM
2739 if (charid &&
2740 (base + charid > trie->uniquecharcount )
2741 && (base + charid - 1 - trie->uniquecharcount
2742 < trie->lasttrans)
2743 && trie->trans[base + charid - 1 -
2744 trie->uniquecharcount].check == state)
2745 {
2746 state = trie->trans[base + charid - 1 -
2747 trie->uniquecharcount ].next;
2748 }
2749 else {
2750 state = 0;
2751 }
2752 uc += len;
2753
2754 }
2755 else {
a3621e74
YO
2756 state = 0;
2757 }
2758 DEBUG_TRIE_EXECUTE_r(
e4584336
RB
2759 PerlIO_printf( Perl_debug_log,
2760 "Charid:%3x CV:%4"UVxf" After State: %4"UVxf"%s\n",
2761 charid, uvc, (UV)state, PL_colors[5] );
a3621e74
YO
2762 );
2763 }
e822a8b4 2764 if (!st->trie.accepted )
a3621e74 2765 sayNO;
a3621e74
YO
2766
2767 /*
2768 There was at least one accepting state that we
2769 transitioned through. Presumably the number of accepting
2770 states is going to be low, typically one or two. So we
2771 simply scan through to find the one with lowest wordnum.
2772 Once we find it, we swap the last state into its place
2773 and decrement the size. We then try to match the rest of
2774 the pattern at the point where the word ends, if we
2775 succeed then we end the loop, otherwise the loop
2776 eventually terminates once all of the accepting states
2777 have been tried.
2778 */
a3621e74 2779
e822a8b4 2780 if ( st->trie.accepted == 1 ) {
a3621e74 2781 DEBUG_EXECUTE_r({
e822a8b4 2782 SV **tmp = av_fetch( trie->words, st->trie.accept_buff[ 0 ].wordnum-1, 0 );
a3621e74
YO
2783 PerlIO_printf( Perl_debug_log,
2784 "%*s %sonly one match : #%d <%s>%s\n",
e4584336 2785 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
e822a8b4 2786 st->trie.accept_buff[ 0 ].wordnum,
cfd0369c 2787 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",
a3621e74
YO
2788 PL_colors[5] );
2789 });
e822a8b4 2790 PL_reginput = (char *)st->trie.accept_buff[ 0 ].endpos;
a3621e74
YO
2791 /* in this case we free tmps/leave before we call regmatch
2792 as we wont be using accept_buff again. */
2793 FREETMPS;
2794 LEAVE;
95b24440
DM
2795 REGMATCH(scan + NEXT_OFF(scan), TRIE1);
2796 /*** all unsaved local vars undefined at this point */
a3621e74
YO
2797 } else {
2798 DEBUG_EXECUTE_r(
e4584336 2799 PerlIO_printf( Perl_debug_log,"%*s %sgot %"IVdf" possible matches%s\n",
e822a8b4 2800 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], (IV)st->trie.accepted,
a3621e74
YO
2801 PL_colors[5] );
2802 );
e822a8b4 2803 while ( !result && st->trie.accepted-- ) {
a3621e74
YO
2804 U32 best = 0;
2805 U32 cur;
e822a8b4 2806 for( cur = 1 ; cur <= st->trie.accepted ; cur++ ) {
e4584336
RB
2807 DEBUG_TRIE_EXECUTE_r(
2808 PerlIO_printf( Perl_debug_log,
2809 "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
2810 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
e822a8b4
DM
2811 (IV)best, st->trie.accept_buff[ best ].wordnum, (IV)cur,
2812 st->trie.accept_buff[ cur ].wordnum, PL_colors[5] );
e4584336 2813 );
a3621e74 2814
e822a8b4
DM
2815 if (st->trie.accept_buff[cur].wordnum <
2816 st->trie.accept_buff[best].wordnum)
2817 best = cur;
a3621e74
YO
2818 }
2819 DEBUG_EXECUTE_r({
e822a8b4 2820 SV ** const tmp = av_fetch( trie->words, st->trie.accept_buff[ best ].wordnum - 1, 0 );
a3621e74 2821 PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at 0x%p%s\n",
e4584336 2822 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
e822a8b4 2823 st->trie.accept_buff[best].wordnum,
cfd0369c 2824 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",scan,
a3621e74
YO
2825 PL_colors[5] );
2826 });
e822a8b4
DM
2827 if ( best<st->trie.accepted ) {
2828 reg_trie_accepted tmp = st->trie.accept_buff[ best ];
2829 st->trie.accept_buff[ best ] = st->trie.accept_buff[ st->trie.accepted ];
2830 st->trie.accept_buff[ st->trie.accepted ] = tmp;
2831 best = st->trie.accepted;
a3621e74 2832 }
e822a8b4 2833 PL_reginput = (char *)st->trie.accept_buff[ best ].endpos;
a3621e74
YO
2834
2835 /*
2836 as far as I can tell we only need the SAVETMPS/FREETMPS
2837 for re's with EVAL in them but I'm leaving them in for
2838 all until I can be sure.
2839 */
2840 SAVETMPS;
95b24440
DM
2841 REGMATCH(scan + NEXT_OFF(scan), TRIE2);
2842 /*** all unsaved local vars undefined at this point */
a3621e74
YO
2843 FREETMPS;
2844 }
2845 FREETMPS;
2846 LEAVE;
2847 }
2848
95b24440 2849 if (result) {
a3621e74
YO
2850 sayYES;
2851 } else {
2852 sayNO;
2853 }
2854 }
2855 /* unreached codepoint */
95b24440
DM
2856 case EXACT: {
2857 char *s = STRING(scan);
5d9a96ca 2858 st->ln = STR_LEN(scan);
eb160463 2859 if (do_utf8 != UTF) {
bc517b45 2860 /* The target and the pattern have differing utf8ness. */
1aa99e6b 2861 char *l = locinput;
5d9a96ca 2862 const char *e = s + st->ln;
a72c7584 2863
5ff6fc6d
JH
2864 if (do_utf8) {
2865 /* The target is utf8, the pattern is not utf8. */
1aa99e6b 2866 while (s < e) {
a3b680e6 2867 STRLEN ulen;
1aa99e6b 2868 if (l >= PL_regeol)
5ff6fc6d
JH
2869 sayNO;
2870 if (NATIVE_TO_UNI(*(U8*)s) !=
89ebb4a3 2871 utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
041457d9 2872 uniflags))
5ff6fc6d 2873 sayNO;
bc517b45 2874 l += ulen;
5ff6fc6d 2875 s ++;
1aa99e6b 2876 }
5ff6fc6d
JH
2877 }
2878 else {
2879 /* The target is not utf8, the pattern is utf8. */
1aa99e6b 2880 while (s < e) {
a3b680e6 2881 STRLEN ulen;
1aa99e6b
IH
2882 if (l >= PL_regeol)
2883 sayNO;
5ff6fc6d 2884 if (NATIVE_TO_UNI(*((U8*)l)) !=
89ebb4a3 2885 utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
041457d9 2886 uniflags))
1aa99e6b 2887 sayNO;
bc517b45 2888 s += ulen;
a72c7584 2889 l ++;
1aa99e6b 2890 }
5ff6fc6d 2891 }
1aa99e6b
IH
2892 locinput = l;
2893 nextchr = UCHARAT(locinput);
2894 break;
2895 }
bc517b45 2896 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2897 /* Inline the first character, for speed. */
2898 if (UCHARAT(s) != nextchr)
2899 sayNO;
5d9a96ca 2900 if (PL_regeol - locinput < st->ln)
d6a28714 2901 sayNO;
5d9a96ca 2902 if (st->ln > 1 && memNE(s, locinput, st->ln))
d6a28714 2903 sayNO;
5d9a96ca 2904 locinput += st->ln;
d6a28714
JH
2905 nextchr = UCHARAT(locinput);
2906 break;
95b24440 2907 }
d6a28714 2908 case EXACTFL:
b8c5462f
JH
2909 PL_reg_flags |= RF_tainted;
2910 /* FALL THROUGH */
95b24440
DM
2911 case EXACTF: {
2912 char *s = STRING(scan);
5d9a96ca 2913 st->ln = STR_LEN(scan);
d6a28714 2914
d07ddd77
JH
2915 if (do_utf8 || UTF) {
2916 /* Either target or the pattern are utf8. */
d6a28714 2917 char *l = locinput;
d07ddd77 2918 char *e = PL_regeol;
bc517b45 2919
5d9a96ca 2920 if (ibcmp_utf8(s, 0, st->ln, (bool)UTF,
1feea2c7 2921 l, &e, 0, do_utf8)) {
5486206c
JH
2922 /* One more case for the sharp s:
2923 * pack("U0U*", 0xDF) =~ /ss/i,
2924 * the 0xC3 0x9F are the UTF-8
2925 * byte sequence for the U+00DF. */
2926 if (!(do_utf8 &&
2927 toLOWER(s[0]) == 's' &&
5d9a96ca 2928 st->ln >= 2 &&
5486206c
JH
2929 toLOWER(s[1]) == 's' &&
2930 (U8)l[0] == 0xC3 &&
2931 e - l >= 2 &&
2932 (U8)l[1] == 0x9F))
2933 sayNO;
2934 }
d07ddd77
JH
2935 locinput = e;
2936 nextchr = UCHARAT(locinput);
2937 break;
a0ed51b3 2938 }
d6a28714 2939
bc517b45
JH
2940 /* Neither the target and the pattern are utf8. */
2941
d6a28714
JH
2942 /* Inline the first character, for speed. */
2943 if (UCHARAT(s) != nextchr &&
2944 UCHARAT(s) != ((OP(scan) == EXACTF)
2945 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2946 sayNO;
5d9a96ca 2947 if (PL_regeol - locinput < st->ln)
b8c5462f 2948 sayNO;
5d9a96ca
DM
2949 if (st->ln > 1 && (OP(scan) == EXACTF
2950 ? ibcmp(s, locinput, st->ln)
2951 : ibcmp_locale(s, locinput, st->ln)))
4633a7c4 2952 sayNO;
5d9a96ca 2953 locinput += st->ln;
d6a28714 2954 nextchr = UCHARAT(locinput);
a0d0e21e 2955 break;
95b24440 2956 }
d6a28714 2957 case ANYOF:
ffc61ed2 2958 if (do_utf8) {
9e55ce06
JH
2959 STRLEN inclasslen = PL_regeol - locinput;
2960
ba7b4546 2961 if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
e0f9d4a8 2962 sayNO_ANYOF;
ffc61ed2
JH
2963 if (locinput >= PL_regeol)
2964 sayNO;
0f0076b4 2965 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
b8c5462f 2966 nextchr = UCHARAT(locinput);
e0f9d4a8 2967 break;
ffc61ed2
JH
2968 }
2969 else {
2970 if (nextchr < 0)
2971 nextchr = UCHARAT(locinput);
7d3e948e 2972 if (!REGINCLASS(scan, (U8*)locinput))
e0f9d4a8 2973 sayNO_ANYOF;
ffc61ed2
JH
2974 if (!nextchr && locinput >= PL_regeol)
2975 sayNO;
2976 nextchr = UCHARAT(++locinput);
e0f9d4a8
JH
2977 break;
2978 }
2979 no_anyof:
2980 /* If we might have the case of the German sharp s
2981 * in a casefolding Unicode character class. */
2982
ebc501f0
JH
2983 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2984 locinput += SHARP_S_SKIP;
e0f9d4a8 2985 nextchr = UCHARAT(locinput);
ffc61ed2 2986 }
e0f9d4a8
JH
2987 else
2988 sayNO;
b8c5462f 2989 break;
d6a28714 2990 case ALNUML:
b8c5462f
JH
2991 PL_reg_flags |= RF_tainted;
2992 /* FALL THROUGH */
d6a28714 2993 case ALNUM:
b8c5462f 2994 if (!nextchr)
4633a7c4 2995 sayNO;
ffc61ed2 2996 if (do_utf8) {
1a4fad37 2997 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 2998 if (!(OP(scan) == ALNUM
3568d838 2999 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 3000 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
3001 {
3002 sayNO;
a0ed51b3 3003 }
b8c5462f 3004 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3005 nextchr = UCHARAT(locinput);
3006 break;
3007 }
ffc61ed2 3008 if (!(OP(scan) == ALNUM
d6a28714 3009 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 3010 sayNO;
b8c5462f 3011 nextchr = UCHARAT(++locinput);
a0d0e21e 3012 break;
d6a28714 3013 case NALNUML:
b8c5462f
JH
3014 PL_reg_flags |= RF_tainted;
3015 /* FALL THROUGH */
d6a28714
JH
3016 case NALNUM:
3017 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 3018 sayNO;
ffc61ed2 3019 if (do_utf8) {
1a4fad37 3020 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3021 if (OP(scan) == NALNUM
3568d838 3022 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
3023 : isALNUM_LC_utf8((U8*)locinput))
3024 {
b8c5462f 3025 sayNO;
d6a28714 3026 }
b8c5462f
JH
3027 locinput += PL_utf8skip[nextchr];
3028 nextchr = UCHARAT(locinput);
3029 break;
3030 }
ffc61ed2 3031 if (OP(scan) == NALNUM
d6a28714 3032 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 3033 sayNO;
76e3520e 3034 nextchr = UCHARAT(++locinput);
a0d0e21e 3035 break;
d6a28714
JH
3036 case BOUNDL:
3037 case NBOUNDL:
3280af22 3038 PL_reg_flags |= RF_tainted;
bbce6d69 3039 /* FALL THROUGH */
d6a28714
JH
3040 case BOUND:
3041 case NBOUND:
3042 /* was last char in word? */
ffc61ed2 3043 if (do_utf8) {
12d33761 3044 if (locinput == PL_bostr)
5d9a96ca 3045 st->ln = '\n';
ffc61ed2 3046 else {
a3b680e6 3047 const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
9041c2e3 3048
a0374537 3049 st->ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
ffc61ed2
JH
3050 }
3051 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
5d9a96ca 3052 st->ln = isALNUM_uni(st->ln);
1a4fad37 3053 LOAD_UTF8_CHARCLASS_ALNUM();
3568d838 3054 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
3055 }
3056 else {
5d9a96ca 3057 st->ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(st->ln));
ffc61ed2
JH
3058 n = isALNUM_LC_utf8((U8*)locinput);
3059 }
a0ed51b3 3060 }
d6a28714 3061 else {
5d9a96ca 3062 st->ln = (locinput != PL_bostr) ?
12d33761 3063 UCHARAT(locinput - 1) : '\n';
ffc61ed2 3064 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
5d9a96ca 3065 st->ln = isALNUM(st->ln);
ffc61ed2
JH
3066 n = isALNUM(nextchr);
3067 }
3068 else {
5d9a96ca 3069 st->ln = isALNUM_LC(st->ln);
ffc61ed2
JH
3070 n = isALNUM_LC(nextchr);
3071 }
d6a28714 3072 }
5d9a96ca 3073 if (((!st->ln) == (!n)) == (OP(scan) == BOUND ||
ffc61ed2
JH
3074 OP(scan) == BOUNDL))
3075 sayNO;
a0ed51b3 3076 break;
d6a28714 3077 case SPACEL:
3280af22 3078 PL_reg_flags |= RF_tainted;
bbce6d69 3079 /* FALL THROUGH */
d6a28714 3080 case SPACE:
9442cb0e 3081 if (!nextchr)
4633a7c4 3082 sayNO;
1aa99e6b 3083 if (do_utf8) {
fd400ab9 3084 if (UTF8_IS_CONTINUED(nextchr)) {
1a4fad37 3085 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3086 if (!(OP(scan) == SPACE
3568d838 3087 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
3088 : isSPACE_LC_utf8((U8*)locinput)))
3089 {
3090 sayNO;
3091 }
3092 locinput += PL_utf8skip[nextchr];
3093 nextchr = UCHARAT(locinput);
3094 break;
d6a28714 3095 }
ffc61ed2
JH
3096 if (!(OP(scan) == SPACE
3097 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3098 sayNO;
3099 nextchr = UCHARAT(++locinput);
3100 }
3101 else {
3102 if (!(OP(scan) == SPACE
3103 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3104 sayNO;
3105 nextchr = UCHARAT(++locinput);
a0ed51b3 3106 }
a0ed51b3 3107 break;
d6a28714 3108 case NSPACEL:
3280af22 3109 PL_reg_flags |= RF_tainted;
bbce6d69 3110 /* FALL THROUGH */
d6a28714 3111 case NSPACE:
9442cb0e 3112 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3113 sayNO;
1aa99e6b 3114 if (do_utf8) {
1a4fad37 3115 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3116 if (OP(scan) == NSPACE
3568d838 3117 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 3118 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
3119 {
3120 sayNO;
3121 }
3122 locinput += PL_utf8skip[nextchr];
3123 nextchr = UCHARAT(locinput);
3124 break;
a0ed51b3 3125 }
ffc61ed2 3126 if (OP(scan) == NSPACE
d6a28714 3127 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 3128 sayNO;
76e3520e 3129 nextchr = UCHARAT(++locinput);
a0d0e21e 3130 break;
d6a28714 3131 case DIGITL:
a0ed51b3
LW
3132 PL_reg_flags |= RF_tainted;
3133 /* FALL THROUGH */
d6a28714 3134 case DIGIT:
9442cb0e 3135 if (!nextchr)
a0ed51b3 3136 sayNO;
1aa99e6b 3137 if (do_utf8) {
1a4fad37 3138 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3139 if (!(OP(scan) == DIGIT
3568d838 3140 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 3141 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 3142 {
a0ed51b3 3143 sayNO;
dfe13c55 3144 }
6f06b55f 3145 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3146 nextchr = UCHARAT(locinput);
3147 break;
3148 }
ffc61ed2 3149 if (!(OP(scan) == DIGIT
9442cb0e 3150 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
3151 sayNO;
3152 nextchr = UCHARAT(++locinput);
3153 break;
d6a28714 3154 case NDIGITL:
b8c5462f
JH
3155 PL_reg_flags |= RF_tainted;
3156 /* FALL THROUGH */
d6a28714 3157 case NDIGIT:
9442cb0e 3158 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3159 sayNO;
1aa99e6b 3160 if (do_utf8) {
1a4fad37 3161 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3162 if (OP(scan) == NDIGIT
3568d838 3163 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
3164 : isDIGIT_LC_utf8((U8*)locinput))
3165 {
a0ed51b3 3166 sayNO;
9442cb0e 3167 }
6f06b55f 3168 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3169 nextchr = UCHARAT(locinput);
3170 break;
3171 }
ffc61ed2 3172 if (OP(scan) == NDIGIT
9442cb0e 3173 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
3174 sayNO;
3175 nextchr = UCHARAT(++locinput);
3176 break;
3177 case CLUMP:
b7c83a7e 3178 if (locinput >= PL_regeol)
a0ed51b3 3179 sayNO;
b7c83a7e 3180 if (do_utf8) {
1a4fad37 3181 LOAD_UTF8_CHARCLASS_MARK();
b7c83a7e
JH
3182 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3183 sayNO;
3184 locinput += PL_utf8skip[nextchr];
3185 while (locinput < PL_regeol &&
3186 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3187 locinput += UTF8SKIP(locinput);
3188 if (locinput > PL_regeol)
3189 sayNO;
eb08e2da
JH
3190 }
3191 else
3192 locinput++;
a0ed51b3
LW
3193 nextchr = UCHARAT(locinput);
3194 break;
c8756f30 3195 case REFFL:
3280af22 3196 PL_reg_flags |= RF_tainted;
c8756f30 3197 /* FALL THROUGH */
c277df42 3198 case REF:
95b24440
DM
3199 case REFF: {
3200 char *s;
c277df42 3201 n = ARG(scan); /* which paren pair */
5d9a96ca 3202 st->ln = PL_regstartp[n];
2c2d71f5 3203 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
5d9a96ca 3204 if ((I32)*PL_reglastparen < n || st->ln == -1)
af3f8c16 3205 sayNO; /* Do not match unless seen CLOSEn. */
5d9a96ca 3206 if (st->ln == PL_regendp[n])
a0d0e21e 3207 break;
a0ed51b3 3208
5d9a96ca 3209 s = PL_bostr + st->ln;
1aa99e6b 3210 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 3211 char *l = locinput;
a3b680e6 3212 const char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
3213 /*
3214 * Note that we can't do the "other character" lookup trick as
3215 * in the 8-bit case (no pun intended) because in Unicode we
3216 * have to map both upper and title case to lower case.
3217 */
3218 if (OP(scan) == REFF) {
3219 while (s < e) {
a3b680e6
AL
3220 STRLEN ulen1, ulen2;
3221 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3222 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3223
a0ed51b3
LW
3224 if (l >= PL_regeol)
3225 sayNO;
a2a2844f
JH
3226 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3227 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 3228 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 3229 sayNO;
a2a2844f
JH
3230 s += ulen1;
3231 l += ulen2;
a0ed51b3
LW
3232 }
3233 }
3234 locinput = l;
3235 nextchr = UCHARAT(locinput);
3236 break;
3237 }
3238
a0d0e21e 3239 /* Inline the first character, for speed. */
76e3520e 3240 if (UCHARAT(s) != nextchr &&
c8756f30
AK
3241 (OP(scan) == REF ||
3242 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 3243 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 3244 sayNO;
5d9a96ca
DM
3245 st->ln = PL_regendp[n] - st->ln;
3246 if (locinput + st->ln > PL_regeol)
4633a7c4 3247 sayNO;
5d9a96ca
DM
3248 if (st->ln > 1 && (OP(scan) == REF
3249 ? memNE(s, locinput, st->ln)
c8756f30 3250 : (OP(scan) == REFF
5d9a96ca
DM
3251 ? ibcmp(s, locinput, st->ln)
3252 : ibcmp_locale(s, locinput, st->ln))))
4633a7c4 3253 sayNO;
5d9a96ca 3254 locinput += st->ln;
76e3520e 3255 nextchr = UCHARAT(locinput);
a0d0e21e 3256 break;
95b24440 3257 }
a0d0e21e
LW
3258
3259 case NOTHING:
c277df42 3260 case TAIL:
a0d0e21e
LW
3261 break;
3262 case BACK:
3263 break;
c277df42
IZ
3264 case EVAL:
3265 {
3266 dSP;
6136c704
AL
3267 OP_4tree * const oop = PL_op;
3268 COP * const ocurcop = PL_curcop;
f3548bdc 3269 PAD *old_comppad;
c277df42 3270 SV *ret;
6136c704 3271 struct regexp * const oreg = PL_reg_re;
9041c2e3 3272
c277df42 3273 n = ARG(scan);
533c011a 3274 PL_op = (OP_4tree*)PL_regdata->data[n];
a3621e74 3275 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
f3548bdc 3276 PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_regdata->data[n + 2]);
cf93c79d 3277 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 3278
8e5e9ebe 3279 {
6136c704 3280 SV ** const before = SP;
8e5e9ebe
RGS
3281 CALLRUNOPS(aTHX); /* Scalar context. */
3282 SPAGAIN;
3283 if (SP == before)
075aa684 3284 ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
8e5e9ebe
RGS
3285 else {
3286 ret = POPs;
3287 PUTBACK;
3288 }
3289 }
3290
0f5d15d6 3291 PL_op = oop;
f3548bdc 3292 PAD_RESTORE_LOCAL(old_comppad);
0f5d15d6 3293 PL_curcop = ocurcop;
5d9a96ca
DM
3294 if (st->logical) {
3295 if (st->logical == 2) { /* Postponed subexpression. */
0f5d15d6 3296 regexp *re;
6136c704 3297 MAGIC *mg = NULL;
0f5d15d6 3298 re_cc_state state;
cb50f42d 3299 int toggleutf;
faf82a0b 3300 register SV *sv;
0f5d15d6 3301
faf82a0b
AE
3302 if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3303 mg = mg_find(sv, PERL_MAGIC_qr);
3304 else if (SvSMAGICAL(ret)) {
3305 if (SvGMAGICAL(ret))
3306 sv_unmagic(ret, PERL_MAGIC_qr);
3307 else
3308 mg = mg_find(ret, PERL_MAGIC_qr);
0f5d15d6 3309 }
faf82a0b 3310
0f5d15d6
IZ
3311 if (mg) {
3312 re = (regexp *)mg->mg_obj;
df0003d4 3313 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
3314 }
3315 else {
3316 STRLEN len;
6136c704 3317 const char * const t = SvPV_const(ret, len);
0f5d15d6 3318 PMOP pm;
a3b680e6
AL
3319 char * const oprecomp = PL_regprecomp;
3320 const I32 osize = PL_regsize;
3321 const I32 onpar = PL_regnpar;
0f5d15d6 3322
5fcd1c1b 3323 Zero(&pm, 1, PMOP);
cb50f42d 3324 if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
83003860 3325 re = CALLREGCOMP(aTHX_ (char*)t, (char*)t + len, &pm);
9041c2e3 3326 if (!(SvFLAGS(ret)
faf82a0b
AE
3327 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3328 | SVs_GMG)))
14befaf4
DM
3329 sv_magic(ret,(SV*)ReREFCNT_inc(re),
3330 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
3331 PL_regprecomp = oprecomp;
3332 PL_regsize = osize;
3333 PL_regnpar = onpar;
3334 }
a3621e74 3335 DEBUG_EXECUTE_r(
9041c2e3 3336 PerlIO_printf(Perl_debug_log,
a0288114 3337 "Entering embedded \"%s%.60s%s%s\"\n",
0f5d15d6
IZ
3338 PL_colors[0],
3339 re->precomp,
3340 PL_colors[1],
3341 (strlen(re->precomp) > 60 ? "..." : ""))
3342 );
3343 state.node = next;
3344 state.prev = PL_reg_call_cc;
5d9a96ca 3345 state.cc = st->cc;
0f5d15d6
IZ
3346 state.re = PL_reg_re;
3347
5d9a96ca 3348 st->cc = 0;
9041c2e3 3349
e822a8b4
DM
3350 st->eval.cp = regcppush(0); /* Save *all* the positions. */
3351 REGCP_SET(st->eval.lastcp);
0f5d15d6
IZ
3352 cache_re(re);
3353 state.ss = PL_savestack_ix;
3354 *PL_reglastparen = 0;
a01268b5 3355 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
3356 PL_reg_call_cc = &state;
3357 PL_reginput = locinput;
cb50f42d
YST
3358 toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
3359 ((re->reganch & ROPT_UTF8) != 0);
3360 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c2d71f5
JH
3361
3362 /* XXXX This is too dramatic a measure... */
3363 PL_reg_maxiter = 0;
3364
95b24440 3365 /* XXX the only recursion left in regmatch() */
0f5d15d6 3366 if (regmatch(re->program + 1)) {
2c914db6
IZ
3367 /* Even though we succeeded, we need to restore
3368 global variables, since we may be wrapped inside
3369 SUSPEND, thus the match may be not finished yet. */
3370
3371 /* XXXX Do this only if SUSPENDed? */
3372 PL_reg_call_cc = state.prev;
5d9a96ca 3373 st->cc = state.cc;
2c914db6
IZ
3374 PL_reg_re = state.re;
3375 cache_re(PL_reg_re);
cb50f42d 3376 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c914db6
IZ
3377
3378 /* XXXX This is too dramatic a measure... */
3379 PL_reg_maxiter = 0;
3380
3381 /* These are needed even if not SUSPEND. */
0f5d15d6 3382 ReREFCNT_dec(re);
e822a8b4 3383 regcpblow(st->eval.cp);
0f5d15d6
IZ
3384 sayYES;
3385 }
0f5d15d6 3386 ReREFCNT_dec(re);
e822a8b4 3387 REGCP_UNWIND(st->eval.lastcp);
0f5d15d6
IZ
3388 regcppop();
3389 PL_reg_call_cc = state.prev;
5d9a96ca 3390 st->cc = state.cc;
0f5d15d6 3391 PL_reg_re = state.re;
d3790889 3392 cache_re(PL_reg_re);
cb50f42d 3393 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c2d71f5
JH
3394
3395 /* XXXX This is too dramatic a measure... */
3396 PL_reg_maxiter = 0;
3397
5d9a96ca 3398 st->logical = 0;
0f5d15d6
IZ
3399 sayNO;
3400 }
5d9a96ca
DM
3401 st->sw = SvTRUE(ret);
3402 st->logical = 0;
a0ed51b3 3403 }
080c2dec 3404 else {
3280af22 3405 sv_setsv(save_scalar(PL_replgv), ret);
080c2dec
AE
3406 cache_re(oreg);
3407 }
c277df42
IZ
3408 break;
3409 }
a0d0e21e 3410 case OPEN:
c277df42 3411 n = ARG(scan); /* which paren pair */
3280af22
NIS
3412 PL_reg_start_tmp[n] = locinput;
3413 if (n > PL_regsize)
3414 PL_regsize = n;
a0d0e21e
LW
3415 break;
3416 case CLOSE:
c277df42 3417 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
3418 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3419 PL_regendp[n] = locinput - PL_bostr;
eb160463 3420 if (n > (I32)*PL_reglastparen)
3280af22 3421 *PL_reglastparen = n;
a01268b5 3422 *PL_reglastcloseparen = n;
a0d0e21e 3423 break;
c277df42
IZ
3424 case GROUPP:
3425 n = ARG(scan); /* which paren pair */
5d9a96ca 3426 st->sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
3427 break;
3428 case IFTHEN:
2c2d71f5 3429 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
5d9a96ca 3430 if (st->sw)
c277df42
IZ
3431 next = NEXTOPER(NEXTOPER(scan));
3432 else {
3433 next = scan + ARG(scan);
3434 if (OP(next) == IFTHEN) /* Fake one. */
3435 next = NEXTOPER(NEXTOPER(next));
3436 }
3437 break;
3438 case LOGICAL:
5d9a96ca 3439 st->logical = scan->flags;
c277df42 3440 break;
2ab05381 3441/*******************************************************************
a0374537
DM
3442 cc points to the regmatch_state associated with the most recent CURLYX.
3443 This struct contains info about the innermost (...)* loop (an
3444 "infoblock"), and a pointer to the next outer cc.
2ab05381
IZ
3445
3446 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3447
95b24440 3448 1) After matching Y, regnode for CURLYX is processed;
2ab05381 3449
a0374537 3450 2) This regnode populates cc, and calls regmatch() recursively
95b24440 3451 with the starting point at WHILEM node;
2ab05381
IZ
3452
3453 3) Each hit of WHILEM node tries to match A and Z (in the order
3454 depending on the current iteration, min/max of {min,max} and
3455 greediness). The information about where are nodes for "A"
a0374537 3456 and "Z" is read from cc, as is info on how many times "A"
2ab05381
IZ
3457 was already matched, and greediness.
3458
3459 4) After A matches, the same WHILEM node is hit again.
3460
95b24440 3461 5) Each time WHILEM is hit, cc is the infoblock created by CURLYX
2ab05381 3462 of the same pair. Thus when WHILEM tries to match Z, it temporarily
95b24440 3463 resets cc, since this Y(A)*Z can be a part of some other loop:
2ab05381
IZ
3464 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
3465 of the external loop.
3466
a0374537 3467 Currently present infoblocks form a tree with a stem formed by st->cc
2ab05381
IZ
3468 and whatever it mentions via ->next, and additional attached trees
3469 corresponding to temporarily unset infoblocks as in "5" above.
3470
95b24440 3471 In the following picture, infoblocks for outer loop of
2ab05381
IZ
3472 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
3473 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
3474 infoblocks are drawn below the "reset" infoblock.
3475
3476 In fact in the picture below we do not show failed matches for Z and T
3477 by WHILEM blocks. [We illustrate minimal matches, since for them it is
3478 more obvious *why* one needs to *temporary* unset infoblocks.]
3479
3480 Matched REx position InfoBlocks Comment
3481 (Y(A)*?Z)*?T x
3482 Y(A)*?Z)*?T x <- O
3483 Y (A)*?Z)*?T x <- O
3484 Y A)*?Z)*?T x <- O <- I
3485 YA )*?Z)*?T x <- O <- I
3486 YA A)*?Z)*?T x <- O <- I
3487 YAA )*?Z)*?T x <- O <- I
3488 YAA Z)*?T x <- O # Temporary unset I
3489 I
3490
3491 YAAZ Y(A)*?Z)*?T x <- O
3492 I
3493
3494 YAAZY (A)*?Z)*?T x <- O
3495 I
3496
3497 YAAZY A)*?Z)*?T x <- O <- I
3498 I
3499
3500 YAAZYA )*?Z)*?T x <- O <- I
3501 I
3502
3503 YAAZYA Z)*?T x <- O # Temporary unset I
3504 I,I
3505
3506 YAAZYAZ )*?T x <- O
3507 I,I
3508
3509 YAAZYAZ T x # Temporary unset O
3510 O
3511 I,I
3512
3513 YAAZYAZT x
3514 O
3515 I,I
3516 *******************************************************************/
95b24440 3517
a0d0e21e 3518 case CURLYX: {
cb434fcc
IZ
3519 /* No need to save/restore up to this paren */
3520 I32 parenfloor = scan->flags;
c277df42
IZ
3521
3522 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3523 next += ARG(next);
cb434fcc
IZ
3524 /* XXXX Probably it is better to teach regpush to support
3525 parenfloor > PL_regsize... */
eb160463 3526 if (parenfloor > (I32)*PL_reglastparen)
cb434fcc 3527 parenfloor = *PL_reglastparen; /* Pessimization... */
a0374537
DM
3528
3529 st->curlyx.cp = PL_savestack_ix;
3530 st->curlyx.outercc = st->cc;
3531 st->cc = st;
3532 /* these fields contain the state of the current curly.
3533 * they are accessed by subsequent WHILEMs;
3534 * cur and lastloc are also updated by WHILEM */
3535 st->curlyx.parenfloor = parenfloor;
3536 st->curlyx.cur = -1; /* this will be updated by WHILEM */
3537 st->curlyx.min = ARG1(scan);
3538 st->curlyx.max = ARG2(scan);
3539 st->curlyx.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3540 st->curlyx.lastloc = 0;
3541 /* st->next and st->minmod are also read by WHILEM */
3542
3280af22 3543 PL_reginput = locinput;
95b24440
DM
3544 REGMATCH(PREVOPER(next), CURLYX); /* start on the WHILEM */
3545 /*** all unsaved local vars undefined at this point */
e822a8b4 3546 regcpblow(st->curlyx.cp);
a0374537 3547 st->cc = st->curlyx.outercc;
95b24440 3548 saySAME(result);
a0d0e21e 3549 }
5f66b61c 3550 /* NOTREACHED */
a0d0e21e
LW
3551 case WHILEM: {
3552 /*
3553 * This is really hard to understand, because after we match
3554 * what we're trying to match, we must make sure the rest of
2c2d71f5 3555 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
3556 * to go back UP the parse tree by recursing ever deeper. And
3557 * if it fails, we have to reset our parent's current state
3558 * that we can try again after backing off.
3559 */
3560
a0374537 3561 st->whilem.lastloc = st->cc->curlyx.lastloc; /* Detection of 0-len. */
e822a8b4
DM
3562 st->whilem.cache_offset = 0;
3563 st->whilem.cache_bit = 0;
c277df42 3564
a0374537 3565 n = st->cc->curlyx.cur + 1; /* how many we know we matched */
3280af22 3566 PL_reginput = locinput;
a0d0e21e 3567
a3621e74 3568 DEBUG_EXECUTE_r(
9041c2e3 3569 PerlIO_printf(Perl_debug_log,
91f3b821 3570 "%*s %ld out of %ld..%ld cc=%"UVxf"\n",
3280af22 3571 REPORT_CODE_OFF+PL_regindent*2, "",
a0374537
DM
3572 (long)n, (long)st->cc->curlyx.min,
3573 (long)st->cc->curlyx.max, PTR2UV(st->cc))
c277df42 3574 );
4633a7c4 3575
a0d0e21e
LW
3576 /* If degenerate scan matches "", assume scan done. */
3577
a0374537 3578 if (locinput == st->cc->curlyx.lastloc && n >= st->cc->curlyx.min) {
e822a8b4 3579 st->whilem.savecc = st->cc;
a0374537 3580 st->cc = st->cc->curlyx.outercc;
5d9a96ca 3581 if (st->cc)
a0374537 3582 st->ln = st->cc->curlyx.cur;
a3621e74 3583 DEBUG_EXECUTE_r(
c3464db5
DD
3584 PerlIO_printf(Perl_debug_log,
3585 "%*s empty match detected, try continuation...\n",
3280af22 3586 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3587 );
e822a8b4 3588 REGMATCH(st->whilem.savecc->next, WHILEM1);
95b24440 3589 /*** all unsaved local vars undefined at this point */
e822a8b4 3590 st->cc = st->whilem.savecc;
95b24440 3591 if (result)
4633a7c4 3592 sayYES;
a0374537
DM
3593 if (st->cc->curlyx.outercc)
3594 st->cc->curlyx.outercc->curlyx.cur = st->ln;
4633a7c4 3595 sayNO;
a0d0e21e
LW
3596 }
3597
3598 /* First just match a string of min scans. */
3599
a0374537
DM
3600 if (n < st->cc->curlyx.min) {
3601 st->cc->curlyx.cur = n;
3602 st->cc->curlyx.lastloc = locinput;
3603 REGMATCH(st->cc->curlyx.scan, WHILEM2);
95b24440
DM
3604 /*** all unsaved local vars undefined at this point */
3605 if (result)
4633a7c4 3606 sayYES;
a0374537
DM
3607 st->cc->curlyx.cur = n - 1;
3608 st->cc->curlyx.lastloc = st->whilem.lastloc;
4633a7c4 3609 sayNO;
a0d0e21e
LW
3610 }
3611
2c2d71f5
JH
3612 if (scan->flags) {
3613 /* Check whether we already were at this position.
3614 Postpone detection until we know the match is not
3615 *that* much linear. */
3616 if (!PL_reg_maxiter) {
3617 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3618 PL_reg_leftiter = PL_reg_maxiter;
3619 }
3620 if (PL_reg_leftiter-- == 0) {
a3b680e6 3621 const I32 size = (PL_reg_maxiter + 7 + POSCACHE_START)/8;
2c2d71f5 3622 if (PL_reg_poscache) {
eb160463 3623 if ((I32)PL_reg_poscache_size < size) {
2c2d71f5
JH
3624 Renew(PL_reg_poscache, size, char);
3625 PL_reg_poscache_size = size;
3626 }
3627 Zero(PL_reg_poscache, size, char);
3628 }
3629 else {
3630 PL_reg_poscache_size = size;
a02a5408 3631 Newxz(PL_reg_poscache, size, char);
2c2d71f5 3632 }
a3621e74 3633 DEBUG_EXECUTE_r(
2c2d71f5
JH
3634 PerlIO_printf(Perl_debug_log,
3635 "%sDetected a super-linear match, switching on caching%s...\n",
3636 PL_colors[4], PL_colors[5])
3637 );
3638 }
3639 if (PL_reg_leftiter < 0) {
e822a8b4 3640 st->whilem.cache_offset = locinput - PL_bostr;
2c2d71f5 3641
e822a8b4
DM
3642 st->whilem.cache_offset = (scan->flags & 0xf) - 1 + POSCACHE_START
3643 + st->whilem.cache_offset * (scan->flags>>4);
3644 st->whilem.cache_bit = st->whilem.cache_offset % 8;
3645 st->whilem.cache_offset /= 8;
3646 if (PL_reg_poscache[st->whilem.cache_offset] & (1<<st->whilem.cache_bit)) {
a3621e74 3647 DEBUG_EXECUTE_r(
2c2d71f5
JH
3648 PerlIO_printf(Perl_debug_log,
3649 "%*s already tried at this position...\n",
3650 REPORT_CODE_OFF+PL_regindent*2, "")
3651 );
3ab3c9b4
HS
3652 if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))
3653 /* cache records success */
c2b0868c
HS
3654 sayYES;
3655 else
3ab3c9b4 3656 /* cache records failure */
c2b0868c 3657 sayNO_SILENT;
2c2d71f5 3658 }
e822a8b4 3659 PL_reg_poscache[st->whilem.cache_offset] |= (1<<st->whilem.cache_bit);
2c2d71f5
JH
3660 }
3661 }
3662
a0d0e21e
LW
3663 /* Prefer next over scan for minimal matching. */
3664
5d9a96ca 3665 if (st->cc->minmod) {
e822a8b4 3666 st->whilem.savecc = st->cc;
a0374537 3667 st->cc = st->cc->curlyx.outercc;
5d9a96ca 3668 if (st->cc)
a0374537
DM
3669 st->ln = st->cc->curlyx.cur;
3670 st->whilem.cp = regcppush(st->whilem.savecc->curlyx.parenfloor);
e822a8b4
DM
3671 REGCP_SET(st->whilem.lastcp);
3672 REGMATCH(st->whilem.savecc->next, WHILEM3);
95b24440 3673 /*** all unsaved local vars undefined at this point */
e822a8b4 3674 st->cc = st->whilem.savecc;
95b24440 3675 if (result) {
e822a8b4 3676 regcpblow(st->whilem.cp);
3ab3c9b4 3677 CACHEsayYES; /* All done. */
5f05dabc 3678 }
e822a8b4 3679 REGCP_UNWIND(st->whilem.lastcp);
5f05dabc 3680 regcppop();
a0374537
DM
3681 if (st->cc->curlyx.outercc)
3682 st->cc->curlyx.outercc->curlyx.cur = st->ln;
a0d0e21e 3683
a0374537 3684 if (n >= st->cc->curlyx.max) { /* Maximum greed exceeded? */
9041c2e3 3685 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3686 && !(PL_reg_flags & RF_warned)) {
3687 PL_reg_flags |= RF_warned;
9014280d 3688 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
2f3ca594
GS
3689 "Complex regular subexpression recursion",
3690 REG_INFTY - 1);
c277df42 3691 }
3ab3c9b4 3692 CACHEsayNO;
c277df42 3693 }
a687059c 3694
a3621e74 3695 DEBUG_EXECUTE_r(
c3464db5
DD
3696 PerlIO_printf(Perl_debug_log,
3697 "%*s trying longer...\n",
3280af22 3698 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3699 );
a0d0e21e 3700 /* Try scanning more and see if it helps. */
3280af22 3701 PL_reginput = locinput;
a0374537
DM
3702 st->cc->curlyx.cur = n;
3703 st->cc->curlyx.lastloc = locinput;
3704 st->whilem.cp = regcppush(st->cc->curlyx.parenfloor);
e822a8b4 3705 REGCP_SET(st->whilem.lastcp);
a0374537 3706 REGMATCH(st->cc->curlyx.scan, WHILEM4);
95b24440
DM
3707 /*** all unsaved local vars undefined at this point */
3708 if (result) {
e822a8b4 3709 regcpblow(st->whilem.cp);
3ab3c9b4 3710 CACHEsayYES;
5f05dabc 3711 }
e822a8b4 3712 REGCP_UNWIND(st->whilem.lastcp);
5f05dabc 3713 regcppop();
a0374537
DM
3714 st->cc->curlyx.cur = n - 1;
3715 st->cc->curlyx.lastloc = st->whilem.lastloc;
3ab3c9b4 3716 CACHEsayNO;
a0d0e21e
LW
3717 }
3718
3719 /* Prefer scan over next for maximal matching. */
3720
a0374537
DM
3721 if (n < st->cc->curlyx.max) { /* More greed allowed? */
3722 st->whilem.cp = regcppush(st->cc->curlyx.parenfloor);
3723 st->cc->curlyx.cur = n;
3724 st->cc->curlyx.lastloc = locinput;
e822a8b4 3725 REGCP_SET(st->whilem.lastcp);
a0374537 3726 REGMATCH(st->cc->curlyx.scan, WHILEM5);
95b24440
DM
3727 /*** all unsaved local vars undefined at this point */
3728 if (result) {
e822a8b4 3729 regcpblow(st->whilem.cp);
3ab3c9b4 3730 CACHEsayYES;
5f05dabc 3731 }
e822a8b4 3732 REGCP_UNWIND(st->whilem.lastcp);
a0d0e21e 3733 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3734 PL_reginput = locinput;
a3621e74 3735 DEBUG_EXECUTE_r(
c3464db5
DD
3736 PerlIO_printf(Perl_debug_log,
3737 "%*s failed, try continuation...\n",
3280af22 3738 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3739 );
3740 }
9041c2e3 3741 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3742 && !(PL_reg_flags & RF_warned)) {
3280af22 3743 PL_reg_flags |= RF_warned;
9014280d 3744 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
cb5d145d
GS
3745 "Complex regular subexpression recursion",
3746 REG_INFTY - 1);
a0d0e21e
LW
3747 }
3748
3749 /* Failed deeper matches of scan, so see if this one works. */
e822a8b4 3750 st->whilem.savecc = st->cc;
a0374537 3751 st->cc = st->cc->curlyx.outercc;
5d9a96ca 3752 if (st->cc)
a0374537 3753 st->ln = st->cc->curlyx.cur;
e822a8b4 3754 REGMATCH(st->whilem.savecc->next, WHILEM6);
95b24440 3755 /*** all unsaved local vars undefined at this point */
e822a8b4 3756 st->cc = st->whilem.savecc;
95b24440 3757 if (result)
3ab3c9b4 3758 CACHEsayYES;
a0374537
DM
3759 if (st->cc->curlyx.outercc)
3760 st->cc->curlyx.outercc->curlyx.cur = st->ln;
3761 st->cc->curlyx.cur = n - 1;
3762 st->cc->curlyx.lastloc = st->whilem.lastloc;
3ab3c9b4 3763 CACHEsayNO;
a0d0e21e 3764 }
5f66b61c 3765 /* NOTREACHED */
9041c2e3 3766 case BRANCHJ:
c277df42
IZ
3767 next = scan + ARG(scan);
3768 if (next == scan)
3769 next = NULL;
3770 inner = NEXTOPER(NEXTOPER(scan));
3771 goto do_branch;
9041c2e3 3772 case BRANCH:
c277df42
IZ
3773 inner = NEXTOPER(scan);
3774 do_branch:
3775 {
e822a8b4
DM
3776 I32 type;
3777 type = OP(scan);
3778 if (OP(next) != type) /* No choice. */
c277df42 3779 next = inner; /* Avoid recursion. */
a0d0e21e 3780 else {
a3b680e6 3781 const I32 lastparen = *PL_reglastparen;
02db2b7b 3782 /* Put unwinding data on stack */
6136c704
AL
3783 const I32 unwind1 = SSNEWt(1,re_unwind_branch_t);
3784 re_unwind_branch_t * const uw = SSPTRt(unwind1,re_unwind_branch_t);
3785
5d9a96ca
DM
3786 uw->prev = st->unwind;
3787 st->unwind = unwind1;
e822a8b4 3788 uw->type = ((type == BRANCH)
02db2b7b
IZ
3789 ? RE_UNWIND_BRANCH
3790 : RE_UNWIND_BRANCHJ);
3791 uw->lastparen = lastparen;
3792 uw->next = next;
3793 uw->locinput = locinput;
3794 uw->nextchr = nextchr;
3795#ifdef DEBUGGING
3796 uw->regindent = ++PL_regindent;
3797#endif
c277df42 3798
02db2b7b
IZ
3799 REGCP_SET(uw->lastcp);
3800
3801 /* Now go into the first branch */
3802 next = inner;
a687059c 3803 }
a0d0e21e
LW
3804 }
3805 break;
3806 case MINMOD:
5d9a96ca 3807 st->minmod = 1;
a0d0e21e 3808 break;
c277df42
IZ
3809 case CURLYM:
3810 {
e822a8b4 3811 st->curlym.l = st->curlym.matches = 0;
9041c2e3 3812
c277df42 3813 /* We suppose that the next guy does not need
0e788c72 3814 backtracking: in particular, it is of constant non-zero length,
c277df42 3815 and has no parenths to influence future backrefs. */
5d9a96ca 3816 st->ln = ARG1(scan); /* min to match */
c277df42 3817 n = ARG2(scan); /* max to match */
e822a8b4
DM
3818 st->curlym.paren = scan->flags;
3819 if (st->curlym.paren) {
3820 if (st->curlym.paren > PL_regsize)
3821 PL_regsize = st->curlym.paren;
3822 if (st->curlym.paren > (I32)*PL_reglastparen)
3823 *PL_reglastparen = st->curlym.paren;
c277df42 3824 }
dc45a647 3825 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
e822a8b4 3826 if (st->curlym.paren)
c277df42 3827 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3828 PL_reginput = locinput;
e822a8b4
DM
3829 st->curlym.maxwanted = st->minmod ? st->ln : n;
3830 if (st->curlym.maxwanted) {
3831 while (PL_reginput < PL_regeol && st->curlym.matches < st->curlym.maxwanted) {
95b24440
DM
3832 REGMATCH(scan, CURLYM1);
3833 /*** all unsaved local vars undefined at this point */
3834 if (!result)
6407bf3b 3835 break;
e822a8b4
DM
3836 /* on first match, determine length, curlym.l */
3837 if (!st->curlym.matches++) {
6407bf3b
DM
3838 if (PL_reg_match_utf8) {
3839 char *s = locinput;
3840 while (s < PL_reginput) {
e822a8b4 3841 st->curlym.l++;
6407bf3b
DM
3842 s += UTF8SKIP(s);
3843 }
3844 }
3845 else {
e822a8b4 3846 st->curlym.l = PL_reginput - locinput;
6407bf3b 3847 }
e822a8b4
DM
3848 if (st->curlym.l == 0) {
3849 st->curlym.matches = st->curlym.maxwanted;
6407bf3b
DM
3850 break;
3851 }
3852 }
3853 locinput = PL_reginput;
3854 }
3855 }
3856
3857 PL_reginput = locinput;
3858
5d9a96ca
DM
3859 if (st->minmod) {
3860 st->minmod = 0;
e822a8b4 3861 if (st->ln && st->curlym.matches < st->ln)
c277df42 3862 sayNO;
cca55fe3 3863 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3864 regnode *text_node = next;
3865
cca55fe3 3866 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3867
e822a8b4 3868 if (! HAS_TEXT(text_node)) st->curlym.c1 = st->curlym.c2 = -1000;
5f80c4cf 3869 else {
cca55fe3 3870 if (PL_regkind[(U8)OP(text_node)] == REF) {
e822a8b4 3871 st->curlym.c1 = st->curlym.c2 = -1000;
44a68960 3872 goto assume_ok_MM;
cca55fe3 3873 }
e822a8b4 3874 else { st->curlym.c1 = (U8)*STRING(text_node); }
af5decee 3875 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
e822a8b4 3876 st->curlym.c2 = PL_fold[st->curlym.c1];
af5decee 3877 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
e822a8b4 3878 st->curlym.c2 = PL_fold_locale[st->curlym.c1];
5f80c4cf 3879 else
e822a8b4 3880 st->curlym.c2 = st->curlym.c1;
5f80c4cf 3881 }
a0ed51b3
LW
3882 }
3883 else
e822a8b4 3884 st->curlym.c1 = st->curlym.c2 = -1000;
cca55fe3 3885 assume_ok_MM:
e822a8b4 3886 REGCP_SET(st->curlym.lastcp);
5d9a96ca 3887 while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */
c277df42 3888 /* If it could work, try it. */
e822a8b4
DM
3889 if (st->curlym.c1 == -1000 ||
3890 UCHARAT(PL_reginput) == st->curlym.c1 ||
3891 UCHARAT(PL_reginput) == st->curlym.c2)
c277df42 3892 {
e822a8b4 3893 if (st->curlym.paren) {
5d9a96ca 3894 if (st->ln) {
e822a8b4
DM
3895 PL_regstartp[st->curlym.paren] =
3896 HOPc(PL_reginput, -st->curlym.l) - PL_bostr;
3897 PL_regendp[st->curlym.paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3898 }
3899 else
e822a8b4 3900 PL_regendp[st->curlym.paren] = -1;
c277df42 3901 }
95b24440
DM
3902 REGMATCH(next, CURLYM2);
3903 /*** all unsaved local vars undefined at this point */
3904 if (result)
c277df42 3905 sayYES;
e822a8b4 3906 REGCP_UNWIND(st->curlym.lastcp);
c277df42
IZ
3907 }
3908 /* Couldn't or didn't -- move forward. */
3280af22 3909 PL_reginput = locinput;
95b24440
DM
3910 REGMATCH(scan, CURLYM3);
3911 /*** all unsaved local vars undefined at this point */
3912 if (result) {
5d9a96ca 3913 st->ln++;
3280af22 3914 locinput = PL_reginput;
c277df42
IZ
3915 }
3916 else
3917 sayNO;
3918 }
a0ed51b3
LW
3919 }
3920 else {
a3621e74 3921 DEBUG_EXECUTE_r(
5c0ca799 3922 PerlIO_printf(Perl_debug_log,
95b24440
DM
3923 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
3924 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
e822a8b4 3925 (IV) st->curlym.matches, (IV)st->curlym.l)
c277df42 3926 );
e822a8b4 3927 if (st->curlym.matches >= st->ln) {
cca55fe3 3928 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3929 regnode *text_node = next;
3930
cca55fe3 3931 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3932
e822a8b4 3933 if (! HAS_TEXT(text_node)) st->curlym.c1 = st->curlym.c2 = -1000;
5f80c4cf 3934 else {
cca55fe3 3935 if (PL_regkind[(U8)OP(text_node)] == REF) {
e822a8b4 3936 st->curlym.c1 = st->curlym.c2 = -1000;
44a68960 3937 goto assume_ok_REG;
cca55fe3 3938 }
e822a8b4 3939 else { st->curlym.c1 = (U8)*STRING(text_node); }
cca55fe3 3940
af5decee 3941 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
e822a8b4 3942 st->curlym.c2 = PL_fold[st->curlym.c1];
af5decee 3943 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
e822a8b4 3944 st->curlym.c2 = PL_fold_locale[st->curlym.c1];
5f80c4cf 3945 else
e822a8b4 3946 st->curlym.c2 = st->curlym.c1;
5f80c4cf 3947 }
a0ed51b3
LW
3948 }
3949 else
e822a8b4 3950 st->curlym.c1 = st->curlym.c2 = -1000;
c277df42 3951 }
cca55fe3 3952 assume_ok_REG:
e822a8b4
DM
3953 REGCP_SET(st->curlym.lastcp);
3954 while (st->curlym.matches >= st->ln) {
c277df42 3955 /* If it could work, try it. */
e822a8b4
DM
3956 if (st->curlym.c1 == -1000 ||
3957 UCHARAT(PL_reginput) == st->curlym.c1 ||
3958 UCHARAT(PL_reginput) == st->curlym.c2)
a0ed51b3 3959 {
a3621e74 3960 DEBUG_EXECUTE_r(
6407bf3b
DM
3961 PerlIO_printf(Perl_debug_log,
3962 "%*s trying tail with matches=%"IVdf"...\n",
3963 (int)(REPORT_CODE_OFF+PL_regindent*2),
e822a8b4 3964 "", (IV)st->curlym.matches)
a0ed51b3 3965 );
e822a8b4
DM
3966 if (st->curlym.paren) {
3967 if (st->curlym.matches) {
3968 PL_regstartp[st->curlym.paren]
3969 = HOPc(PL_reginput, -st->curlym.l) - PL_bostr;
3970 PL_regendp[st->curlym.paren] = PL_reginput - PL_bostr;
c277df42 3971 }
a0ed51b3 3972 else
e822a8b4 3973 PL_regendp[st->curlym.paren] = -1;
c277df42 3974 }
95b24440
DM
3975 REGMATCH(next, CURLYM4);
3976 /*** all unsaved local vars undefined at this point */
3977 if (result)
a0ed51b3 3978 sayYES;
e822a8b4 3979 REGCP_UNWIND(st->curlym.lastcp);
a0ed51b3 3980 }
c277df42 3981 /* Couldn't or didn't -- back up. */
e822a8b4
DM
3982 st->curlym.matches--;
3983 locinput = HOPc(locinput, -st->curlym.l);
3280af22 3984 PL_reginput = locinput;
c277df42
IZ
3985 }
3986 }
3987 sayNO;
5f66b61c 3988 /* NOTREACHED */
c277df42
IZ
3989 break;
3990 }
3991 case CURLYN:
e822a8b4
DM
3992 st->plus.paren = scan->flags; /* Which paren to set */
3993 if (st->plus.paren > PL_regsize)
3994 PL_regsize = st->plus.paren;
3995 if (st->plus.paren > (I32)*PL_reglastparen)
3996 *PL_reglastparen = st->plus.paren;
5d9a96ca 3997 st->ln = ARG1(scan); /* min to match */
c277df42 3998 n = ARG2(scan); /* max to match */
dc45a647 3999 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 4000 goto repeat;
a0d0e21e 4001 case CURLY:
e822a8b4 4002 st->plus.paren = 0;
5d9a96ca 4003 st->ln = ARG1(scan); /* min to match */
a0d0e21e 4004 n = ARG2(scan); /* max to match */
dc45a647 4005 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
4006 goto repeat;
4007 case STAR:
5d9a96ca 4008 st->ln = 0;
c277df42 4009 n = REG_INFTY;
a0d0e21e 4010 scan = NEXTOPER(scan);
e822a8b4 4011 st->plus.paren = 0;
a0d0e21e
LW
4012 goto repeat;
4013 case PLUS:
5d9a96ca 4014 st->ln = 1;
c277df42
IZ
4015 n = REG_INFTY;
4016 scan = NEXTOPER(scan);
e822a8b4 4017 st->plus.paren = 0;
c277df42 4018 repeat:
a0d0e21e
LW
4019 /*
4020 * Lookahead to avoid useless match attempts
4021 * when we know what character comes next.
4022 */
5f80c4cf
JP
4023
4024 /*
4025 * Used to only do .*x and .*?x, but now it allows
4026 * for )'s, ('s and (?{ ... })'s to be in the way
4027 * of the quantifier and the EXACT-like node. -- japhy
4028 */
4029
cca55fe3 4030 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
4031 U8 *s;
4032 regnode *text_node = next;
4033
cca55fe3 4034 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 4035
e822a8b4 4036 if (! HAS_TEXT(text_node)) st->plus.c1 = st->plus.c2 = -1000;
5f80c4cf 4037 else {
cca55fe3 4038 if (PL_regkind[(U8)OP(text_node)] == REF) {
e822a8b4 4039 st->plus.c1 = st->plus.c2 = -1000;
44a68960 4040 goto assume_ok_easy;
cca55fe3
JP
4041 }
4042 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
4043
4044 if (!UTF) {
e822a8b4 4045 st->plus.c2 = st->plus.c1 = *s;
f65d3ee7 4046 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
e822a8b4 4047 st->plus.c2 = PL_fold[st->plus.c1];
f65d3ee7 4048 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
e822a8b4 4049 st->plus.c2 = PL_fold_locale[st->plus.c1];
1aa99e6b 4050 }
5f80c4cf 4051 else { /* UTF */
f65d3ee7 4052 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 4053 STRLEN ulen1, ulen2;
89ebb4a3
JH
4054 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4055 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
a2a2844f
JH
4056
4057 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4058 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4059
e822a8b4 4060 st->plus.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
041457d9 4061 uniflags);
e822a8b4 4062 st->plus.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
041457d9 4063 uniflags);
5f80c4cf
JP
4064 }
4065 else {
e822a8b4 4066 st->plus.c2 = st->plus.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
041457d9 4067 uniflags);
5f80c4cf 4068 }
1aa99e6b
IH
4069 }
4070 }
bbce6d69 4071 }
a0d0e21e 4072 else
e822a8b4 4073 st->plus.c1 = st->plus.c2 = -1000;
cca55fe3 4074 assume_ok_easy:
3280af22 4075 PL_reginput = locinput;
5d9a96ca
DM
4076 if (st->minmod) {
4077 st->minmod = 0;
4078 if (st->ln && regrepeat(scan, st->ln) < st->ln)
4633a7c4 4079 sayNO;
a0ed51b3 4080 locinput = PL_reginput;
e822a8b4
DM
4081 REGCP_SET(st->plus.lastcp);
4082 if (st->plus.c1 != -1000) {
4083 st->plus.old = locinput;
4084 st->plus.count = 0;
0fe9bf95 4085
1aa99e6b 4086 if (n == REG_INFTY) {
e822a8b4 4087 st->plus.e = PL_regeol - 1;
1aa99e6b 4088 if (do_utf8)
e822a8b4
DM
4089 while (UTF8_IS_CONTINUATION(*(U8*)st->plus.e))
4090 st->plus.e--;
1aa99e6b
IH
4091 }
4092 else if (do_utf8) {
5d9a96ca 4093 int m = n - st->ln;
e822a8b4
DM
4094 for (st->plus.e = locinput;
4095 m >0 && st->plus.e + UTF8SKIP(st->plus.e) <= PL_regeol; m--)
4096 st->plus.e += UTF8SKIP(st->plus.e);
1aa99e6b
IH
4097 }
4098 else {
e822a8b4
DM
4099 st->plus.e = locinput + n - st->ln;
4100 if (st->plus.e >= PL_regeol)
4101 st->plus.e = PL_regeol - 1;
1aa99e6b 4102 }
0fe9bf95
IZ
4103 while (1) {
4104 /* Find place 'next' could work */
1aa99e6b 4105 if (!do_utf8) {
e822a8b4
DM
4106 if (st->plus.c1 == st->plus.c2) {
4107 while (locinput <= st->plus.e &&
4108 UCHARAT(locinput) != st->plus.c1)
1aa99e6b
IH
4109 locinput++;
4110 } else {
e822a8b4
DM
4111 while (locinput <= st->plus.e
4112 && UCHARAT(locinput) != st->plus.c1
4113 && UCHARAT(locinput) != st->plus.c2)
1aa99e6b
IH
4114 locinput++;
4115 }
e822a8b4 4116 st->plus.count = locinput - st->plus.old;
1aa99e6b
IH
4117 }
4118 else {
e822a8b4 4119 if (st->plus.c1 == st->plus.c2) {
a3b680e6 4120 STRLEN len;
872c91ae
JH
4121 /* count initialised to
4122 * utf8_distance(old, locinput) */
e822a8b4 4123 while (locinput <= st->plus.e &&
872c91ae 4124 utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4125 UTF8_MAXBYTES, &len,
e822a8b4 4126 uniflags) != (UV)st->plus.c1) {
1aa99e6b 4127 locinput += len;
e822a8b4 4128 st->plus.count++;
b2f2f093 4129 }
1aa99e6b 4130 } else {
a3b680e6 4131 STRLEN len;
872c91ae
JH
4132 /* count initialised to
4133 * utf8_distance(old, locinput) */
e822a8b4 4134 while (locinput <= st->plus.e) {
872c91ae 4135 UV c = utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4136 UTF8_MAXBYTES, &len,
041457d9 4137 uniflags);
e822a8b4 4138 if (c == (UV)st->plus.c1 || c == (UV)st->plus.c2)
1aa99e6b 4139 break;
b2f2f093 4140 locinput += len;
e822a8b4 4141 st->plus.count++;
1aa99e6b
IH
4142 }
4143 }
0fe9bf95 4144 }
e822a8b4 4145 if (locinput > st->plus.e)
0fe9bf95
IZ
4146 sayNO;
4147 /* PL_reginput == old now */
e822a8b4 4148 if (locinput != st->plus.old) {
5d9a96ca 4149 st->ln = 1; /* Did some */
e822a8b4 4150 if (regrepeat(scan, st->plus.count) < st->plus.count)
0fe9bf95
IZ
4151 sayNO;
4152 }
4153 /* PL_reginput == locinput now */
e822a8b4 4154 TRYPAREN(st->plus.paren, st->ln, locinput, PLUS1);
95b24440 4155 /*** all unsaved local vars undefined at this point */
0fe9bf95 4156 PL_reginput = locinput; /* Could be reset... */
e822a8b4 4157 REGCP_UNWIND(st->plus.lastcp);
0fe9bf95 4158 /* Couldn't or didn't -- move forward. */
e822a8b4 4159 st->plus.old = locinput;
1aa99e6b
IH
4160 if (do_utf8)
4161 locinput += UTF8SKIP(locinput);
4162 else
4163 locinput++;
e822a8b4 4164 st->plus.count = 1;
0fe9bf95
IZ
4165 }
4166 }
4167 else
5d9a96ca 4168 while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */
1aa99e6b 4169 UV c;
e822a8b4 4170 if (st->plus.c1 != -1000) {
1aa99e6b 4171 if (do_utf8)
872c91ae 4172 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4173 UTF8_MAXBYTES, 0,
041457d9 4174 uniflags);
1aa99e6b 4175 else
9041c2e3 4176 c = UCHARAT(PL_reginput);
2390ecbc 4177 /* If it could work, try it. */
e822a8b4 4178 if (c == (UV)st->plus.c1 || c == (UV)st->plus.c2)
2390ecbc 4179 {
e822a8b4 4180 TRYPAREN(st->plus.paren, st->ln, PL_reginput, PLUS2);
95b24440 4181 /*** all unsaved local vars undefined at this point */
e822a8b4 4182 REGCP_UNWIND(st->plus.lastcp);
2390ecbc 4183 }
1aa99e6b 4184 }
a0d0e21e 4185 /* If it could work, try it. */
e822a8b4 4186 else if (st->plus.c1 == -1000)
bbce6d69 4187 {
e822a8b4 4188 TRYPAREN(st->plus.paren, st->ln, PL_reginput, PLUS3);
95b24440 4189 /*** all unsaved local vars undefined at this point */
e822a8b4 4190 REGCP_UNWIND(st->plus.lastcp);
bbce6d69 4191 }
c277df42 4192 /* Couldn't or didn't -- move forward. */
a0ed51b3 4193 PL_reginput = locinput;
a0d0e21e 4194 if (regrepeat(scan, 1)) {
5d9a96ca 4195 st->ln++;
a0ed51b3
LW
4196 locinput = PL_reginput;
4197 }
4198 else
4633a7c4 4199 sayNO;
a0d0e21e
LW
4200 }
4201 }
4202 else {
4203 n = regrepeat(scan, n);
a0ed51b3 4204 locinput = PL_reginput;
5d9a96ca 4205 if (st->ln < n && PL_regkind[(U8)OP(next)] == EOL &&
7fba1cd6 4206 (OP(next) != MEOL ||
15272685
HS
4207 OP(next) == SEOL || OP(next) == EOS))
4208 {
5d9a96ca 4209 st->ln = n; /* why back off? */
1aeab75a
GS
4210 /* ...because $ and \Z can match before *and* after
4211 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
4212 We should back off by one in this case. */
4213 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
5d9a96ca 4214 st->ln--;
1aeab75a 4215 }
e822a8b4 4216 REGCP_SET(st->plus.lastcp);
1d5c262f 4217 {
8fa7f367 4218 UV c = 0;
5d9a96ca 4219 while (n >= st->ln) {
e822a8b4 4220 if (st->plus.c1 != -1000) {
1aa99e6b 4221 if (do_utf8)
872c91ae 4222 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4223 UTF8_MAXBYTES, 0,
041457d9 4224 uniflags);
1aa99e6b 4225 else
9041c2e3 4226 c = UCHARAT(PL_reginput);
1aa99e6b 4227 }
c277df42 4228 /* If it could work, try it. */
e822a8b4 4229 if (st->plus.c1 == -1000 || c == (UV)st->plus.c1 || c == (UV)st->plus.c2)
c277df42 4230 {
e822a8b4 4231 TRYPAREN(st->plus.paren, n, PL_reginput, PLUS4);
95b24440 4232 /*** all unsaved local vars undefined at this point */
e822a8b4 4233 REGCP_UNWIND(st->plus.lastcp);
c277df42
IZ
4234 }
4235 /* Couldn't or didn't -- back up. */
4236 n--;
dfe13c55 4237 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 4238 }
a0d0e21e
LW
4239 }
4240 }
4633a7c4 4241 sayNO;
c277df42 4242 break;
a0d0e21e 4243 case END:
0f5d15d6 4244 if (PL_reg_call_cc) {
e822a8b4
DM
4245 st->end.cur_call_cc = PL_reg_call_cc;
4246 st->end.end_re = PL_reg_re;
6136c704
AL
4247
4248 /* Save *all* the positions. */
e822a8b4
DM
4249 st->end.cp = regcppush(0);
4250 REGCP_SET(st->end.lastcp);
6136c704
AL
4251
4252 /* Restore parens of the caller. */
95b24440
DM
4253 {
4254 I32 tmp = PL_savestack_ix;
4255 PL_savestack_ix = PL_reg_call_cc->ss;
4256 regcppop();
4257 PL_savestack_ix = tmp;
4258 }
6136c704
AL
4259
4260 /* Make position available to the callcc. */
4261 PL_reginput = locinput;
4262
0f5d15d6 4263 cache_re(PL_reg_call_cc->re);
e822a8b4 4264 st->end.savecc = st->cc;
5d9a96ca 4265 st->cc = PL_reg_call_cc->cc;
0f5d15d6 4266 PL_reg_call_cc = PL_reg_call_cc->prev;
e822a8b4 4267 REGMATCH(st->end.cur_call_cc->node, END);
95b24440
DM
4268 /*** all unsaved local vars undefined at this point */
4269 if (result) {
e822a8b4
DM
4270 PL_reg_call_cc = st->end.cur_call_cc;
4271 regcpblow(st->end.cp);
0f5d15d6
IZ
4272 sayYES;
4273 }
e822a8b4 4274 REGCP_UNWIND(st->end.lastcp);
0f5d15d6 4275 regcppop();
e822a8b4
DM
4276 PL_reg_call_cc = st->end.cur_call_cc;
4277 st->cc = st->end.savecc;
4278 PL_reg_re = st->end.end_re;
4279 cache_re(st->end.end_re);
0f5d15d6 4280
a3621e74 4281 DEBUG_EXECUTE_r(
0f5d15d6
IZ
4282 PerlIO_printf(Perl_debug_log,
4283 "%*s continuation failed...\n",
4284 REPORT_CODE_OFF+PL_regindent*2, "")
4285 );
7821416a 4286 sayNO_SILENT;
0f5d15d6 4287 }
7821416a 4288 if (locinput < PL_regtill) {
a3621e74 4289 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
7821416a
IZ
4290 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4291 PL_colors[4],
4292 (long)(locinput - PL_reg_starttry),
4293 (long)(PL_regtill - PL_reg_starttry),
4294 PL_colors[5]));
4295 sayNO_FINAL; /* Cannot match: too short. */
4296 }
4297 PL_reginput = locinput; /* put where regtry can find it */
4298 sayYES_FINAL; /* Success! */
7e5428c5 4299 case SUCCEED:
3280af22 4300 PL_reginput = locinput; /* put where regtry can find it */
7821416a 4301 sayYES_LOUD; /* Success! */
c277df42
IZ
4302 case SUSPEND:
4303 n = 1;
9fe1d20c 4304 PL_reginput = locinput;
9041c2e3 4305 goto do_ifmatch;
a0d0e21e 4306 case UNLESSM:
c277df42 4307 n = 0;
a0ed51b3 4308 if (scan->flags) {
52657f30 4309 char * const s = HOPBACKc(locinput, scan->flags);
efb30f32
HS
4310 if (!s)
4311 goto say_yes;
4312 PL_reginput = s;
a0ed51b3
LW
4313 }
4314 else
4315 PL_reginput = locinput;
c277df42
IZ
4316 goto do_ifmatch;
4317 case IFMATCH:
4318 n = 1;
a0ed51b3 4319 if (scan->flags) {
52657f30 4320 char * const s = HOPBACKc(locinput, scan->flags);
efb30f32
HS
4321 if (!s)
4322 goto say_no;
4323 PL_reginput = s;
a0ed51b3
LW
4324 }
4325 else
4326 PL_reginput = locinput;
4327
c277df42 4328 do_ifmatch:
95b24440
DM
4329 REGMATCH(NEXTOPER(NEXTOPER(scan)), IFMATCH);
4330 /*** all unsaved local vars undefined at this point */
4331 if (result != n) {
c277df42 4332 say_no:
5d9a96ca
DM
4333 if (st->logical) {
4334 st->logical = 0;
4335 st->sw = 0;
c277df42 4336 goto do_longjump;
a0ed51b3
LW
4337 }
4338 else
c277df42
IZ
4339 sayNO;
4340 }
4341 say_yes:
5d9a96ca
DM
4342 if (st->logical) {
4343 st->logical = 0;
4344 st->sw = 1;
c277df42 4345 }
fe44a5e8 4346 if (OP(scan) == SUSPEND) {
3280af22 4347 locinput = PL_reginput;
565764a8 4348 nextchr = UCHARAT(locinput);
fe44a5e8 4349 }
c277df42
IZ
4350 /* FALL THROUGH. */
4351 case LONGJMP:
4352 do_longjump:
4353 next = scan + ARG(scan);
4354 if (next == scan)
4355 next = NULL;
a0d0e21e
LW
4356 break;
4357 default:
b900a521 4358 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 4359 PTR2UV(scan), OP(scan));
cea2e8a9 4360 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 4361 }
95b24440 4362
02db2b7b 4363 reenter:
a0d0e21e 4364 scan = next;
95b24440
DM
4365 continue;
4366 /* NOTREACHED */
4367
4368 /* simulate recursively calling regmatch(), but without actually
4369 * recursing - ie save the current state on the heap rather than on
4370 * the stack, then re-enter the loop. This avoids complex regexes
4371 * blowing the processor stack */
4372
4373 start_recurse:
4374 {
5d9a96ca
DM
4375 /* push new state */
4376 regmatch_state *oldst = st;
4377
4378 depth++;
4379
4380 /* grab the next free state slot */
4381 st++;
4382 if (st > &(PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1]))
4383 st = S_push_slab(aTHX);
4384 PL_regmatch_state = st;
4385
4386 oldst->next = next;
4387 oldst->n = n;
4388 oldst->locinput = locinput;
4389 oldst->reg_call_cc = PL_reg_call_cc;
4390
4391 st->cc = oldst->cc;
95b24440
DM
4392 locinput = PL_reginput;
4393 nextchr = UCHARAT(locinput);
5d9a96ca
DM
4394 st->minmod = 0;
4395 st->sw = 0;
4396 st->logical = 0;
4397 st->unwind = 0;
95b24440
DM
4398#ifdef DEBUGGING
4399 PL_regindent++;
4400#endif
4401 }
a0d0e21e 4402 }
a687059c 4403
a0d0e21e
LW
4404 /*
4405 * We get here only if there's trouble -- normally "case END" is
4406 * the terminating point.
4407 */
cea2e8a9 4408 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 4409 /*NOTREACHED*/
4633a7c4
LW
4410 sayNO;
4411
7821416a 4412yes_loud:
a3621e74 4413 DEBUG_EXECUTE_r(
7821416a
IZ
4414 PerlIO_printf(Perl_debug_log,
4415 "%*s %scould match...%s\n",
e4584336 4416 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
7821416a
IZ
4417 );
4418 goto yes;
4419yes_final:
a3621e74 4420 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
e4584336 4421 PL_colors[4], PL_colors[5]));
4633a7c4
LW
4422yes:
4423#ifdef DEBUGGING
3280af22 4424 PL_regindent--;
4633a7c4 4425#endif
02db2b7b 4426
95b24440
DM
4427 result = 1;
4428 goto exit_level;
4633a7c4
LW
4429
4430no:
a3621e74 4431 DEBUG_EXECUTE_r(
7821416a
IZ
4432 PerlIO_printf(Perl_debug_log,
4433 "%*s %sfailed...%s\n",
e4584336 4434 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
7821416a
IZ
4435 );
4436 goto do_no;
4437no_final:
4438do_no:
5d9a96ca
DM
4439 if (st->unwind) {
4440 re_unwind_t * const uw = SSPTRt(st->unwind,re_unwind_t);
02db2b7b
IZ
4441
4442 switch (uw->type) {
4443 case RE_UNWIND_BRANCH:
4444 case RE_UNWIND_BRANCHJ:
4445 {
6136c704 4446 re_unwind_branch_t * const uwb = &(uw->branch);
a3b680e6 4447 const I32 lastparen = uwb->lastparen;
9041c2e3 4448
02db2b7b
IZ
4449 REGCP_UNWIND(uwb->lastcp);
4450 for (n = *PL_reglastparen; n > lastparen; n--)
4451 PL_regendp[n] = -1;
4452 *PL_reglastparen = n;
4453 scan = next = uwb->next;
9041c2e3
NIS
4454 if ( !scan ||
4455 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b 4456 ? BRANCH : BRANCHJ) ) { /* Failure */
5d9a96ca 4457 st->unwind = uwb->prev;
02db2b7b
IZ
4458#ifdef DEBUGGING
4459 PL_regindent--;
4460#endif
4461 goto do_no;
4462 }
4463 /* Have more choice yet. Reuse the same uwb. */
02db2b7b
IZ
4464 if ((n = (uwb->type == RE_UNWIND_BRANCH
4465 ? NEXT_OFF(next) : ARG(next))))
4466 next += n;
4467 else
4468 next = NULL; /* XXXX Needn't unwinding in this case... */
4469 uwb->next = next;
4470 next = NEXTOPER(scan);
4471 if (uwb->type == RE_UNWIND_BRANCHJ)
4472 next = NEXTOPER(next);
4473 locinput = uwb->locinput;
4474 nextchr = uwb->nextchr;
4475#ifdef DEBUGGING
4476 PL_regindent = uwb->regindent;
4477#endif
4478
4479 goto reenter;
4480 }
5f66b61c 4481 /* NOTREACHED */
02db2b7b
IZ
4482 default:
4483 Perl_croak(aTHX_ "regexp unwind memory corruption");
4484 }
5f66b61c 4485 /* NOTREACHED */
02db2b7b 4486 }
4633a7c4 4487#ifdef DEBUGGING
3280af22 4488 PL_regindent--;
4633a7c4 4489#endif
95b24440
DM
4490 result = 0;
4491exit_level:
5d9a96ca
DM
4492
4493 if (depth--) {
95b24440 4494 /* restore previous state and re-enter */
5d9a96ca
DM
4495 st--;
4496 if (st < &PL_regmatch_slab->states[0]) {
4497 PL_regmatch_slab = PL_regmatch_slab->prev;
4498 st = &PL_regmatch_slab->states[PERL_REGMATCH_SLAB_SLOTS-1];
4499 }
4500 PL_regmatch_state = st;
95b24440 4501
5d9a96ca
DM
4502 PL_reg_call_cc = st->reg_call_cc;
4503 scan = st->scan;
4504 next = st->next;
4505 n = st->n;
4506 locinput = st->locinput;
95b24440 4507 nextchr = UCHARAT(locinput);
5d9a96ca
DM
4508
4509 switch (st->resume_state) {
95b24440
DM
4510 case resume_TRIE1:
4511 goto resume_point_TRIE1;
4512 case resume_TRIE2:
4513 goto resume_point_TRIE2;
4514 case resume_CURLYX:
4515 goto resume_point_CURLYX;
4516 case resume_WHILEM1:
4517 goto resume_point_WHILEM1;
4518 case resume_WHILEM2:
4519 goto resume_point_WHILEM2;
4520 case resume_WHILEM3:
4521 goto resume_point_WHILEM3;
4522 case resume_WHILEM4:
4523 goto resume_point_WHILEM4;
4524 case resume_WHILEM5:
4525 goto resume_point_WHILEM5;
4526 case resume_WHILEM6:
4527 goto resume_point_WHILEM6;
4528 case resume_CURLYM1:
4529 goto resume_point_CURLYM1;
4530 case resume_CURLYM2:
4531 goto resume_point_CURLYM2;
4532 case resume_CURLYM3:
4533 goto resume_point_CURLYM3;
4534 case resume_CURLYM4:
4535 goto resume_point_CURLYM4;
4536 case resume_IFMATCH:
4537 goto resume_point_IFMATCH;
4538 case resume_PLUS1:
4539 goto resume_point_PLUS1;
4540 case resume_PLUS2:
4541 goto resume_point_PLUS2;
4542 case resume_PLUS3:
4543 goto resume_point_PLUS3;
4544 case resume_PLUS4:
4545 goto resume_point_PLUS4;
4546 case resume_END:
4547 goto resume_point_END;
4548 default:
4549 Perl_croak(aTHX_ "regexp resume memory corruption");
4550 }
4551 /* NOTREACHED */
4552 }
5d9a96ca
DM
4553 /* restore original high-water mark */
4554 PL_regmatch_slab = orig_slab;
4555 PL_regmatch_state = orig_state;
4556
4557 /* free all slabs above current one */
4558 if (orig_slab->next) {
4559 regmatch_slab *osl, *sl = orig_slab->next;
4560 orig_slab->next = NULL;
4561 while (sl) {
4562 osl = sl;
4563 sl = sl->next;
4564 }
4565 }
4566
95b24440
DM
4567 return result;
4568
a687059c
LW
4569}
4570
4571/*
4572 - regrepeat - repeatedly match something simple, report how many
4573 */
4574/*
4575 * [This routine now assumes that it will only match on things of length 1.
4576 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 4577 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 4578 */
76e3520e 4579STATIC I32
a3b680e6 4580S_regrepeat(pTHX_ const regnode *p, I32 max)
a687059c 4581{
27da23d5 4582 dVAR;
a0d0e21e 4583 register char *scan;
a0d0e21e 4584 register I32 c;
3280af22 4585 register char *loceol = PL_regeol;
a0ed51b3 4586 register I32 hardcount = 0;
53c4c00c 4587 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 4588
3280af22 4589 scan = PL_reginput;
faf11cac
HS
4590 if (max == REG_INFTY)
4591 max = I32_MAX;
4592 else if (max < loceol - scan)
a0d0e21e 4593 loceol = scan + max;
a0d0e21e 4594 switch (OP(p)) {
22c35a8c 4595 case REG_ANY:
1aa99e6b 4596 if (do_utf8) {
ffc61ed2 4597 loceol = PL_regeol;
1aa99e6b 4598 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
4599 scan += UTF8SKIP(scan);
4600 hardcount++;
4601 }
4602 } else {
4603 while (scan < loceol && *scan != '\n')
4604 scan++;
a0ed51b3
LW
4605 }
4606 break;
ffc61ed2 4607 case SANY:
def8e4ea
JH
4608 if (do_utf8) {
4609 loceol = PL_regeol;
a0804c9e 4610 while (scan < loceol && hardcount < max) {
def8e4ea
JH
4611 scan += UTF8SKIP(scan);
4612 hardcount++;
4613 }
4614 }
4615 else
4616 scan = loceol;
a0ed51b3 4617 break;
f33976b4
DB
4618 case CANY:
4619 scan = loceol;
4620 break;
090f7165
JH
4621 case EXACT: /* length of string is 1 */
4622 c = (U8)*STRING(p);
4623 while (scan < loceol && UCHARAT(scan) == c)
4624 scan++;
bbce6d69 4625 break;
4626 case EXACTF: /* length of string is 1 */
cd439c50 4627 c = (U8)*STRING(p);
bbce6d69 4628 while (scan < loceol &&
22c35a8c 4629 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 4630 scan++;
4631 break;
4632 case EXACTFL: /* length of string is 1 */
3280af22 4633 PL_reg_flags |= RF_tainted;
cd439c50 4634 c = (U8)*STRING(p);
bbce6d69 4635 while (scan < loceol &&
22c35a8c 4636 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
4637 scan++;
4638 break;
4639 case ANYOF:
ffc61ed2
JH
4640 if (do_utf8) {
4641 loceol = PL_regeol;
cfc92286
JH
4642 while (hardcount < max && scan < loceol &&
4643 reginclass(p, (U8*)scan, 0, do_utf8)) {
ffc61ed2
JH
4644 scan += UTF8SKIP(scan);
4645 hardcount++;
4646 }
4647 } else {
7d3e948e 4648 while (scan < loceol && REGINCLASS(p, (U8*)scan))
ffc61ed2
JH
4649 scan++;
4650 }
a0d0e21e
LW
4651 break;
4652 case ALNUM:
1aa99e6b 4653 if (do_utf8) {
ffc61ed2 4654 loceol = PL_regeol;
1a4fad37 4655 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4656 while (hardcount < max && scan < loceol &&
3568d838 4657 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4658 scan += UTF8SKIP(scan);
4659 hardcount++;
4660 }
4661 } else {
4662 while (scan < loceol && isALNUM(*scan))
4663 scan++;
a0ed51b3
LW
4664 }
4665 break;
bbce6d69 4666 case ALNUML:
3280af22 4667 PL_reg_flags |= RF_tainted;
1aa99e6b 4668 if (do_utf8) {
ffc61ed2 4669 loceol = PL_regeol;
1aa99e6b
IH
4670 while (hardcount < max && scan < loceol &&
4671 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4672 scan += UTF8SKIP(scan);
4673 hardcount++;
4674 }
4675 } else {
4676 while (scan < loceol && isALNUM_LC(*scan))
4677 scan++;
a0ed51b3
LW
4678 }
4679 break;
a0d0e21e 4680 case NALNUM:
1aa99e6b 4681 if (do_utf8) {
ffc61ed2 4682 loceol = PL_regeol;
1a4fad37 4683 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4684 while (hardcount < max && scan < loceol &&
3568d838 4685 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4686 scan += UTF8SKIP(scan);
4687 hardcount++;
4688 }
4689 } else {
4690 while (scan < loceol && !isALNUM(*scan))
4691 scan++;
a0ed51b3
LW
4692 }
4693 break;
bbce6d69 4694 case NALNUML:
3280af22 4695 PL_reg_flags |= RF_tainted;
1aa99e6b 4696 if (do_utf8) {
ffc61ed2 4697 loceol = PL_regeol;
1aa99e6b
IH
4698 while (hardcount < max && scan < loceol &&
4699 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4700 scan += UTF8SKIP(scan);
4701 hardcount++;
4702 }
4703 } else {
4704 while (scan < loceol && !isALNUM_LC(*scan))
4705 scan++;
a0ed51b3
LW
4706 }
4707 break;
a0d0e21e 4708 case SPACE:
1aa99e6b 4709 if (do_utf8) {
ffc61ed2 4710 loceol = PL_regeol;
1a4fad37 4711 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4712 while (hardcount < max && scan < loceol &&
3568d838
JH
4713 (*scan == ' ' ||
4714 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4715 scan += UTF8SKIP(scan);
4716 hardcount++;
4717 }
4718 } else {
4719 while (scan < loceol && isSPACE(*scan))
4720 scan++;
a0ed51b3
LW
4721 }
4722 break;
bbce6d69 4723 case SPACEL:
3280af22 4724 PL_reg_flags |= RF_tainted;
1aa99e6b 4725 if (do_utf8) {
ffc61ed2 4726 loceol = PL_regeol;
1aa99e6b 4727 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4728 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4729 scan += UTF8SKIP(scan);
4730 hardcount++;
4731 }
4732 } else {
4733 while (scan < loceol && isSPACE_LC(*scan))
4734 scan++;
a0ed51b3
LW
4735 }
4736 break;
a0d0e21e 4737 case NSPACE:
1aa99e6b 4738 if (do_utf8) {
ffc61ed2 4739 loceol = PL_regeol;
1a4fad37 4740 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4741 while (hardcount < max && scan < loceol &&
3568d838
JH
4742 !(*scan == ' ' ||
4743 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4744 scan += UTF8SKIP(scan);
4745 hardcount++;
4746 }
4747 } else {
4748 while (scan < loceol && !isSPACE(*scan))
4749 scan++;
4750 break;
a0ed51b3 4751 }
bbce6d69 4752 case NSPACEL:
3280af22 4753 PL_reg_flags |= RF_tainted;
1aa99e6b 4754 if (do_utf8) {
ffc61ed2 4755 loceol = PL_regeol;
1aa99e6b 4756 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4757 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4758 scan += UTF8SKIP(scan);
4759 hardcount++;
4760 }
4761 } else {
4762 while (scan < loceol && !isSPACE_LC(*scan))
4763 scan++;
a0ed51b3
LW
4764 }
4765 break;
a0d0e21e 4766 case DIGIT:
1aa99e6b 4767 if (do_utf8) {
ffc61ed2 4768 loceol = PL_regeol;
1a4fad37 4769 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4770 while (hardcount < max && scan < loceol &&
3568d838 4771 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4772 scan += UTF8SKIP(scan);
4773 hardcount++;
4774 }
4775 } else {
4776 while (scan < loceol && isDIGIT(*scan))
4777 scan++;
a0ed51b3
LW
4778 }
4779 break;
a0d0e21e 4780 case NDIGIT:
1aa99e6b 4781 if (do_utf8) {
ffc61ed2 4782 loceol = PL_regeol;
1a4fad37 4783 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4784 while (hardcount < max && scan < loceol &&
3568d838 4785 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4786 scan += UTF8SKIP(scan);
4787 hardcount++;
4788 }
4789 } else {
4790 while (scan < loceol && !isDIGIT(*scan))
4791 scan++;
a0ed51b3
LW
4792 }
4793 break;
a0d0e21e
LW
4794 default: /* Called on something of 0 width. */
4795 break; /* So match right here or not at all. */
4796 }
a687059c 4797
a0ed51b3
LW
4798 if (hardcount)
4799 c = hardcount;
4800 else
4801 c = scan - PL_reginput;
3280af22 4802 PL_reginput = scan;
a687059c 4803
a3621e74 4804 DEBUG_r({
ab74612d 4805 SV *re_debug_flags = NULL;
6136c704 4806 SV * const prop = sv_newmortal();
a3621e74
YO
4807 GET_RE_DEBUG_FLAGS;
4808 DEBUG_EXECUTE_r({
c277df42 4809 regprop(prop, p);
9041c2e3
NIS
4810 PerlIO_printf(Perl_debug_log,
4811 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3f7c398e 4812 REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max);
c277df42 4813 });
a3621e74 4814 });
9041c2e3 4815
a0d0e21e 4816 return(c);
a687059c
LW
4817}
4818
c277df42
IZ
4819
4820/*
ffc61ed2
JH
4821- regclass_swash - prepare the utf8 swash
4822*/
4823
4824SV *
a3b680e6 4825Perl_regclass_swash(pTHX_ register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
ffc61ed2 4826{
97aff369 4827 dVAR;
9e55ce06
JH
4828 SV *sw = NULL;
4829 SV *si = NULL;
4830 SV *alt = NULL;
ffc61ed2
JH
4831
4832 if (PL_regdata && PL_regdata->count) {
a3b680e6 4833 const U32 n = ARG(node);
ffc61ed2
JH
4834
4835 if (PL_regdata->what[n] == 's') {
890ce7af
AL
4836 SV * const rv = (SV*)PL_regdata->data[n];
4837 AV * const av = (AV*)SvRV((SV*)rv);
2d03de9c 4838 SV **const ary = AvARRAY(av);
9e55ce06 4839 SV **a, **b;
9041c2e3 4840
711a919c 4841 /* See the end of regcomp.c:S_regclass() for
9e55ce06
JH
4842 * documentation of these array elements. */
4843
b11f357e 4844 si = *ary;
8f7f7219 4845 a = SvROK(ary[1]) ? &ary[1] : 0;
b11f357e
JH
4846 b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4847
ffc61ed2
JH
4848 if (a)
4849 sw = *a;
4850 else if (si && doinit) {
4851 sw = swash_init("utf8", "", si, 1, 0);
4852 (void)av_store(av, 1, sw);
4853 }
9e55ce06
JH
4854 if (b)
4855 alt = *b;
ffc61ed2
JH
4856 }
4857 }
4858
9e55ce06
JH
4859 if (listsvp)
4860 *listsvp = si;
4861 if (altsvp)
4862 *altsvp = alt;
ffc61ed2
JH
4863
4864 return sw;
4865}
4866
4867/*
ba7b4546 4868 - reginclass - determine if a character falls into a character class
832705d4
JH
4869
4870 The n is the ANYOF regnode, the p is the target string, lenp
4871 is pointer to the maximum length of how far to go in the p
4872 (if the lenp is zero, UTF8SKIP(p) is used),
4873 do_utf8 tells whether the target string is in UTF-8.
4874
bbce6d69 4875 */
4876
76e3520e 4877STATIC bool
a3b680e6 4878S_reginclass(pTHX_ register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
bbce6d69 4879{
27da23d5 4880 dVAR;
a3b680e6 4881 const char flags = ANYOF_FLAGS(n);
bbce6d69 4882 bool match = FALSE;
cc07378b 4883 UV c = *p;
ae9ddab8 4884 STRLEN len = 0;
9e55ce06 4885 STRLEN plen;
1aa99e6b 4886
19f67299
TS
4887 if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
4888 c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
a0374537
DM
4889 ckWARN(WARN_UTF8) ? UTF8_CHECK_ONLY :
4890 UTF8_ALLOW_ANYUV|UTF8_CHECK_ONLY);
19f67299
TS
4891 if (len == (STRLEN)-1)
4892 Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
4893 }
bbce6d69 4894
0f0076b4 4895 plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
ffc61ed2 4896 if (do_utf8 || (flags & ANYOF_UNICODE)) {
9e55ce06
JH
4897 if (lenp)
4898 *lenp = 0;
ffc61ed2 4899 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
4900 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4901 match = TRUE;
bbce6d69 4902 }
3568d838 4903 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 4904 match = TRUE;
ffc61ed2 4905 if (!match) {
9e55ce06 4906 AV *av;
890ce7af 4907 SV * const sw = regclass_swash(n, TRUE, 0, (SV**)&av);
ffc61ed2
JH
4908
4909 if (sw) {
3568d838 4910 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
4911 match = TRUE;
4912 else if (flags & ANYOF_FOLD) {
9e55ce06
JH
4913 if (!match && lenp && av) {
4914 I32 i;
9e55ce06 4915 for (i = 0; i <= av_len(av); i++) {
890ce7af 4916 SV* const sv = *av_fetch(av, i, FALSE);
9e55ce06 4917 STRLEN len;
890ce7af 4918 const char * const s = SvPV_const(sv, len);
9e55ce06 4919
061b10df 4920 if (len <= plen && memEQ(s, (char*)p, len)) {
9e55ce06
JH
4921 *lenp = len;
4922 match = TRUE;
4923 break;
4924 }
4925 }
4926 }
4927 if (!match) {
89ebb4a3 4928 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4a623e43
JH
4929 STRLEN tmplen;
4930
9e55ce06
JH
4931 to_utf8_fold(p, tmpbuf, &tmplen);
4932 if (swash_fetch(sw, tmpbuf, do_utf8))
4933 match = TRUE;
4934 }
ffc61ed2
JH
4935 }
4936 }
bbce6d69 4937 }
9e55ce06 4938 if (match && lenp && *lenp == 0)
0f0076b4 4939 *lenp = UNISKIP(NATIVE_TO_UNI(c));
bbce6d69 4940 }
1aa99e6b 4941 if (!match && c < 256) {
ffc61ed2
JH
4942 if (ANYOF_BITMAP_TEST(n, c))
4943 match = TRUE;
4944 else if (flags & ANYOF_FOLD) {
eb160463 4945 U8 f;
a0ed51b3 4946
ffc61ed2
JH
4947 if (flags & ANYOF_LOCALE) {
4948 PL_reg_flags |= RF_tainted;
4949 f = PL_fold_locale[c];
4950 }
4951 else
4952 f = PL_fold[c];
4953 if (f != c && ANYOF_BITMAP_TEST(n, f))
4954 match = TRUE;
4955 }
4956
4957 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 4958 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
4959 if (
4960 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4961 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4962 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
4963 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4964 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4965 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4966 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4967 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4968 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4969 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4970 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
4971 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
4972 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4973 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4974 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4975 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4976 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
4977 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4978 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
4979 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4980 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4981 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4982 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
4983 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4984 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4985 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
4986 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
4987 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
4988 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
4989 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
4990 ) /* How's that for a conditional? */
4991 {
4992 match = TRUE;
4993 }
a0ed51b3 4994 }
a0ed51b3
LW
4995 }
4996
a0ed51b3
LW
4997 return (flags & ANYOF_INVERT) ? !match : match;
4998}
161b471a 4999
dfe13c55 5000STATIC U8 *
5f66b61c 5001S_reghop3(U8 *s, I32 off, U8* lim)
9041c2e3 5002{
97aff369 5003 dVAR;
a0ed51b3 5004 if (off >= 0) {
1aa99e6b 5005 while (off-- && s < lim) {
ffc61ed2 5006 /* XXX could check well-formedness here */
a0ed51b3 5007 s += UTF8SKIP(s);
ffc61ed2 5008 }
a0ed51b3
LW
5009 }
5010 else {
5011 while (off++) {
1aa99e6b 5012 if (s > lim) {
a0ed51b3 5013 s--;
ffc61ed2 5014 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 5015 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 5016 s--;
ffc61ed2
JH
5017 }
5018 /* XXX could check well-formedness here */
a0ed51b3
LW
5019 }
5020 }
5021 }
5022 return s;
5023}
161b471a 5024
dfe13c55 5025STATIC U8 *
5f66b61c 5026S_reghopmaybe3(U8* s, I32 off, U8* lim)
a0ed51b3 5027{
97aff369 5028 dVAR;
a0ed51b3 5029 if (off >= 0) {
1aa99e6b 5030 while (off-- && s < lim) {
ffc61ed2 5031 /* XXX could check well-formedness here */
a0ed51b3 5032 s += UTF8SKIP(s);
ffc61ed2 5033 }
a0ed51b3
LW
5034 if (off >= 0)
5035 return 0;
5036 }
5037 else {
5038 while (off++) {
1aa99e6b 5039 if (s > lim) {
a0ed51b3 5040 s--;
ffc61ed2 5041 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 5042 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 5043 s--;
ffc61ed2
JH
5044 }
5045 /* XXX could check well-formedness here */
a0ed51b3
LW
5046 }
5047 else
5048 break;
5049 }
5050 if (off <= 0)
5051 return 0;
5052 }
5053 return s;
5054}
51371543 5055
51371543 5056static void
acfe0abc 5057restore_pos(pTHX_ void *arg)
51371543 5058{
97aff369 5059 dVAR;
9d4ba2ae 5060 PERL_UNUSED_ARG(arg);
51371543
GS
5061 if (PL_reg_eval_set) {
5062 if (PL_reg_oldsaved) {
5063 PL_reg_re->subbeg = PL_reg_oldsaved;
5064 PL_reg_re->sublen = PL_reg_oldsavedlen;
f8c7b90f 5065#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
5066 PL_reg_re->saved_copy = PL_nrs;
5067#endif
51371543
GS
5068 RX_MATCH_COPIED_on(PL_reg_re);
5069 }
5070 PL_reg_magic->mg_len = PL_reg_oldpos;
5071 PL_reg_eval_set = 0;
5072 PL_curpm = PL_reg_oldcurpm;
5073 }
5074}
33b8afdf
JH
5075
5076STATIC void
5077S_to_utf8_substr(pTHX_ register regexp *prog)
5078{
33b8afdf 5079 if (prog->float_substr && !prog->float_utf8) {
9d4ba2ae 5080 SV* sv;
f2b990bf 5081 prog->float_utf8 = sv = newSVsv(prog->float_substr);
33b8afdf
JH
5082 sv_utf8_upgrade(sv);
5083 if (SvTAIL(prog->float_substr))
5084 SvTAIL_on(sv);
5085 if (prog->float_substr == prog->check_substr)
5086 prog->check_utf8 = sv;
5087 }
5088 if (prog->anchored_substr && !prog->anchored_utf8) {
9d4ba2ae 5089 SV* sv;
f2b990bf 5090 prog->anchored_utf8 = sv = newSVsv(prog->anchored_substr);
33b8afdf
JH
5091 sv_utf8_upgrade(sv);
5092 if (SvTAIL(prog->anchored_substr))
5093 SvTAIL_on(sv);
5094 if (prog->anchored_substr == prog->check_substr)
5095 prog->check_utf8 = sv;
5096 }
5097}
5098
5099STATIC void
5100S_to_byte_substr(pTHX_ register regexp *prog)
5101{
97aff369 5102 dVAR;
33b8afdf 5103 if (prog->float_utf8 && !prog->float_substr) {
9d4ba2ae 5104 SV* sv;
f2b990bf 5105 prog->float_substr = sv = newSVsv(prog->float_utf8);
33b8afdf
JH
5106 if (sv_utf8_downgrade(sv, TRUE)) {
5107 if (SvTAIL(prog->float_utf8))
5108 SvTAIL_on(sv);
5109 } else {
5110 SvREFCNT_dec(sv);
5111 prog->float_substr = sv = &PL_sv_undef;
5112 }
5113 if (prog->float_utf8 == prog->check_utf8)
5114 prog->check_substr = sv;
5115 }
5116 if (prog->anchored_utf8 && !prog->anchored_substr) {
9d4ba2ae 5117 SV* sv;
f2b990bf 5118 prog->anchored_substr = sv = newSVsv(prog->anchored_utf8);
33b8afdf
JH
5119 if (sv_utf8_downgrade(sv, TRUE)) {
5120 if (SvTAIL(prog->anchored_utf8))
5121 SvTAIL_on(sv);
5122 } else {
5123 SvREFCNT_dec(sv);
5124 prog->anchored_substr = sv = &PL_sv_undef;
5125 }
5126 if (prog->anchored_utf8 == prog->check_utf8)
5127 prog->check_substr = sv;
5128 }
5129}
66610fdd
RGS
5130
5131/*
5132 * Local variables:
5133 * c-indentation-style: bsd
5134 * c-basic-offset: 4
5135 * indent-tabs-mode: t
5136 * End:
5137 *
37442d52
RGS
5138 * ex: set ts=8 sts=4 sw=4 noet:
5139 */