This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[doc-patch] clarify -Accflags in INSTALL
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
61296642
DM
8/* This file contains functions for executing a regular expression. See
9 * also regcomp.c which funnily enough, contains functions for compiling
166f8a29 10 * a regular expression.
e4a054ea
DM
11 *
12 * This file is also copied at build time to ext/re/re_exec.c, where
13 * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14 * This causes the main functions to be compiled under new names and with
15 * debugging support added, which makes "use re 'debug'" work.
16
166f8a29
DM
17 */
18
a687059c
LW
19/* NOTE: this is derived from Henry Spencer's regexp code, and should not
20 * confused with the original package (see point 3 below). Thanks, Henry!
21 */
22
23/* Additional note: this code is very heavily munged from Henry's version
24 * in places. In some spots I've traded clarity for efficiency, so don't
25 * blame Henry for some of the lack of readability.
26 */
27
e50aee73
AD
28/* The names of the functions have been changed from regcomp and
29 * regexec to pregcomp and pregexec in order to avoid conflicts
30 * with the POSIX routines of the same names.
31*/
32
b9d5759e
AD
33#ifdef PERL_EXT_RE_BUILD
34/* need to replace pregcomp et al, so enable that */
35# ifndef PERL_IN_XSUB_RE
36# define PERL_IN_XSUB_RE
37# endif
38/* need access to debugger hooks */
cad2e5aa 39# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
40# define DEBUGGING
41# endif
42#endif
43
44#ifdef PERL_IN_XSUB_RE
d06ea78c 45/* We *really* need to overwrite these symbols: */
56953603
IZ
46# define Perl_regexec_flags my_regexec
47# define Perl_regdump my_regdump
48# define Perl_regprop my_regprop
cad2e5aa 49# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
50/* *These* symbols are masked to allow static link. */
51# define Perl_pregexec my_pregexec
9041c2e3 52# define Perl_reginitcolors my_reginitcolors
490a3f88 53# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
54
55# define PERL_NO_GET_CONTEXT
9041c2e3 56#endif
56953603 57
a687059c 58/*
e50aee73 59 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
60 *
61 * Copyright (c) 1986 by University of Toronto.
62 * Written by Henry Spencer. Not derived from licensed software.
63 *
64 * Permission is granted to anyone to use this software for any
65 * purpose on any computer system, and to redistribute it freely,
66 * subject to the following restrictions:
67 *
68 * 1. The author is not responsible for the consequences of use of
69 * this software, no matter how awful, even if they arise
70 * from defects in it.
71 *
72 * 2. The origin of this software must not be misrepresented, either
73 * by explicit claim or by omission.
74 *
75 * 3. Altered versions must be plainly marked as such, and must not
76 * be misrepresented as being the original software.
77 *
78 **** Alterations to Henry's code are...
79 ****
4bb101f2 80 **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 81 **** 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 82 ****
9ef589d8
LW
83 **** You may distribute under the terms of either the GNU General Public
84 **** License or the Artistic License, as specified in the README file.
a687059c
LW
85 *
86 * Beware that some of this code is subtly aware of the way operator
87 * precedence is structured in regular expressions. Serious changes in
88 * regular-expression syntax might require a total rethink.
89 */
90#include "EXTERN.h"
864dbfa3 91#define PERL_IN_REGEXEC_C
a687059c 92#include "perl.h"
0f5d15d6 93
a687059c
LW
94#include "regcomp.h"
95
c277df42
IZ
96#define RF_tainted 1 /* tainted information used? */
97#define RF_warned 2 /* warned about big count? */
ce862d02 98#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
99#define RF_utf8 8 /* String contains multibyte chars? */
100
eb160463 101#define UTF ((PL_reg_flags & RF_utf8) != 0)
ce862d02
IZ
102
103#define RS_init 1 /* eval environment created */
104#define RS_set 2 /* replsv value is set */
c277df42 105
a687059c
LW
106#ifndef STATIC
107#define STATIC static
108#endif
109
ba7b4546 110#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
7d3e948e 111
c277df42
IZ
112/*
113 * Forwards.
114 */
115
33b8afdf 116#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 117#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 118
dfe13c55
GS
119#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
120#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
121#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
122#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
123#define HOPc(pos,off) ((char*)HOP(pos,off))
124#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 125
efb30f32 126#define HOPBACK(pos, off) ( \
e54858b0 127 (PL_reg_match_utf8) \
efb30f32
HS
128 ? reghopmaybe((U8*)pos, -off) \
129 : (pos - off >= PL_bostr) \
130 ? (U8*)(pos - off) \
131 : (U8*)NULL \
132)
133#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
134
1aa99e6b
IH
135#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
136#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
137#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
138#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
139#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
140#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
141
1a4fad37
AL
142#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
143 if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
144#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
145#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
146#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
147#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
51371543 148
5f80c4cf 149/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
150#define JUMPABLE(rn) ( \
151 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
152 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
153 OP(rn) == PLUS || OP(rn) == MINMOD || \
154 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
155)
156
cca55fe3
JP
157#define HAS_TEXT(rn) ( \
158 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
159)
e2d8ce26 160
a84d97b6
HS
161/*
162 Search for mandatory following text node; for lookahead, the text must
163 follow but for lookbehind (rn->flags != 0) we skip to the next step.
164*/
cca55fe3 165#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 166 while (JUMPABLE(rn)) \
a84d97b6 167 if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 168 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
169 else if (OP(rn) == PLUS) \
170 rn = NEXTOPER(rn); \
a84d97b6
HS
171 else if (OP(rn) == IFMATCH) \
172 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
e2d8ce26 173 else rn += NEXT_OFF(rn); \
5f80c4cf 174} STMT_END
74750237 175
acfe0abc 176static void restore_pos(pTHX_ void *arg);
51371543 177
76e3520e 178STATIC CHECKPOINT
cea2e8a9 179S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 180{
97aff369 181 dVAR;
a3b680e6 182 const int retval = PL_savestack_ix;
b1ce53c5 183#define REGCP_PAREN_ELEMS 4
a3b680e6 184 const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
185 int p;
186
e49a9654
IH
187 if (paren_elems_to_push < 0)
188 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
189
a01268b5 190#define REGCP_OTHER_ELEMS 6
4b3c1a47 191 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 192 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 193/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
194 SSPUSHINT(PL_regendp[p]);
195 SSPUSHINT(PL_regstartp[p]);
3280af22 196 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
197 SSPUSHINT(p);
198 }
b1ce53c5 199/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
200 SSPUSHINT(PL_regsize);
201 SSPUSHINT(*PL_reglastparen);
a01268b5 202 SSPUSHINT(*PL_reglastcloseparen);
3280af22 203 SSPUSHPTR(PL_reginput);
41123dfd
JH
204#define REGCP_FRAME_ELEMS 2
205/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
206 * are needed for the regexp context stack bookkeeping. */
207 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 208 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 209
a0d0e21e
LW
210 return retval;
211}
212
c277df42 213/* These are needed since we do not localize EVAL nodes: */
a3621e74 214# define REGCP_SET(cp) DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, \
faccc32b 215 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 216 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 217
a3621e74 218# define REGCP_UNWIND(cp) DEBUG_EXECUTE_r(cp != PL_savestack_ix ? \
c3464db5 219 PerlIO_printf(Perl_debug_log, \
faccc32b 220 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 221 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 222
76e3520e 223STATIC char *
cea2e8a9 224S_regcppop(pTHX)
a0d0e21e 225{
97aff369 226 dVAR;
b1ce53c5 227 I32 i;
a0d0e21e
LW
228 U32 paren = 0;
229 char *input;
b1ce53c5 230
a3621e74
YO
231 GET_RE_DEBUG_FLAGS_DECL;
232
b1ce53c5 233 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 234 i = SSPOPINT;
b1ce53c5
JH
235 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
236 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 237 input = (char *) SSPOPPTR;
a01268b5 238 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
239 *PL_reglastparen = SSPOPINT;
240 PL_regsize = SSPOPINT;
b1ce53c5
JH
241
242 /* Now restore the parentheses context. */
41123dfd
JH
243 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
244 i > 0; i -= REGCP_PAREN_ELEMS) {
1df70142 245 I32 tmps;
a0d0e21e 246 paren = (U32)SSPOPINT;
3280af22 247 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
248 PL_regstartp[paren] = SSPOPINT;
249 tmps = SSPOPINT;
3280af22
NIS
250 if (paren <= *PL_reglastparen)
251 PL_regendp[paren] = tmps;
a3621e74 252 DEBUG_EXECUTE_r(
c3464db5 253 PerlIO_printf(Perl_debug_log,
b900a521 254 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 255 (UV)paren, (IV)PL_regstartp[paren],
b900a521 256 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 257 (IV)PL_regendp[paren],
3280af22 258 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 259 );
a0d0e21e 260 }
a3621e74 261 DEBUG_EXECUTE_r(
eb160463 262 if ((I32)(*PL_reglastparen + 1) <= PL_regnpar) {
c3464db5 263 PerlIO_printf(Perl_debug_log,
faccc32b
JH
264 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
265 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
266 }
267 );
daf18116 268#if 1
dafc8851
JH
269 /* It would seem that the similar code in regtry()
270 * already takes care of this, and in fact it is in
271 * a better location to since this code can #if 0-ed out
272 * but the code in regtry() is needed or otherwise tests
273 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
274 * (as of patchlevel 7877) will fail. Then again,
275 * this code seems to be necessary or otherwise
276 * building DynaLoader will fail:
277 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
278 * --jhi */
eb160463
GS
279 for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) {
280 if ((I32)paren > PL_regsize)
cf93c79d
IZ
281 PL_regstartp[paren] = -1;
282 PL_regendp[paren] = -1;
a0d0e21e 283 }
dafc8851 284#endif
a0d0e21e
LW
285 return input;
286}
287
0f5d15d6
IZ
288typedef struct re_cc_state
289{
290 I32 ss;
291 regnode *node;
292 struct re_cc_state *prev;
293 CURCUR *cc;
294 regexp *re;
295} re_cc_state;
296
02db2b7b 297#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 298
95b24440 299#define TRYPAREN(paren, n, input, where) { \
29d1e993
HS
300 if (paren) { \
301 if (n) { \
302 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
303 PL_regendp[paren] = input - PL_bostr; \
304 } \
305 else \
306 PL_regendp[paren] = -1; \
307 } \
95b24440
DM
308 REGMATCH(next, where); \
309 if (result) \
29d1e993
HS
310 sayYES; \
311 if (paren && n) \
312 PL_regendp[paren] = -1; \
313}
314
315
a687059c 316/*
e50aee73 317 * pregexec and friends
a687059c
LW
318 */
319
320/*
c277df42 321 - pregexec - match a regexp against a string
a687059c 322 */
c277df42 323I32
864dbfa3 324Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 325 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
326/* strend: pointer to null at end of string */
327/* strbeg: real beginning of string */
328/* minend: end of match must be >=minend after stringarg. */
329/* nosave: For optimizations. */
330{
331 return
9041c2e3 332 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
333 nosave ? 0 : REXEC_COPY_STR);
334}
0f5d15d6
IZ
335
336STATIC void
cea2e8a9 337S_cache_re(pTHX_ regexp *prog)
0f5d15d6 338{
97aff369 339 dVAR;
0f5d15d6
IZ
340 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
341#ifdef DEBUGGING
342 PL_regprogram = prog->program;
343#endif
344 PL_regnpar = prog->nparens;
9041c2e3
NIS
345 PL_regdata = prog->data;
346 PL_reg_re = prog;
0f5d15d6 347}
22e551b9 348
9041c2e3 349/*
cad2e5aa
JH
350 * Need to implement the following flags for reg_anch:
351 *
352 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
353 * USE_INTUIT_ML
354 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
355 * INTUIT_AUTORITATIVE_ML
356 * INTUIT_ONCE_NOML - Intuit can match in one location only.
357 * INTUIT_ONCE_ML
358 *
359 * Another flag for this function: SECOND_TIME (so that float substrs
360 * with giant delta may be not rechecked).
361 */
362
363/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
364
3f7c398e 365/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
cad2e5aa
JH
366 Otherwise, only SvCUR(sv) is used to get strbeg. */
367
368/* XXXX We assume that strpos is strbeg unless sv. */
369
6eb5f6b9
JH
370/* XXXX Some places assume that there is a fixed substring.
371 An update may be needed if optimizer marks as "INTUITable"
372 RExen without fixed substrings. Similarly, it is assumed that
373 lengths of all the strings are no more than minlen, thus they
374 cannot come from lookahead.
375 (Or minlen should take into account lookahead.) */
376
2c2d71f5
JH
377/* A failure to find a constant substring means that there is no need to make
378 an expensive call to REx engine, thus we celebrate a failure. Similarly,
379 finding a substring too deep into the string means that less calls to
30944b6d
IZ
380 regtry() should be needed.
381
382 REx compiler's optimizer found 4 possible hints:
383 a) Anchored substring;
384 b) Fixed substring;
385 c) Whether we are anchored (beginning-of-line or \G);
386 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 387 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
388 string which does not contradict any of them.
389 */
2c2d71f5 390
6eb5f6b9
JH
391/* Most of decisions we do here should have been done at compile time.
392 The nodes of the REx which we used for the search should have been
393 deleted from the finite automaton. */
394
cad2e5aa
JH
395char *
396Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
397 char *strend, U32 flags, re_scream_pos_data *data)
398{
97aff369 399 dVAR;
b7953727 400 register I32 start_shift = 0;
cad2e5aa 401 /* Should be nonnegative! */
b7953727 402 register I32 end_shift = 0;
2c2d71f5
JH
403 register char *s;
404 register SV *check;
a1933d95 405 char *strbeg;
cad2e5aa 406 char *t;
a3b680e6 407 const int do_utf8 = sv ? SvUTF8(sv) : 0; /* if no sv we have to assume bytes */
cad2e5aa 408 I32 ml_anch;
bd61b366
SS
409 register char *other_last = NULL; /* other substr checked before this */
410 char *check_at = NULL; /* check substr found at this pos */
1df70142 411 const I32 multiline = prog->reganch & PMf_MULTILINE;
30944b6d 412#ifdef DEBUGGING
890ce7af
AL
413 const char * const i_strpos = strpos;
414 SV * const dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 415#endif
a3621e74
YO
416
417 GET_RE_DEBUG_FLAGS_DECL;
418
a30b2f1f 419 RX_MATCH_UTF8_set(prog,do_utf8);
cad2e5aa 420
b8d68ded 421 if (prog->reganch & ROPT_UTF8) {
a3621e74 422 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded
JH
423 "UTF-8 regex...\n"));
424 PL_reg_flags |= RF_utf8;
425 }
426
a3621e74 427 DEBUG_EXECUTE_r({
1df70142 428 const char *s = PL_reg_match_utf8 ?
c728cb41
JH
429 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
430 strpos;
1df70142 431 const int len = PL_reg_match_utf8 ?
b8d68ded 432 strlen(s) : strend - strpos;
2a782b5b
JH
433 if (!PL_colorset)
434 reginitcolors();
b8d68ded 435 if (PL_reg_match_utf8)
a3621e74 436 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded 437 "UTF-8 target...\n"));
2a782b5b 438 PerlIO_printf(Perl_debug_log,
a0288114 439 "%sGuessing start of match, REx%s \"%s%.60s%s%s\" against \"%s%.*s%s%s\"...\n",
e4584336 440 PL_colors[4], PL_colors[5], PL_colors[0],
2a782b5b
JH
441 prog->precomp,
442 PL_colors[1],
443 (strlen(prog->precomp) > 60 ? "..." : ""),
444 PL_colors[0],
445 (int)(len > 60 ? 60 : len),
446 s, PL_colors[1],
447 (len > 60 ? "..." : "")
448 );
449 });
cad2e5aa 450
c344f387
JH
451 /* CHR_DIST() would be more correct here but it makes things slow. */
452 if (prog->minlen > strend - strpos) {
a3621e74 453 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584 454 "String too short... [re_intuit_start]\n"));
cad2e5aa 455 goto fail;
2c2d71f5 456 }
a1933d95 457 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 458 PL_regeol = strend;
33b8afdf
JH
459 if (do_utf8) {
460 if (!prog->check_utf8 && prog->check_substr)
461 to_utf8_substr(prog);
462 check = prog->check_utf8;
463 } else {
464 if (!prog->check_substr && prog->check_utf8)
465 to_byte_substr(prog);
466 check = prog->check_substr;
467 }
468 if (check == &PL_sv_undef) {
a3621e74 469 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
470 "Non-utf string cannot match utf check string\n"));
471 goto fail;
472 }
2c2d71f5 473 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
474 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
475 || ( (prog->reganch & ROPT_ANCH_BOL)
7fba1cd6 476 && !multiline ) ); /* Check after \n? */
cad2e5aa 477
7e25d62c
JH
478 if (!ml_anch) {
479 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
480 | ROPT_IMPLICIT)) /* not a real BOL */
3f7c398e 481 /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
7e25d62c
JH
482 && sv && !SvROK(sv)
483 && (strpos != strbeg)) {
a3621e74 484 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
7e25d62c
JH
485 goto fail;
486 }
487 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 488 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 489 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
490 I32 slen;
491
1aa99e6b 492 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
493 if (SvTAIL(check)) {
494 slen = SvCUR(check); /* >= 1 */
cad2e5aa 495
9041c2e3 496 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5 497 || (strend - s == slen && strend[-1] != '\n')) {
a3621e74 498 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
2c2d71f5 499 goto fail_finish;
cad2e5aa
JH
500 }
501 /* Now should match s[0..slen-2] */
502 slen--;
3f7c398e 503 if (slen && (*SvPVX_const(check) != *s
cad2e5aa 504 || (slen > 1
3f7c398e 505 && memNE(SvPVX_const(check), s, slen)))) {
2c2d71f5 506 report_neq:
a3621e74 507 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
2c2d71f5
JH
508 goto fail_finish;
509 }
cad2e5aa 510 }
3f7c398e 511 else if (*SvPVX_const(check) != *s
653099ff 512 || ((slen = SvCUR(check)) > 1
3f7c398e 513 && memNE(SvPVX_const(check), s, slen)))
2c2d71f5 514 goto report_neq;
c315bfe8 515 check_at = s;
2c2d71f5 516 goto success_at_start;
7e25d62c 517 }
cad2e5aa 518 }
2c2d71f5 519 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 520 s = strpos;
2c2d71f5 521 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 522 end_shift = prog->minlen - start_shift -
653099ff 523 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 524 if (!ml_anch) {
a3b680e6 525 const I32 end = prog->check_offset_max + CHR_SVLEN(check)
653099ff 526 - (SvTAIL(check) != 0);
a3b680e6 527 const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
528
529 if (end_shift < eshift)
530 end_shift = eshift;
531 }
cad2e5aa 532 }
2c2d71f5 533 else { /* Can match at random position */
cad2e5aa
JH
534 ml_anch = 0;
535 s = strpos;
2c2d71f5
JH
536 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
537 /* Should be nonnegative! */
538 end_shift = prog->minlen - start_shift -
653099ff 539 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
540 }
541
2c2d71f5 542#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 543 if (end_shift < 0)
6bbae5e6 544 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
545#endif
546
2c2d71f5
JH
547 restart:
548 /* Find a possible match in the region s..strend by looking for
549 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 550 if (flags & REXEC_SCREAM) {
cad2e5aa 551 I32 p = -1; /* Internal iterator of scream. */
a3b680e6 552 I32 * const pp = data ? data->scream_pos : &p;
cad2e5aa 553
2c2d71f5
JH
554 if (PL_screamfirst[BmRARE(check)] >= 0
555 || ( BmRARE(check) == '\n'
556 && (BmPREVIOUS(check) == SvCUR(check) - 1)
557 && SvTAIL(check) ))
9041c2e3 558 s = screaminstr(sv, check,
2c2d71f5 559 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 560 else
2c2d71f5 561 goto fail_finish;
4addbd3b
HS
562 /* we may be pointing at the wrong string */
563 if (s && RX_MATCH_COPIED(prog))
3f7c398e 564 s = strbeg + (s - SvPVX_const(sv));
cad2e5aa
JH
565 if (data)
566 *data->scream_olds = s;
567 }
f33976b4 568 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
569 s = fbm_instr((U8*)(s + start_shift),
570 (U8*)(strend - end_shift),
7fba1cd6 571 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 572 else
1aa99e6b
IH
573 s = fbm_instr(HOP3(s, start_shift, strend),
574 HOP3(strend, -end_shift, strbeg),
7fba1cd6 575 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
576
577 /* Update the count-of-usability, remove useless subpatterns,
578 unshift s. */
2c2d71f5 579
a0288114 580 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s %s substr \"%s%.*s%s\"%s%s",
2c2d71f5 581 (s ? "Found" : "Did not find"),
33b8afdf 582 (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
2c2d71f5 583 PL_colors[0],
7b0972df 584 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
3f7c398e 585 SvPVX_const(check),
2c2d71f5
JH
586 PL_colors[1], (SvTAIL(check) ? "$" : ""),
587 (s ? " at offset " : "...\n") ) );
588
589 if (!s)
590 goto fail_finish;
591
6eb5f6b9
JH
592 check_at = s;
593
2c2d71f5 594 /* Finish the diagnostic message */
a3621e74 595 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
596
597 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
598 Start with the other substr.
599 XXXX no SCREAM optimization yet - and a very coarse implementation
a0288114 600 XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
2c2d71f5
JH
601 *always* match. Probably should be marked during compile...
602 Probably it is right to do no SCREAM here...
603 */
604
33b8afdf 605 if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
30944b6d 606 /* Take into account the "other" substring. */
2c2d71f5
JH
607 /* XXXX May be hopelessly wrong for UTF... */
608 if (!other_last)
6eb5f6b9 609 other_last = strpos;
33b8afdf 610 if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
30944b6d
IZ
611 do_other_anchored:
612 {
890ce7af
AL
613 char * const last = HOP3c(s, -start_shift, strbeg);
614 char *last1, *last2;
2c2d71f5 615 char *s1 = s;
33b8afdf 616 SV* must;
2c2d71f5 617
2c2d71f5
JH
618 t = s - prog->check_offset_max;
619 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 620 && (!do_utf8
1aa99e6b 621 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 622 && t > strpos)))
30944b6d 623 /* EMPTY */;
2c2d71f5
JH
624 else
625 t = strpos;
1aa99e6b 626 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
627 if (t < other_last) /* These positions already checked */
628 t = other_last;
1aa99e6b 629 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
630 if (last < last1)
631 last1 = last;
632 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
633 /* On end-of-str: see comment below. */
33b8afdf
JH
634 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
635 if (must == &PL_sv_undef) {
636 s = (char*)NULL;
a3621e74 637 DEBUG_EXECUTE_r(must = prog->anchored_utf8); /* for debug */
33b8afdf
JH
638 }
639 else
640 s = fbm_instr(
641 (unsigned char*)t,
642 HOP3(HOP3(last1, prog->anchored_offset, strend)
643 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
644 must,
7fba1cd6 645 multiline ? FBMrf_MULTILINE : 0
33b8afdf 646 );
a3621e74 647 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a0288114 648 "%s anchored substr \"%s%.*s%s\"%s",
2c2d71f5
JH
649 (s ? "Found" : "Contradicts"),
650 PL_colors[0],
33b8afdf
JH
651 (int)(SvCUR(must)
652 - (SvTAIL(must)!=0)),
3f7c398e 653 SvPVX_const(must),
33b8afdf 654 PL_colors[1], (SvTAIL(must) ? "$" : "")));
2c2d71f5
JH
655 if (!s) {
656 if (last1 >= last2) {
a3621e74 657 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5
JH
658 ", giving up...\n"));
659 goto fail_finish;
660 }
a3621e74 661 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 662 ", trying floating at offset %ld...\n",
1aa99e6b
IH
663 (long)(HOP3c(s1, 1, strend) - i_strpos)));
664 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
665 s = HOP3c(last, 1, strend);
2c2d71f5
JH
666 goto restart;
667 }
668 else {
a3621e74 669 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 670 (long)(s - i_strpos)));
1aa99e6b
IH
671 t = HOP3c(s, -prog->anchored_offset, strbeg);
672 other_last = HOP3c(s, 1, strend);
30944b6d 673 s = s1;
2c2d71f5
JH
674 if (t == strpos)
675 goto try_at_start;
2c2d71f5
JH
676 goto try_at_offset;
677 }
30944b6d 678 }
2c2d71f5
JH
679 }
680 else { /* Take into account the floating substring. */
33b8afdf
JH
681 char *last, *last1;
682 char *s1 = s;
683 SV* must;
684
685 t = HOP3c(s, -start_shift, strbeg);
686 last1 = last =
687 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
688 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
689 last = HOP3c(t, prog->float_max_offset, strend);
690 s = HOP3c(t, prog->float_min_offset, strend);
691 if (s < other_last)
692 s = other_last;
2c2d71f5 693 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
33b8afdf
JH
694 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
695 /* fbm_instr() takes into account exact value of end-of-str
696 if the check is SvTAIL(ed). Since false positives are OK,
697 and end-of-str is not later than strend we are OK. */
698 if (must == &PL_sv_undef) {
699 s = (char*)NULL;
a3621e74 700 DEBUG_EXECUTE_r(must = prog->float_utf8); /* for debug message */
33b8afdf
JH
701 }
702 else
2c2d71f5 703 s = fbm_instr((unsigned char*)s,
33b8afdf
JH
704 (unsigned char*)last + SvCUR(must)
705 - (SvTAIL(must)!=0),
7fba1cd6 706 must, multiline ? FBMrf_MULTILINE : 0);
a0288114 707 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s floating substr \"%s%.*s%s\"%s",
33b8afdf
JH
708 (s ? "Found" : "Contradicts"),
709 PL_colors[0],
710 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 711 SvPVX_const(must),
33b8afdf
JH
712 PL_colors[1], (SvTAIL(must) ? "$" : "")));
713 if (!s) {
714 if (last1 == last) {
a3621e74 715 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
716 ", giving up...\n"));
717 goto fail_finish;
2c2d71f5 718 }
a3621e74 719 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
720 ", trying anchored starting at offset %ld...\n",
721 (long)(s1 + 1 - i_strpos)));
722 other_last = last;
723 s = HOP3c(t, 1, strend);
724 goto restart;
725 }
726 else {
a3621e74 727 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
33b8afdf
JH
728 (long)(s - i_strpos)));
729 other_last = s; /* Fix this later. --Hugo */
730 s = s1;
731 if (t == strpos)
732 goto try_at_start;
733 goto try_at_offset;
734 }
2c2d71f5 735 }
cad2e5aa 736 }
2c2d71f5
JH
737
738 t = s - prog->check_offset_max;
2c2d71f5 739 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 740 && (!do_utf8
1aa99e6b
IH
741 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
742 && t > strpos))) {
2c2d71f5
JH
743 /* Fixed substring is found far enough so that the match
744 cannot start at strpos. */
745 try_at_offset:
cad2e5aa 746 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
747 /* Eventually fbm_*() should handle this, but often
748 anchored_offset is not 0, so this check will not be wasted. */
749 /* XXXX In the code below we prefer to look for "^" even in
750 presence of anchored substrings. And we search even
751 beyond the found float position. These pessimizations
752 are historical artefacts only. */
753 find_anchor:
2c2d71f5 754 while (t < strend - prog->minlen) {
cad2e5aa 755 if (*t == '\n') {
4ee3650e 756 if (t < check_at - prog->check_offset_min) {
33b8afdf 757 if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
4ee3650e
GS
758 /* Since we moved from the found position,
759 we definitely contradict the found anchored
30944b6d
IZ
760 substr. Due to the above check we do not
761 contradict "check" substr.
762 Thus we can arrive here only if check substr
763 is float. Redo checking for "other"=="fixed".
764 */
9041c2e3 765 strpos = t + 1;
a3621e74 766 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
e4584336 767 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
30944b6d
IZ
768 goto do_other_anchored;
769 }
4ee3650e
GS
770 /* We don't contradict the found floating substring. */
771 /* XXXX Why not check for STCLASS? */
cad2e5aa 772 s = t + 1;
a3621e74 773 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
e4584336 774 PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
775 goto set_useful;
776 }
4ee3650e
GS
777 /* Position contradicts check-string */
778 /* XXXX probably better to look for check-string
779 than for "\n", so one should lower the limit for t? */
a3621e74 780 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
e4584336 781 PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 782 other_last = strpos = s = t + 1;
cad2e5aa
JH
783 goto restart;
784 }
785 t++;
786 }
a3621e74 787 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
e4584336 788 PL_colors[0], PL_colors[1]));
2c2d71f5 789 goto fail_finish;
cad2e5aa 790 }
f5952150 791 else {
a3621e74 792 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
e4584336 793 PL_colors[0], PL_colors[1]));
f5952150 794 }
cad2e5aa
JH
795 s = t;
796 set_useful:
33b8afdf 797 ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
cad2e5aa
JH
798 }
799 else {
f5952150 800 /* The found string does not prohibit matching at strpos,
2c2d71f5 801 - no optimization of calling REx engine can be performed,
f5952150
GS
802 unless it was an MBOL and we are not after MBOL,
803 or a future STCLASS check will fail this. */
2c2d71f5
JH
804 try_at_start:
805 /* Even in this situation we may use MBOL flag if strpos is offset
806 wrt the start of the string. */
05b4157f 807 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 808 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
809 /* May be due to an implicit anchor of m{.*foo} */
810 && !(prog->reganch & ROPT_IMPLICIT))
811 {
cad2e5aa
JH
812 t = strpos;
813 goto find_anchor;
814 }
a3621e74 815 DEBUG_EXECUTE_r( if (ml_anch)
f5952150 816 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
e4584336 817 (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
30944b6d 818 );
2c2d71f5 819 success_at_start:
30944b6d 820 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
33b8afdf
JH
821 && (do_utf8 ? (
822 prog->check_utf8 /* Could be deleted already */
823 && --BmUSEFUL(prog->check_utf8) < 0
824 && (prog->check_utf8 == prog->float_utf8)
825 ) : (
826 prog->check_substr /* Could be deleted already */
827 && --BmUSEFUL(prog->check_substr) < 0
828 && (prog->check_substr == prog->float_substr)
829 )))
66e933ab 830 {
cad2e5aa 831 /* If flags & SOMETHING - do not do it many times on the same match */
a3621e74 832 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
33b8afdf
JH
833 SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
834 if (do_utf8 ? prog->check_substr : prog->check_utf8)
835 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
a0714e2c
SS
836 prog->check_substr = prog->check_utf8 = NULL; /* disable */
837 prog->float_substr = prog->float_utf8 = NULL; /* clear */
838 check = NULL; /* abort */
cad2e5aa 839 s = strpos;
3cf5c195
IZ
840 /* XXXX This is a remnant of the old implementation. It
841 looks wasteful, since now INTUIT can use many
6eb5f6b9 842 other heuristics. */
cad2e5aa
JH
843 prog->reganch &= ~RE_USE_INTUIT;
844 }
845 else
846 s = strpos;
847 }
848
6eb5f6b9
JH
849 /* Last resort... */
850 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
851 if (prog->regstclass) {
852 /* minlen == 0 is possible if regstclass is \b or \B,
853 and the fixed substr is ''$.
854 Since minlen is already taken into account, s+1 is before strend;
855 accidentally, minlen >= 1 guaranties no false positives at s + 1
856 even for \b or \B. But (minlen? 1 : 0) below assumes that
857 regstclass does not come from lookahead... */
858 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
859 This leaves EXACTF only, which is dealt with in find_byclass(). */
890ce7af 860 const U8* const str = (U8*)STRING(prog->regstclass);
06b5626a 861 const int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 862 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 863 : 1);
a3b680e6 864 const char * const endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
1aa99e6b 865 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
33b8afdf 866 : (prog->float_substr || prog->float_utf8
1aa99e6b
IH
867 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
868 cl_l, strend)
869 : strend);
6eb5f6b9
JH
870
871 t = s;
9926ca43 872 cache_re(prog);
06b5626a 873 s = find_byclass(prog, prog->regstclass, s, endpos, 1);
6eb5f6b9
JH
874 if (!s) {
875#ifdef DEBUGGING
cbbf8932 876 const char *what = NULL;
6eb5f6b9
JH
877#endif
878 if (endpos == strend) {
a3621e74 879 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
880 "Could not match STCLASS...\n") );
881 goto fail;
882 }
a3621e74 883 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab 884 "This position contradicts STCLASS...\n") );
653099ff
GS
885 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
886 goto fail;
6eb5f6b9 887 /* Contradict one of substrings */
33b8afdf
JH
888 if (prog->anchored_substr || prog->anchored_utf8) {
889 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
a3621e74 890 DEBUG_EXECUTE_r( what = "anchored" );
6eb5f6b9 891 hop_and_restart:
1aa99e6b 892 s = HOP3c(t, 1, strend);
66e933ab
GS
893 if (s + start_shift + end_shift > strend) {
894 /* XXXX Should be taken into account earlier? */
a3621e74 895 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab
GS
896 "Could not match STCLASS...\n") );
897 goto fail;
898 }
5e39e1e5
HS
899 if (!check)
900 goto giveup;
a3621e74 901 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 902 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
903 what, (long)(s + start_shift - i_strpos)) );
904 goto restart;
905 }
66e933ab 906 /* Have both, check_string is floating */
6eb5f6b9
JH
907 if (t + start_shift >= check_at) /* Contradicts floating=check */
908 goto retry_floating_check;
909 /* Recheck anchored substring, but not floating... */
9041c2e3 910 s = check_at;
5e39e1e5
HS
911 if (!check)
912 goto giveup;
a3621e74 913 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 914 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
915 (long)(other_last - i_strpos)) );
916 goto do_other_anchored;
917 }
60e71179
GS
918 /* Another way we could have checked stclass at the
919 current position only: */
920 if (ml_anch) {
921 s = t = t + 1;
5e39e1e5
HS
922 if (!check)
923 goto giveup;
a3621e74 924 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 925 "Looking for /%s^%s/m starting at offset %ld...\n",
e4584336 926 PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
60e71179 927 goto try_at_offset;
66e933ab 928 }
33b8afdf 929 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
60e71179 930 goto fail;
6eb5f6b9
JH
931 /* Check is floating subtring. */
932 retry_floating_check:
933 t = check_at - start_shift;
a3621e74 934 DEBUG_EXECUTE_r( what = "floating" );
6eb5f6b9
JH
935 goto hop_and_restart;
936 }
b7953727 937 if (t != s) {
a3621e74 938 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 939 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
940 (long)(t - i_strpos), (long)(s - i_strpos))
941 );
942 }
943 else {
a3621e74 944 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b7953727
JH
945 "Does not contradict STCLASS...\n");
946 );
947 }
6eb5f6b9 948 }
5e39e1e5 949 giveup:
a3621e74 950 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
5e39e1e5
HS
951 PL_colors[4], (check ? "Guessed" : "Giving up"),
952 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 953 return s;
2c2d71f5
JH
954
955 fail_finish: /* Substring not found */
33b8afdf
JH
956 if (prog->check_substr || prog->check_utf8) /* could be removed already */
957 BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
cad2e5aa 958 fail:
a3621e74 959 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
e4584336 960 PL_colors[4], PL_colors[5]));
bd61b366 961 return NULL;
cad2e5aa 962}
9661b544 963
6eb5f6b9 964/* We know what class REx starts with. Try to find this position... */
3c3eec57 965STATIC char *
a3b680e6 966S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32 norun)
a687059c 967{
27da23d5 968 dVAR;
1df70142 969 const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
6eb5f6b9 970 char *m;
d8093b23 971 STRLEN ln;
5dab1207 972 STRLEN lnc;
078c425b 973 register STRLEN uskip;
d8093b23
G
974 unsigned int c1;
975 unsigned int c2;
6eb5f6b9
JH
976 char *e;
977 register I32 tmp = 1; /* Scratch variable? */
a3b680e6 978 register const bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 979
6eb5f6b9
JH
980 /* We know what class it must start with. */
981 switch (OP(c)) {
6eb5f6b9 982 case ANYOF:
388cc4de 983 if (do_utf8) {
078c425b 984 while (s + (uskip = UTF8SKIP(s)) <= strend) {
388cc4de
HS
985 if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
986 !UTF8_IS_INVARIANT((U8)s[0]) ?
987 reginclass(c, (U8*)s, 0, do_utf8) :
988 REGINCLASS(c, (U8*)s)) {
989 if (tmp && (norun || regtry(prog, s)))
990 goto got_it;
991 else
992 tmp = doevery;
993 }
994 else
995 tmp = 1;
078c425b 996 s += uskip;
388cc4de
HS
997 }
998 }
999 else {
1000 while (s < strend) {
1001 STRLEN skip = 1;
1002
1003 if (REGINCLASS(c, (U8*)s) ||
1004 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1005 /* The assignment of 2 is intentional:
1006 * for the folded sharp s, the skip is 2. */
1007 (skip = SHARP_S_SKIP))) {
1008 if (tmp && (norun || regtry(prog, s)))
1009 goto got_it;
1010 else
1011 tmp = doevery;
1012 }
1013 else
1014 tmp = 1;
1015 s += skip;
1016 }
a0d0e21e 1017 }
6eb5f6b9 1018 break;
f33976b4
DB
1019 case CANY:
1020 while (s < strend) {
1021 if (tmp && (norun || regtry(prog, s)))
1022 goto got_it;
1023 else
1024 tmp = doevery;
1025 s++;
1026 }
1027 break;
6eb5f6b9 1028 case EXACTF:
5dab1207
NIS
1029 m = STRING(c);
1030 ln = STR_LEN(c); /* length to match in octets/bytes */
1031 lnc = (I32) ln; /* length to match in characters */
1aa99e6b 1032 if (UTF) {
a2a2844f 1033 STRLEN ulen1, ulen2;
5dab1207 1034 U8 *sm = (U8 *) m;
89ebb4a3
JH
1035 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1036 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
041457d9 1037 const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
a2a2844f
JH
1038
1039 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1040 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1041
89ebb4a3 1042 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
041457d9 1043 0, uniflags);
89ebb4a3 1044 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
041457d9 1045 0, uniflags);
5dab1207
NIS
1046 lnc = 0;
1047 while (sm < ((U8 *) m + ln)) {
1048 lnc++;
1049 sm += UTF8SKIP(sm);
1050 }
1aa99e6b
IH
1051 }
1052 else {
1053 c1 = *(U8*)m;
1054 c2 = PL_fold[c1];
1055 }
6eb5f6b9
JH
1056 goto do_exactf;
1057 case EXACTFL:
5dab1207
NIS
1058 m = STRING(c);
1059 ln = STR_LEN(c);
1060 lnc = (I32) ln;
d8093b23 1061 c1 = *(U8*)m;
6eb5f6b9
JH
1062 c2 = PL_fold_locale[c1];
1063 do_exactf:
db12adc6 1064 e = HOP3c(strend, -((I32)lnc), s);
b3c9acc1 1065
6eb5f6b9
JH
1066 if (norun && e < s)
1067 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 1068
60a8b682
JH
1069 /* The idea in the EXACTF* cases is to first find the
1070 * first character of the EXACTF* node and then, if
1071 * necessary, case-insensitively compare the full
1072 * text of the node. The c1 and c2 are the first
1073 * characters (though in Unicode it gets a bit
1074 * more complicated because there are more cases
7f16dd3d
JH
1075 * than just upper and lower: one needs to use
1076 * the so-called folding case for case-insensitive
1077 * matching (called "loose matching" in Unicode).
1078 * ibcmp_utf8() will do just that. */
60a8b682 1079
1aa99e6b 1080 if (do_utf8) {
575cac57 1081 UV c, f;
89ebb4a3 1082 U8 tmpbuf [UTF8_MAXBYTES+1];
575cac57 1083 STRLEN len, foldlen;
041457d9 1084 const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
09091399 1085 if (c1 == c2) {
5dab1207
NIS
1086 /* Upper and lower of 1st char are equal -
1087 * probably not a "letter". */
1aa99e6b 1088 while (s <= e) {
89ebb4a3 1089 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1090 uniflags);
80aecb99
JH
1091 if ( c == c1
1092 && (ln == len ||
66423254 1093 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1094 m, (char **)0, ln, (bool)UTF))
55da9344 1095 && (norun || regtry(prog, s)) )
1aa99e6b 1096 goto got_it;
80aecb99 1097 else {
1df70142 1098 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1099 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1100 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1101 if ( f != c
1102 && (f == c1 || f == c2)
1103 && (ln == foldlen ||
66423254
JH
1104 !ibcmp_utf8((char *) foldbuf,
1105 (char **)0, foldlen, do_utf8,
d07ddd77 1106 m,
eb160463 1107 (char **)0, ln, (bool)UTF))
80aecb99
JH
1108 && (norun || regtry(prog, s)) )
1109 goto got_it;
1110 }
1aa99e6b
IH
1111 s += len;
1112 }
09091399
JH
1113 }
1114 else {
1aa99e6b 1115 while (s <= e) {
89ebb4a3 1116 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1117 uniflags);
80aecb99 1118
60a8b682 1119 /* Handle some of the three Greek sigmas cases.
8c01da3c
JH
1120 * Note that not all the possible combinations
1121 * are handled here: some of them are handled
1122 * by the standard folding rules, and some of
1123 * them (the character class or ANYOF cases)
1124 * are handled during compiletime in
1125 * regexec.c:S_regclass(). */
880bd946
JH
1126 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1127 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1128 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99
JH
1129
1130 if ( (c == c1 || c == c2)
1131 && (ln == len ||
66423254 1132 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1133 m, (char **)0, ln, (bool)UTF))
55da9344 1134 && (norun || regtry(prog, s)) )
1aa99e6b 1135 goto got_it;
80aecb99 1136 else {
1df70142 1137 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1138 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1139 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1140 if ( f != c
1141 && (f == c1 || f == c2)
1142 && (ln == foldlen ||
a6872d42 1143 !ibcmp_utf8((char *) foldbuf,
66423254 1144 (char **)0, foldlen, do_utf8,
d07ddd77 1145 m,
eb160463 1146 (char **)0, ln, (bool)UTF))
80aecb99
JH
1147 && (norun || regtry(prog, s)) )
1148 goto got_it;
1149 }
1aa99e6b
IH
1150 s += len;
1151 }
09091399 1152 }
1aa99e6b
IH
1153 }
1154 else {
1155 if (c1 == c2)
1156 while (s <= e) {
1157 if ( *(U8*)s == c1
1158 && (ln == 1 || !(OP(c) == EXACTF
1159 ? ibcmp(s, m, ln)
1160 : ibcmp_locale(s, m, ln)))
1161 && (norun || regtry(prog, s)) )
1162 goto got_it;
1163 s++;
1164 }
1165 else
1166 while (s <= e) {
1167 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1168 && (ln == 1 || !(OP(c) == EXACTF
1169 ? ibcmp(s, m, ln)
1170 : ibcmp_locale(s, m, ln)))
1171 && (norun || regtry(prog, s)) )
1172 goto got_it;
1173 s++;
1174 }
b3c9acc1
IZ
1175 }
1176 break;
bbce6d69 1177 case BOUNDL:
3280af22 1178 PL_reg_flags |= RF_tainted;
bbce6d69 1179 /* FALL THROUGH */
a0d0e21e 1180 case BOUND:
ffc61ed2 1181 if (do_utf8) {
12d33761 1182 if (s == PL_bostr)
ffc61ed2
JH
1183 tmp = '\n';
1184 else {
6136c704 1185 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
b4f7163a 1186 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
ffc61ed2
JH
1187 }
1188 tmp = ((OP(c) == BOUND ?
9041c2e3 1189 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1190 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1191 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1192 if (tmp == !(OP(c) == BOUND ?
3568d838 1193 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1194 isALNUM_LC_utf8((U8*)s)))
1195 {
1196 tmp = !tmp;
1197 if ((norun || regtry(prog, s)))
1198 goto got_it;
1199 }
078c425b 1200 s += uskip;
a687059c 1201 }
a0d0e21e 1202 }
667bb95a 1203 else {
12d33761 1204 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1205 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1206 while (s < strend) {
1207 if (tmp ==
1208 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1209 tmp = !tmp;
1210 if ((norun || regtry(prog, s)))
1211 goto got_it;
1212 }
1213 s++;
a0ed51b3 1214 }
a0ed51b3 1215 }
6eb5f6b9 1216 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1217 goto got_it;
1218 break;
bbce6d69 1219 case NBOUNDL:
3280af22 1220 PL_reg_flags |= RF_tainted;
bbce6d69 1221 /* FALL THROUGH */
a0d0e21e 1222 case NBOUND:
ffc61ed2 1223 if (do_utf8) {
12d33761 1224 if (s == PL_bostr)
ffc61ed2
JH
1225 tmp = '\n';
1226 else {
6136c704 1227 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
b4f7163a 1228 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
ffc61ed2
JH
1229 }
1230 tmp = ((OP(c) == NBOUND ?
9041c2e3 1231 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1232 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1233 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1234 if (tmp == !(OP(c) == NBOUND ?
3568d838 1235 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1236 isALNUM_LC_utf8((U8*)s)))
1237 tmp = !tmp;
1238 else if ((norun || regtry(prog, s)))
1239 goto got_it;
078c425b 1240 s += uskip;
ffc61ed2 1241 }
a0d0e21e 1242 }
667bb95a 1243 else {
12d33761 1244 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1245 tmp = ((OP(c) == NBOUND ?
1246 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1247 while (s < strend) {
1248 if (tmp ==
1249 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1250 tmp = !tmp;
1251 else if ((norun || regtry(prog, s)))
1252 goto got_it;
1253 s++;
1254 }
a0ed51b3 1255 }
6eb5f6b9 1256 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1257 goto got_it;
1258 break;
a0d0e21e 1259 case ALNUM:
ffc61ed2 1260 if (do_utf8) {
1a4fad37 1261 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1262 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1263 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1264 if (tmp && (norun || regtry(prog, s)))
1265 goto got_it;
1266 else
1267 tmp = doevery;
1268 }
bbce6d69 1269 else
ffc61ed2 1270 tmp = 1;
078c425b 1271 s += uskip;
bbce6d69 1272 }
bbce6d69 1273 }
ffc61ed2
JH
1274 else {
1275 while (s < strend) {
1276 if (isALNUM(*s)) {
1277 if (tmp && (norun || regtry(prog, s)))
1278 goto got_it;
1279 else
1280 tmp = doevery;
1281 }
a0ed51b3 1282 else
ffc61ed2
JH
1283 tmp = 1;
1284 s++;
a0ed51b3 1285 }
a0ed51b3
LW
1286 }
1287 break;
bbce6d69 1288 case ALNUML:
3280af22 1289 PL_reg_flags |= RF_tainted;
ffc61ed2 1290 if (do_utf8) {
078c425b 1291 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1292 if (isALNUM_LC_utf8((U8*)s)) {
1293 if (tmp && (norun || regtry(prog, s)))
1294 goto got_it;
1295 else
1296 tmp = doevery;
1297 }
a687059c 1298 else
ffc61ed2 1299 tmp = 1;
078c425b 1300 s += uskip;
a0d0e21e 1301 }
a0d0e21e 1302 }
ffc61ed2
JH
1303 else {
1304 while (s < strend) {
1305 if (isALNUM_LC(*s)) {
1306 if (tmp && (norun || regtry(prog, s)))
1307 goto got_it;
1308 else
1309 tmp = doevery;
1310 }
a0ed51b3 1311 else
ffc61ed2
JH
1312 tmp = 1;
1313 s++;
a0ed51b3 1314 }
a0ed51b3
LW
1315 }
1316 break;
a0d0e21e 1317 case NALNUM:
ffc61ed2 1318 if (do_utf8) {
1a4fad37 1319 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1320 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1321 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1322 if (tmp && (norun || regtry(prog, s)))
1323 goto got_it;
1324 else
1325 tmp = doevery;
1326 }
bbce6d69 1327 else
ffc61ed2 1328 tmp = 1;
078c425b 1329 s += uskip;
bbce6d69 1330 }
bbce6d69 1331 }
ffc61ed2
JH
1332 else {
1333 while (s < strend) {
1334 if (!isALNUM(*s)) {
1335 if (tmp && (norun || regtry(prog, s)))
1336 goto got_it;
1337 else
1338 tmp = doevery;
1339 }
a0ed51b3 1340 else
ffc61ed2
JH
1341 tmp = 1;
1342 s++;
a0ed51b3 1343 }
a0ed51b3
LW
1344 }
1345 break;
bbce6d69 1346 case NALNUML:
3280af22 1347 PL_reg_flags |= RF_tainted;
ffc61ed2 1348 if (do_utf8) {
078c425b 1349 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1350 if (!isALNUM_LC_utf8((U8*)s)) {
1351 if (tmp && (norun || regtry(prog, s)))
1352 goto got_it;
1353 else
1354 tmp = doevery;
1355 }
a687059c 1356 else
ffc61ed2 1357 tmp = 1;
078c425b 1358 s += uskip;
a687059c 1359 }
a0d0e21e 1360 }
ffc61ed2
JH
1361 else {
1362 while (s < strend) {
1363 if (!isALNUM_LC(*s)) {
1364 if (tmp && (norun || regtry(prog, s)))
1365 goto got_it;
1366 else
1367 tmp = doevery;
1368 }
a0ed51b3 1369 else
ffc61ed2
JH
1370 tmp = 1;
1371 s++;
a0ed51b3 1372 }
a0ed51b3
LW
1373 }
1374 break;
a0d0e21e 1375 case SPACE:
ffc61ed2 1376 if (do_utf8) {
1a4fad37 1377 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1378 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1379 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1380 if (tmp && (norun || regtry(prog, s)))
1381 goto got_it;
1382 else
1383 tmp = doevery;
1384 }
a0d0e21e 1385 else
ffc61ed2 1386 tmp = 1;
078c425b 1387 s += uskip;
2304df62 1388 }
a0d0e21e 1389 }
ffc61ed2
JH
1390 else {
1391 while (s < strend) {
1392 if (isSPACE(*s)) {
1393 if (tmp && (norun || regtry(prog, s)))
1394 goto got_it;
1395 else
1396 tmp = doevery;
1397 }
a0ed51b3 1398 else
ffc61ed2
JH
1399 tmp = 1;
1400 s++;
a0ed51b3 1401 }
a0ed51b3
LW
1402 }
1403 break;
bbce6d69 1404 case SPACEL:
3280af22 1405 PL_reg_flags |= RF_tainted;
ffc61ed2 1406 if (do_utf8) {
078c425b 1407 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1408 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1409 if (tmp && (norun || regtry(prog, s)))
1410 goto got_it;
1411 else
1412 tmp = doevery;
1413 }
bbce6d69 1414 else
ffc61ed2 1415 tmp = 1;
078c425b 1416 s += uskip;
bbce6d69 1417 }
bbce6d69 1418 }
ffc61ed2
JH
1419 else {
1420 while (s < strend) {
1421 if (isSPACE_LC(*s)) {
1422 if (tmp && (norun || regtry(prog, s)))
1423 goto got_it;
1424 else
1425 tmp = doevery;
1426 }
a0ed51b3 1427 else
ffc61ed2
JH
1428 tmp = 1;
1429 s++;
a0ed51b3 1430 }
a0ed51b3
LW
1431 }
1432 break;
a0d0e21e 1433 case NSPACE:
ffc61ed2 1434 if (do_utf8) {
1a4fad37 1435 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1436 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1437 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1438 if (tmp && (norun || regtry(prog, s)))
1439 goto got_it;
1440 else
1441 tmp = doevery;
1442 }
a0d0e21e 1443 else
ffc61ed2 1444 tmp = 1;
078c425b 1445 s += uskip;
a687059c 1446 }
a0d0e21e 1447 }
ffc61ed2
JH
1448 else {
1449 while (s < strend) {
1450 if (!isSPACE(*s)) {
1451 if (tmp && (norun || regtry(prog, s)))
1452 goto got_it;
1453 else
1454 tmp = doevery;
1455 }
a0ed51b3 1456 else
ffc61ed2
JH
1457 tmp = 1;
1458 s++;
a0ed51b3 1459 }
a0ed51b3
LW
1460 }
1461 break;
bbce6d69 1462 case NSPACEL:
3280af22 1463 PL_reg_flags |= RF_tainted;
ffc61ed2 1464 if (do_utf8) {
078c425b 1465 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1466 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1467 if (tmp && (norun || regtry(prog, s)))
1468 goto got_it;
1469 else
1470 tmp = doevery;
1471 }
bbce6d69 1472 else
ffc61ed2 1473 tmp = 1;
078c425b 1474 s += uskip;
bbce6d69 1475 }
bbce6d69 1476 }
ffc61ed2
JH
1477 else {
1478 while (s < strend) {
1479 if (!isSPACE_LC(*s)) {
1480 if (tmp && (norun || regtry(prog, s)))
1481 goto got_it;
1482 else
1483 tmp = doevery;
1484 }
a0ed51b3 1485 else
ffc61ed2
JH
1486 tmp = 1;
1487 s++;
a0ed51b3 1488 }
a0ed51b3
LW
1489 }
1490 break;
a0d0e21e 1491 case DIGIT:
ffc61ed2 1492 if (do_utf8) {
1a4fad37 1493 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1494 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1495 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1496 if (tmp && (norun || regtry(prog, s)))
1497 goto got_it;
1498 else
1499 tmp = doevery;
1500 }
a0d0e21e 1501 else
ffc61ed2 1502 tmp = 1;
078c425b 1503 s += uskip;
2b69d0c2 1504 }
a0d0e21e 1505 }
ffc61ed2
JH
1506 else {
1507 while (s < strend) {
1508 if (isDIGIT(*s)) {
1509 if (tmp && (norun || regtry(prog, s)))
1510 goto got_it;
1511 else
1512 tmp = doevery;
1513 }
a0ed51b3 1514 else
ffc61ed2
JH
1515 tmp = 1;
1516 s++;
a0ed51b3 1517 }
a0ed51b3
LW
1518 }
1519 break;
b8c5462f
JH
1520 case DIGITL:
1521 PL_reg_flags |= RF_tainted;
ffc61ed2 1522 if (do_utf8) {
078c425b 1523 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1524 if (isDIGIT_LC_utf8((U8*)s)) {
1525 if (tmp && (norun || regtry(prog, s)))
1526 goto got_it;
1527 else
1528 tmp = doevery;
1529 }
b8c5462f 1530 else
ffc61ed2 1531 tmp = 1;
078c425b 1532 s += uskip;
b8c5462f 1533 }
b8c5462f 1534 }
ffc61ed2
JH
1535 else {
1536 while (s < strend) {
1537 if (isDIGIT_LC(*s)) {
1538 if (tmp && (norun || regtry(prog, s)))
1539 goto got_it;
1540 else
1541 tmp = doevery;
1542 }
b8c5462f 1543 else
ffc61ed2
JH
1544 tmp = 1;
1545 s++;
b8c5462f 1546 }
b8c5462f
JH
1547 }
1548 break;
a0d0e21e 1549 case NDIGIT:
ffc61ed2 1550 if (do_utf8) {
1a4fad37 1551 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1552 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1553 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1554 if (tmp && (norun || regtry(prog, s)))
1555 goto got_it;
1556 else
1557 tmp = doevery;
1558 }
a0d0e21e 1559 else
ffc61ed2 1560 tmp = 1;
078c425b 1561 s += uskip;
a687059c 1562 }
a0d0e21e 1563 }
ffc61ed2
JH
1564 else {
1565 while (s < strend) {
1566 if (!isDIGIT(*s)) {
1567 if (tmp && (norun || regtry(prog, s)))
1568 goto got_it;
1569 else
1570 tmp = doevery;
1571 }
a0ed51b3 1572 else
ffc61ed2
JH
1573 tmp = 1;
1574 s++;
a0ed51b3 1575 }
a0ed51b3
LW
1576 }
1577 break;
b8c5462f
JH
1578 case NDIGITL:
1579 PL_reg_flags |= RF_tainted;
ffc61ed2 1580 if (do_utf8) {
078c425b 1581 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1582 if (!isDIGIT_LC_utf8((U8*)s)) {
1583 if (tmp && (norun || regtry(prog, s)))
1584 goto got_it;
1585 else
1586 tmp = doevery;
1587 }
b8c5462f 1588 else
ffc61ed2 1589 tmp = 1;
078c425b 1590 s += uskip;
b8c5462f 1591 }
a0ed51b3 1592 }
ffc61ed2
JH
1593 else {
1594 while (s < strend) {
1595 if (!isDIGIT_LC(*s)) {
1596 if (tmp && (norun || regtry(prog, s)))
1597 goto got_it;
1598 else
1599 tmp = doevery;
1600 }
cf93c79d 1601 else
ffc61ed2
JH
1602 tmp = 1;
1603 s++;
b8c5462f 1604 }
b8c5462f
JH
1605 }
1606 break;
b3c9acc1 1607 default:
3c3eec57
GS
1608 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1609 break;
d6a28714 1610 }
6eb5f6b9
JH
1611 return 0;
1612 got_it:
1613 return s;
1614}
1615
1616/*
1617 - regexec_flags - match a regexp against a string
1618 */
1619I32
1620Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1621 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1622/* strend: pointer to null at end of string */
1623/* strbeg: real beginning of string */
1624/* minend: end of match must be >=minend after stringarg. */
1625/* data: May be used for some additional optimizations. */
1626/* nosave: For optimizations. */
1627{
97aff369 1628 dVAR;
6eb5f6b9
JH
1629 register char *s;
1630 register regnode *c;
1631 register char *startpos = stringarg;
6eb5f6b9
JH
1632 I32 minlen; /* must match at least this many chars */
1633 I32 dontbother = 0; /* how many characters not to try at end */
6eb5f6b9
JH
1634 I32 end_shift = 0; /* Same for the end. */ /* CC */
1635 I32 scream_pos = -1; /* Internal iterator of scream. */
ccac19ea 1636 char *scream_olds = NULL;
6eb5f6b9 1637 SV* oreplsv = GvSV(PL_replgv);
1df70142 1638 const bool do_utf8 = DO_UTF8(sv);
a3b680e6 1639 const I32 multiline = prog->reganch & PMf_MULTILINE;
2a782b5b 1640#ifdef DEBUGGING
6136c704
AL
1641 SV * const dsv0 = PERL_DEBUG_PAD_ZERO(0);
1642 SV * const dsv1 = PERL_DEBUG_PAD_ZERO(1);
2a782b5b 1643#endif
a3621e74
YO
1644
1645 GET_RE_DEBUG_FLAGS_DECL;
1646
9d4ba2ae 1647 PERL_UNUSED_ARG(data);
a30b2f1f 1648 RX_MATCH_UTF8_set(prog,do_utf8);
6eb5f6b9 1649
6eb5f6b9
JH
1650 cache_re(prog);
1651#ifdef DEBUGGING
aea4f609 1652 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1653#endif
1654
1655 /* Be paranoid... */
1656 if (prog == NULL || startpos == NULL) {
1657 Perl_croak(aTHX_ "NULL regexp parameter");
1658 return 0;
1659 }
1660
1661 minlen = prog->minlen;
61a36c01 1662 if (strend - startpos < minlen) {
a3621e74 1663 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584
JH
1664 "String too short [regexec_flags]...\n"));
1665 goto phooey;
1aa99e6b 1666 }
6eb5f6b9 1667
6eb5f6b9
JH
1668 /* Check validity of program. */
1669 if (UCHARAT(prog->program) != REG_MAGIC) {
1670 Perl_croak(aTHX_ "corrupted regexp program");
1671 }
1672
1673 PL_reg_flags = 0;
1674 PL_reg_eval_set = 0;
1675 PL_reg_maxiter = 0;
1676
1677 if (prog->reganch & ROPT_UTF8)
1678 PL_reg_flags |= RF_utf8;
1679
1680 /* Mark beginning of line for ^ and lookbehind. */
1681 PL_regbol = startpos;
1682 PL_bostr = strbeg;
1683 PL_reg_sv = sv;
1684
1685 /* Mark end of line for $ (and such) */
1686 PL_regeol = strend;
1687
1688 /* see how far we have to get to not match where we matched before */
1689 PL_regtill = startpos+minend;
1690
1691 /* We start without call_cc context. */
1692 PL_reg_call_cc = 0;
1693
1694 /* If there is a "must appear" string, look for it. */
1695 s = startpos;
1696
1697 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1698 MAGIC *mg;
1699
1700 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1701 PL_reg_ganch = startpos;
1702 else if (sv && SvTYPE(sv) >= SVt_PVMG
1703 && SvMAGIC(sv)
14befaf4
DM
1704 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1705 && mg->mg_len >= 0) {
6eb5f6b9
JH
1706 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1707 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1708 if (s > PL_reg_ganch)
6eb5f6b9
JH
1709 goto phooey;
1710 s = PL_reg_ganch;
1711 }
1712 }
1713 else /* pos() not defined */
1714 PL_reg_ganch = strbeg;
1715 }
1716
a0714e2c 1717 if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
6eb5f6b9
JH
1718 re_scream_pos_data d;
1719
1720 d.scream_olds = &scream_olds;
1721 d.scream_pos = &scream_pos;
1722 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7 1723 if (!s) {
a3621e74 1724 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1725 goto phooey; /* not present */
3fa9c3d7 1726 }
6eb5f6b9
JH
1727 }
1728
a3621e74 1729 DEBUG_EXECUTE_r({
1df70142
AL
1730 const char * const s0 = UTF
1731 ? pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1732 UNI_DISPLAY_REGEX)
1733 : prog->precomp;
1734 const int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1735 const char * const s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
c728cb41 1736 UNI_DISPLAY_REGEX) : startpos;
1df70142 1737 const int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
2a782b5b
JH
1738 if (!PL_colorset)
1739 reginitcolors();
1740 PerlIO_printf(Perl_debug_log,
a0288114 1741 "%sMatching REx%s \"%s%*.*s%s%s\" against \"%s%.*s%s%s\"\n",
e4584336 1742 PL_colors[4], PL_colors[5], PL_colors[0],
9e55ce06 1743 len0, len0, s0,
2a782b5b 1744 PL_colors[1],
9e55ce06 1745 len0 > 60 ? "..." : "",
2a782b5b 1746 PL_colors[0],
9e55ce06
JH
1747 (int)(len1 > 60 ? 60 : len1),
1748 s1, PL_colors[1],
1749 (len1 > 60 ? "..." : "")
2a782b5b
JH
1750 );
1751 });
6eb5f6b9
JH
1752
1753 /* Simplest case: anchored match need be tried only once. */
1754 /* [unless only anchor is BOL and multiline is set] */
1755 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1756 if (s == startpos && regtry(prog, startpos))
1757 goto got_it;
7fba1cd6 1758 else if (multiline || (prog->reganch & ROPT_IMPLICIT)
6eb5f6b9
JH
1759 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1760 {
1761 char *end;
1762
1763 if (minlen)
1764 dontbother = minlen - 1;
1aa99e6b 1765 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9 1766 /* for multiline we only have to try after newlines */
33b8afdf 1767 if (prog->check_substr || prog->check_utf8) {
6eb5f6b9
JH
1768 if (s == startpos)
1769 goto after_try;
1770 while (1) {
1771 if (regtry(prog, s))
1772 goto got_it;
1773 after_try:
1774 if (s >= end)
1775 goto phooey;
1776 if (prog->reganch & RE_USE_INTUIT) {
1777 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1778 if (!s)
1779 goto phooey;
1780 }
1781 else
1782 s++;
1783 }
1784 } else {
1785 if (s > startpos)
1786 s--;
1787 while (s < end) {
1788 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1789 if (regtry(prog, s))
1790 goto got_it;
1791 }
1792 }
1793 }
1794 }
1795 goto phooey;
1796 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1797 if (regtry(prog, PL_reg_ganch))
1798 goto got_it;
1799 goto phooey;
1800 }
1801
1802 /* Messy cases: unanchored match. */
33b8afdf 1803 if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1804 /* we have /x+whatever/ */
1805 /* it must be a one character string (XXXX Except UTF?) */
33b8afdf 1806 char ch;
bf93d4cc
GS
1807#ifdef DEBUGGING
1808 int did_match = 0;
1809#endif
33b8afdf
JH
1810 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1811 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
3f7c398e 1812 ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
bf93d4cc 1813
1aa99e6b 1814 if (do_utf8) {
6eb5f6b9
JH
1815 while (s < strend) {
1816 if (*s == ch) {
a3621e74 1817 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1818 if (regtry(prog, s)) goto got_it;
1819 s += UTF8SKIP(s);
1820 while (s < strend && *s == ch)
1821 s += UTF8SKIP(s);
1822 }
1823 s += UTF8SKIP(s);
1824 }
1825 }
1826 else {
1827 while (s < strend) {
1828 if (*s == ch) {
a3621e74 1829 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1830 if (regtry(prog, s)) goto got_it;
1831 s++;
1832 while (s < strend && *s == ch)
1833 s++;
1834 }
1835 s++;
1836 }
1837 }
a3621e74 1838 DEBUG_EXECUTE_r(if (!did_match)
bf93d4cc 1839 PerlIO_printf(Perl_debug_log,
b7953727
JH
1840 "Did not find anchored character...\n")
1841 );
6eb5f6b9 1842 }
a0714e2c
SS
1843 else if (prog->anchored_substr != NULL
1844 || prog->anchored_utf8 != NULL
1845 || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
33b8afdf
JH
1846 && prog->float_max_offset < strend - s)) {
1847 SV *must;
1848 I32 back_max;
1849 I32 back_min;
1850 char *last;
6eb5f6b9 1851 char *last1; /* Last position checked before */
bf93d4cc
GS
1852#ifdef DEBUGGING
1853 int did_match = 0;
1854#endif
33b8afdf
JH
1855 if (prog->anchored_substr || prog->anchored_utf8) {
1856 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1857 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1858 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1859 back_max = back_min = prog->anchored_offset;
1860 } else {
1861 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1862 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1863 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1864 back_max = prog->float_max_offset;
1865 back_min = prog->float_min_offset;
1866 }
1867 if (must == &PL_sv_undef)
1868 /* could not downgrade utf8 check substring, so must fail */
1869 goto phooey;
1870
1871 last = HOP3c(strend, /* Cannot start after this */
1872 -(I32)(CHR_SVLEN(must)
1873 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9
JH
1874
1875 if (s > PL_bostr)
1876 last1 = HOPc(s, -1);
1877 else
1878 last1 = s - 1; /* bogus */
1879
a0288114 1880 /* XXXX check_substr already used to find "s", can optimize if
6eb5f6b9
JH
1881 check_substr==must. */
1882 scream_pos = -1;
1883 dontbother = end_shift;
1884 strend = HOPc(strend, -dontbother);
1885 while ( (s <= last) &&
9041c2e3 1886 ((flags & REXEC_SCREAM)
1aa99e6b 1887 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1888 end_shift, &scream_pos, 0))
1aa99e6b 1889 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1890 (unsigned char*)strend, must,
7fba1cd6 1891 multiline ? FBMrf_MULTILINE : 0))) ) {
4addbd3b
HS
1892 /* we may be pointing at the wrong string */
1893 if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
3f7c398e 1894 s = strbeg + (s - SvPVX_const(sv));
a3621e74 1895 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1896 if (HOPc(s, -back_max) > last1) {
1897 last1 = HOPc(s, -back_min);
1898 s = HOPc(s, -back_max);
1899 }
1900 else {
1901 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1902
1903 last1 = HOPc(s, -back_min);
1904 s = t;
1905 }
1aa99e6b 1906 if (do_utf8) {
6eb5f6b9
JH
1907 while (s <= last1) {
1908 if (regtry(prog, s))
1909 goto got_it;
1910 s += UTF8SKIP(s);
1911 }
1912 }
1913 else {
1914 while (s <= last1) {
1915 if (regtry(prog, s))
1916 goto got_it;
1917 s++;
1918 }
1919 }
1920 }
a3621e74 1921 DEBUG_EXECUTE_r(if (!did_match)
b7953727 1922 PerlIO_printf(Perl_debug_log,
a0288114 1923 "Did not find %s substr \"%s%.*s%s\"%s...\n",
33b8afdf 1924 ((must == prog->anchored_substr || must == prog->anchored_utf8)
bf93d4cc
GS
1925 ? "anchored" : "floating"),
1926 PL_colors[0],
1927 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 1928 SvPVX_const(must),
b7953727
JH
1929 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1930 );
6eb5f6b9
JH
1931 goto phooey;
1932 }
155aba94 1933 else if ((c = prog->regstclass)) {
f14c76ed
RGS
1934 if (minlen) {
1935 I32 op = (U8)OP(prog->regstclass);
66e933ab 1936 /* don't bother with what can't match */
f14c76ed
RGS
1937 if (PL_regkind[op] != EXACT && op != CANY)
1938 strend = HOPc(strend, -(minlen - 1));
1939 }
a3621e74 1940 DEBUG_EXECUTE_r({
ffc61ed2 1941 SV *prop = sv_newmortal();
cfd0369c
NC
1942 const char *s0;
1943 const char *s1;
9e55ce06
JH
1944 int len0;
1945 int len1;
1946
ffc61ed2 1947 regprop(prop, c);
9e55ce06 1948 s0 = UTF ?
3f7c398e 1949 pv_uni_display(dsv0, (U8*)SvPVX_const(prop), SvCUR(prop), 60,
c728cb41 1950 UNI_DISPLAY_REGEX) :
cfd0369c 1951 SvPVX_const(prop);
9e55ce06
JH
1952 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1953 s1 = UTF ?
c728cb41 1954 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
9e55ce06
JH
1955 len1 = UTF ? SvCUR(dsv1) : strend - s;
1956 PerlIO_printf(Perl_debug_log,
a0288114 1957 "Matching stclass \"%*.*s\" against \"%*.*s\"\n",
9e55ce06
JH
1958 len0, len0, s0,
1959 len1, len1, s1);
ffc61ed2 1960 });
06b5626a 1961 if (find_byclass(prog, c, s, strend, 0))
6eb5f6b9 1962 goto got_it;
a3621e74 1963 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1964 }
1965 else {
1966 dontbother = 0;
a0714e2c 1967 if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
33b8afdf 1968 /* Trim the end. */
d6a28714 1969 char *last;
33b8afdf
JH
1970 SV* float_real;
1971
1972 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1973 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1974 float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
d6a28714
JH
1975
1976 if (flags & REXEC_SCREAM) {
33b8afdf 1977 last = screaminstr(sv, float_real, s - strbeg,
d6a28714
JH
1978 end_shift, &scream_pos, 1); /* last one */
1979 if (!last)
ffc61ed2 1980 last = scream_olds; /* Only one occurrence. */
4addbd3b
HS
1981 /* we may be pointing at the wrong string */
1982 else if (RX_MATCH_COPIED(prog))
3f7c398e 1983 s = strbeg + (s - SvPVX_const(sv));
b8c5462f 1984 }
d6a28714
JH
1985 else {
1986 STRLEN len;
cfd0369c 1987 const char * const little = SvPV_const(float_real, len);
d6a28714 1988
33b8afdf 1989 if (SvTAIL(float_real)) {
d6a28714
JH
1990 if (memEQ(strend - len + 1, little, len - 1))
1991 last = strend - len + 1;
7fba1cd6 1992 else if (!multiline)
9041c2e3 1993 last = memEQ(strend - len, little, len)
bd61b366 1994 ? strend - len : NULL;
b8c5462f 1995 else
d6a28714
JH
1996 goto find_last;
1997 } else {
1998 find_last:
9041c2e3 1999 if (len)
d6a28714 2000 last = rninstr(s, strend, little, little + len);
b8c5462f 2001 else
a0288114 2002 last = strend; /* matching "$" */
b8c5462f 2003 }
b8c5462f 2004 }
bf93d4cc 2005 if (last == NULL) {
a3621e74 2006 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
bf93d4cc 2007 "%sCan't trim the tail, match fails (should not happen)%s\n",
e4584336 2008 PL_colors[4], PL_colors[5]));
bf93d4cc
GS
2009 goto phooey; /* Should not happen! */
2010 }
d6a28714
JH
2011 dontbother = strend - last + prog->float_min_offset;
2012 }
2013 if (minlen && (dontbother < minlen))
2014 dontbother = minlen - 1;
2015 strend -= dontbother; /* this one's always in bytes! */
2016 /* We don't know much -- general case. */
1aa99e6b 2017 if (do_utf8) {
d6a28714
JH
2018 for (;;) {
2019 if (regtry(prog, s))
2020 goto got_it;
2021 if (s >= strend)
2022 break;
b8c5462f 2023 s += UTF8SKIP(s);
d6a28714
JH
2024 };
2025 }
2026 else {
2027 do {
2028 if (regtry(prog, s))
2029 goto got_it;
2030 } while (s++ < strend);
2031 }
2032 }
2033
2034 /* Failure. */
2035 goto phooey;
2036
2037got_it:
2038 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2039
2040 if (PL_reg_eval_set) {
2041 /* Preserve the current value of $^R */
2042 if (oreplsv != GvSV(PL_replgv))
2043 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2044 restored, the value remains
2045 the same. */
acfe0abc 2046 restore_pos(aTHX_ 0);
d6a28714
JH
2047 }
2048
2049 /* make sure $`, $&, $', and $digit will work later */
2050 if ( !(flags & REXEC_NOT_FIRST) ) {
ed252734 2051 RX_MATCH_COPY_FREE(prog);
d6a28714
JH
2052 if (flags & REXEC_COPY_STR) {
2053 I32 i = PL_regeol - startpos + (stringarg - strbeg);
f8c7b90f 2054#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2055 if ((SvIsCOW(sv)
2056 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2057 if (DEBUG_C_TEST) {
2058 PerlIO_printf(Perl_debug_log,
2059 "Copy on write: regexp capture, type %d\n",
2060 (int) SvTYPE(sv));
2061 }
2062 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
d5263905 2063 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
ed252734
NC
2064 assert (SvPOKp(prog->saved_copy));
2065 } else
2066#endif
2067 {
2068 RX_MATCH_COPIED_on(prog);
2069 s = savepvn(strbeg, i);
2070 prog->subbeg = s;
2071 }
d6a28714 2072 prog->sublen = i;
d6a28714
JH
2073 }
2074 else {
2075 prog->subbeg = strbeg;
2076 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2077 }
2078 }
9041c2e3 2079
d6a28714
JH
2080 return 1;
2081
2082phooey:
a3621e74 2083 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
e4584336 2084 PL_colors[4], PL_colors[5]));
d6a28714 2085 if (PL_reg_eval_set)
acfe0abc 2086 restore_pos(aTHX_ 0);
d6a28714
JH
2087 return 0;
2088}
2089
2090/*
2091 - regtry - try match at specific point
2092 */
2093STATIC I32 /* 0 failure, 1 success */
2094S_regtry(pTHX_ regexp *prog, char *startpos)
2095{
97aff369 2096 dVAR;
d6a28714
JH
2097 register I32 i;
2098 register I32 *sp;
2099 register I32 *ep;
2100 CHECKPOINT lastcp;
a3621e74 2101 GET_RE_DEBUG_FLAGS_DECL;
d6a28714 2102
02db2b7b
IZ
2103#ifdef DEBUGGING
2104 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
2105#endif
d6a28714
JH
2106 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2107 MAGIC *mg;
2108
2109 PL_reg_eval_set = RS_init;
a3621e74 2110 DEBUG_EXECUTE_r(DEBUG_s(
b900a521
JH
2111 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2112 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 2113 ));
e8347627 2114 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
2115 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2116 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2117 SAVETMPS;
2118 /* Apparently this is not needed, judging by wantarray. */
e8347627 2119 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
2120 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2121
2122 if (PL_reg_sv) {
2123 /* Make $_ available to executed code. */
2124 if (PL_reg_sv != DEFSV) {
59f00321 2125 SAVE_DEFSV;
d6a28714 2126 DEFSV = PL_reg_sv;
b8c5462f 2127 }
d6a28714 2128
9041c2e3 2129 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 2130 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 2131 /* prepare for quick setting of pos */
d300d9fa
NC
2132#ifdef PERL_OLD_COPY_ON_WRITE
2133 if (SvIsCOW(sv))
2134 sv_force_normal_flags(sv, 0);
2135#endif
2136 mg = sv_magicext(PL_reg_sv, (SV*)0, PERL_MAGIC_regex_global,
2137 &PL_vtbl_mglob, NULL, 0);
d6a28714 2138 mg->mg_len = -1;
b8c5462f 2139 }
d6a28714
JH
2140 PL_reg_magic = mg;
2141 PL_reg_oldpos = mg->mg_len;
c76ac1ee 2142 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 2143 }
09687e5a 2144 if (!PL_reg_curpm) {
a02a5408 2145 Newxz(PL_reg_curpm, 1, PMOP);
09687e5a
AB
2146#ifdef USE_ITHREADS
2147 {
2148 SV* repointer = newSViv(0);
577e12cc 2149 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2150 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2151 av_push(PL_regex_padav,repointer);
2152 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2153 PL_regex_pad = AvARRAY(PL_regex_padav);
2154 }
2155#endif
2156 }
aaa362c4 2157 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2158 PL_reg_oldcurpm = PL_curpm;
2159 PL_curpm = PL_reg_curpm;
2160 if (RX_MATCH_COPIED(prog)) {
2161 /* Here is a serious problem: we cannot rewrite subbeg,
2162 since it may be needed if this match fails. Thus
2163 $` inside (?{}) could fail... */
2164 PL_reg_oldsaved = prog->subbeg;
2165 PL_reg_oldsavedlen = prog->sublen;
f8c7b90f 2166#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2167 PL_nrs = prog->saved_copy;
2168#endif
d6a28714
JH
2169 RX_MATCH_COPIED_off(prog);
2170 }
2171 else
bd61b366 2172 PL_reg_oldsaved = NULL;
d6a28714
JH
2173 prog->subbeg = PL_bostr;
2174 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2175 }
973dddac 2176 prog->startp[0] = startpos - PL_bostr;
d6a28714
JH
2177 PL_reginput = startpos;
2178 PL_regstartp = prog->startp;
2179 PL_regendp = prog->endp;
2180 PL_reglastparen = &prog->lastparen;
a01268b5 2181 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714 2182 prog->lastparen = 0;
03994de8 2183 prog->lastcloseparen = 0;
d6a28714 2184 PL_regsize = 0;
a3621e74 2185 DEBUG_EXECUTE_r(PL_reg_starttry = startpos);
d6a28714
JH
2186 if (PL_reg_start_tmpl <= prog->nparens) {
2187 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2188 if(PL_reg_start_tmp)
2189 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2190 else
a02a5408 2191 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
d6a28714
JH
2192 }
2193
2194 /* XXXX What this code is doing here?!!! There should be no need
2195 to do this again and again, PL_reglastparen should take care of
3dd2943c 2196 this! --ilya*/
dafc8851
JH
2197
2198 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2199 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2200 * PL_reglastparen), is not needed at all by the test suite
2201 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2202 * enough, for building DynaLoader, or otherwise this
2203 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2204 * will happen. Meanwhile, this code *is* needed for the
2205 * above-mentioned test suite tests to succeed. The common theme
2206 * on those tests seems to be returning null fields from matches.
2207 * --jhi */
dafc8851 2208#if 1
d6a28714
JH
2209 sp = prog->startp;
2210 ep = prog->endp;
2211 if (prog->nparens) {
eb160463 2212 for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
d6a28714
JH
2213 *++sp = -1;
2214 *++ep = -1;
2215 }
2216 }
dafc8851 2217#endif
02db2b7b 2218 REGCP_SET(lastcp);
d6a28714
JH
2219 if (regmatch(prog->program + 1)) {
2220 prog->endp[0] = PL_reginput - PL_bostr;
2221 return 1;
2222 }
02db2b7b 2223 REGCP_UNWIND(lastcp);
d6a28714
JH
2224 return 0;
2225}
2226
02db2b7b
IZ
2227#define RE_UNWIND_BRANCH 1
2228#define RE_UNWIND_BRANCHJ 2
2229
2230union re_unwind_t;
2231
2232typedef struct { /* XX: makes sense to enlarge it... */
2233 I32 type;
2234 I32 prev;
2235 CHECKPOINT lastcp;
2236} re_unwind_generic_t;
2237
2238typedef struct {
2239 I32 type;
2240 I32 prev;
2241 CHECKPOINT lastcp;
2242 I32 lastparen;
2243 regnode *next;
2244 char *locinput;
2245 I32 nextchr;
2246#ifdef DEBUGGING
2247 int regindent;
2248#endif
2249} re_unwind_branch_t;
2250
2251typedef union re_unwind_t {
2252 I32 type;
2253 re_unwind_generic_t generic;
2254 re_unwind_branch_t branch;
2255} re_unwind_t;
2256
8ba1375e
MJD
2257#define sayYES goto yes
2258#define sayNO goto no
e0f9d4a8 2259#define sayNO_ANYOF goto no_anyof
8ba1375e
MJD
2260#define sayYES_FINAL goto yes_final
2261#define sayYES_LOUD goto yes_loud
2262#define sayNO_FINAL goto no_final
2263#define sayNO_SILENT goto do_no
2264#define saySAME(x) if (x) goto yes; else goto no
2265
3ab3c9b4
HS
2266#define POSCACHE_SUCCESS 0 /* caching success rather than failure */
2267#define POSCACHE_SEEN 1 /* we know what we're caching */
2268#define POSCACHE_START 2 /* the real cache: this bit maps to pos 0 */
2269#define CACHEsayYES STMT_START { \
2270 if (cache_offset | cache_bit) { \
2271 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2272 PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) || (1<<POSCACHE_SEEN); \
2273 else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2274 /* cache records failure, but this is success */ \
2275 DEBUG_r( \
2276 PerlIO_printf(Perl_debug_log, \
2277 "%*s (remove success from failure cache)\n", \
2278 REPORT_CODE_OFF+PL_regindent*2, "") \
2279 ); \
2280 PL_reg_poscache[cache_offset] &= ~(1<<cache_bit); \
2281 } \
2282 } \
2283 sayYES; \
2284} STMT_END
2285#define CACHEsayNO STMT_START { \
2286 if (cache_offset | cache_bit) { \
2287 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) \
2288 PL_reg_poscache[0] |= (1<<POSCACHE_SEEN); \
2289 else if ((PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2290 /* cache records success, but this is failure */ \
2291 DEBUG_r( \
2292 PerlIO_printf(Perl_debug_log, \
2293 "%*s (remove failure from success cache)\n", \
2294 REPORT_CODE_OFF+PL_regindent*2, "") \
2295 ); \
2296 PL_reg_poscache[cache_offset] &= ~(1<<cache_bit); \
2297 } \
2298 } \
2299 sayNO; \
2300} STMT_END
2301
a3621e74
YO
2302/* this is used to determine how far from the left messages like
2303 'failed...' are printed. Currently 29 makes these messages line
2304 up with the opcode they refer to. Earlier perls used 25 which
2305 left these messages outdented making reviewing a debug output
2306 quite difficult.
2307*/
2308#define REPORT_CODE_OFF 29
2309
2310
2311/* Make sure there is a test for this +1 options in re_tests */
2312#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2313
5b47454d 2314
95b24440
DM
2315/* simulate a recursive call to regmatch */
2316
2317#define REGMATCH(ns, where) \
2318 new_scan = (ns); \
2319 resume_state = resume_##where; \
2320 goto start_recurse; \
2321 resume_point_##where:
2322
2323typedef enum {
2324 resume_TRIE1,
2325 resume_TRIE2,
2326 resume_CURLYX,
2327 resume_WHILEM1,
2328 resume_WHILEM2,
2329 resume_WHILEM3,
2330 resume_WHILEM4,
2331 resume_WHILEM5,
2332 resume_WHILEM6,
2333 resume_CURLYM1,
2334 resume_CURLYM2,
2335 resume_CURLYM3,
2336 resume_CURLYM4,
2337 resume_IFMATCH,
2338 resume_PLUS1,
2339 resume_PLUS2,
2340 resume_PLUS3,
2341 resume_PLUS4,
2342 resume_END
2343} resume_states;
2344
2345
2346struct regmatch_state {
2347 struct regmatch_state *prev_state;
2348 resume_states resume_state;
2349 regnode *scan;
2350 regnode *next;
2351 bool minmod;
2352 bool sw;
2353 int logical;
2354 I32 unwind;
2355 CURCUR *cc;
2356 char *locinput;
2357
2358 I32 n;
2359 I32 ln;
2360 I32 c1, c2, paren;
2361 CHECKPOINT cp, lastcp;
2362 CURCUR *oldcc;
2363 char *lastloc;
2364 I32 cache_offset, cache_bit;
2365 I32 curlym_l;
2366 I32 matches;
2367 I32 maxwanted;
2368 char *e;
2369 char *old;
2370 int count;
2371 re_cc_state *cur_call_cc;
2372 regexp *end_re;
2373 reg_trie_accepted *accept_buff;
2374 U32 accepted;
2375
2376 re_cc_state *reg_call_cc; /* saved value of PL_reg_call_cc */
2377};
8ba1375e 2378
d6a28714
JH
2379/*
2380 - regmatch - main matching routine
2381 *
2382 * Conceptually the strategy is simple: check to see whether the current
2383 * node matches, call self recursively to see whether the rest matches,
2384 * and then act accordingly. In practice we make some effort to avoid
2385 * recursion, in particular by going through "ordinary" nodes (that don't
2386 * need to know whether the rest of the match failed) by a loop instead of
2387 * by recursion.
2388 */
2389/* [lwall] I've hoisted the register declarations to the outer block in order to
2390 * maybe save a little bit of pushing and popping on the stack. It also takes
2391 * advantage of machines that use a register save mask on subroutine entry.
95b24440
DM
2392 *
2393 * This function used to be heavily recursive, but since this had the
2394 * effect of blowing the CPU stack on complex regexes, it has been
2395 * restructured to be iterative, and to save state onto the heap rather
2396 * than the stack. Essentially whereever regmatch() used to be called, it
2397 * pushes the current state, notes where to return, then jumps back into
2398 * the main loop.
2399 *
2400 * Originally the structure of this function used to look something like
2401
2402 S_regmatch() {
2403 int a = 1, b = 2;
2404 ...
2405 while (scan != NULL) {
2406 ...
2407 switch (OP(scan)) {
2408 case FOO: {
2409 int local = 3;
2410 ...
2411 if (regmatch(...)) // recurse
2412 goto yes;
2413 }
2414 ...
2415 }
2416 }
2417 yes:
2418 return 1;
2419 }
2420
2421 * Now it looks something like this:
2422
2423 struct regmatch_state {
2424 int a, b, local;
2425 int resume_state;
2426 };
2427
2428 S_regmatch() {
2429 int a = 1, b = 2;
2430 int local;
2431 ...
2432 while (scan != NULL) {
2433 ...
2434 switch (OP(scan)) {
2435 case FOO: {
2436 local = 3;
2437 ...
2438 resume_state = resume_FOO;
2439 new_scan = ...;
2440 goto start_recurse;
2441 resume_point_FOO:
2442
2443 if (result) // recurse
2444 goto yes;
2445 }
2446 ...
2447 }
2448 start_recurse:
2449 ...push a, b, local onto the heap
2450 a = 1; b = 2;
2451 scan = new_scan;
2452 }
2453 yes:
2454 result = 1;
2455 if (states pushed on heap) {
2456 ... restore a, b, local from heap
2457 switch (resume_state) {
2458 case resume_FOO:
2459 goto resume_point_FOO;
2460 ...
2461 }
2462 }
2463 return result
2464 }
2465
2466 * WARNING: this means that any line in this function that contains a
2467 * REGMATCH() or TRYPAREN() is actually simulating a recursive call to
2468 * regmatch() using gotos instead. Thus the values of any local variables
2469 * not saved in the regmatch_state structure will have been lost when
2470 * execution resumes on the next line .
d6a28714 2471 */
95b24440
DM
2472
2473
d6a28714
JH
2474STATIC I32 /* 0 failure, 1 success */
2475S_regmatch(pTHX_ regnode *prog)
2476{
27da23d5 2477 dVAR;
95b24440
DM
2478 register const bool do_utf8 = PL_reg_match_utf8;
2479 const U32 uniflags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
2480
2481 /************************************************************
2482 * the following variabes are saved and restored on each fake
2483 * recursive call to regmatch:
2484 *
2485 * The first ones contain state that needs to be maintained
2486 * across the main while loop: */
2487
2488 struct regmatch_state *prev_state = NULL; /* stack of pushed states */
2489 resume_states resume_state; /* where to jump to on return */
d6a28714
JH
2490 register regnode *scan; /* Current node. */
2491 regnode *next; /* Next node. */
95b24440
DM
2492 bool minmod = 0; /* the next {n.m} is a {n,m}? */
2493 bool sw = 0; /* the condition value in (?(cond)a|b) */
2494 int logical = 0;
2495 I32 unwind = 0; /* savestack index of current unwind block */
2496 CURCUR *cc = NULL; /* current innermost curly struct */
d6a28714 2497 register char *locinput = PL_reginput;
a3621e74 2498
95b24440
DM
2499 /* while the rest of these are local to an individual branch, and
2500 * have only been hoisted into this outer scope to allow for saving and
2501 * restoration - thus they can be safely reused in other branches.
2502 * Note that they are only initialized here to silence compiler
2503 * warnings :-( */
a3621e74 2504
95b24440
DM
2505 register I32 n = 0; /* no or next */
2506 register I32 ln = 0; /* len or last */
2507 register I32 c1 = 0, c2 = 0, paren = 0; /* case fold search, parenth */
2508 CHECKPOINT cp = 0; /* remember current savestack indexes */
2509 CHECKPOINT lastcp = 0;
2510 CURCUR *oldcc = NULL; /* tmp copy of cc */
2511 char *lastloc = NULL; /* Detection of 0-len. */
2512 I32 cache_offset = 0;
2513 I32 cache_bit = 0;
2514 I32 curlym_l = 0;
2515 I32 matches = 0;
2516 I32 maxwanted = 0;
2517 char *e = NULL;
2518 char *old = NULL;
2519 int count = 0;
2520 re_cc_state *cur_call_cc = NULL;
2521 regexp *end_re = NULL;
2522 reg_trie_accepted *accept_buff = NULL;
2523 U32 accepted = 0; /* how many accepting states we have seen */
2524
2525 /************************************************************
2526 * these variables are NOT saved: */
2527
2528 register I32 nextchr; /* is always set to UCHARAT(locinput) */
2529 regnode *new_scan; /* node to begin exedcution at when recursing */
2530 bool result; /* return value of S_regmatch */
2531
2532 regnode *inner; /* Next node in internal branch. */
2533
2534#ifdef DEBUGGING
ab74612d 2535 SV *re_debug_flags = NULL;
a3621e74 2536 GET_RE_DEBUG_FLAGS;
d6a28714
JH
2537 PL_regindent++;
2538#endif
2539
2540 /* Note that nextchr is a byte even in UTF */
2541 nextchr = UCHARAT(locinput);
2542 scan = prog;
2543 while (scan != NULL) {
8ba1375e 2544
a3621e74 2545 DEBUG_EXECUTE_r( {
6136c704 2546 SV * const prop = sv_newmortal();
1df70142
AL
2547 const int docolor = *PL_colors[0];
2548 const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2549 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2550 /* The part of the string before starttry has one color
2551 (pref0_len chars), between starttry and current
2552 position another one (pref_len - pref0_len chars),
2553 after the current position the third one.
2554 We assume that pref0_len <= pref_len, otherwise we
2555 decrease pref0_len. */
9041c2e3 2556 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2557 ? (5 + taill) - l : locinput - PL_bostr;
2558 int pref0_len;
d6a28714 2559
df1ffd02 2560 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2561 pref_len++;
2562 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2563 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2564 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2565 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2566 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2567 l--;
d6a28714
JH
2568 if (pref0_len < 0)
2569 pref0_len = 0;
2570 if (pref0_len > pref_len)
2571 pref0_len = pref_len;
2572 regprop(prop, scan);
2a782b5b 2573 {
1df70142 2574 const char * const s0 =
f14c76ed 2575 do_utf8 && OP(scan) != CANY ?
95b24440 2576 pv_uni_display(PERL_DEBUG_PAD(0), (U8*)(locinput - pref_len),
c728cb41 2577 pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2578 locinput - pref_len;
1df70142
AL
2579 const int len0 = do_utf8 ? strlen(s0) : pref0_len;
2580 const char * const s1 = do_utf8 && OP(scan) != CANY ?
95b24440
DM
2581 pv_uni_display(PERL_DEBUG_PAD(1),
2582 (U8*)(locinput - pref_len + pref0_len),
c728cb41 2583 pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2584 locinput - pref_len + pref0_len;
1df70142
AL
2585 const int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2586 const char * const s2 = do_utf8 && OP(scan) != CANY ?
95b24440 2587 pv_uni_display(PERL_DEBUG_PAD(2), (U8*)locinput,
c728cb41 2588 PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2589 locinput;
1df70142 2590 const int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2591 PerlIO_printf(Perl_debug_log,
2592 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2593 (IV)(locinput - PL_bostr),
2594 PL_colors[4],
2595 len0, s0,
2596 PL_colors[5],
2597 PL_colors[2],
2598 len1, s1,
2599 PL_colors[3],
2600 (docolor ? "" : "> <"),
2601 PL_colors[0],
2602 len2, s2,
2603 PL_colors[1],
2604 15 - l - pref_len + 1,
2605 "",
2606 (IV)(scan - PL_regprogram), PL_regindent*2, "",
3f7c398e 2607 SvPVX_const(prop));
2a782b5b
JH
2608 }
2609 });
d6a28714
JH
2610
2611 next = scan + NEXT_OFF(scan);
2612 if (next == scan)
2613 next = NULL;
2614
2615 switch (OP(scan)) {
2616 case BOL:
7fba1cd6 2617 if (locinput == PL_bostr)
d6a28714
JH
2618 {
2619 /* regtill = regbol; */
b8c5462f
JH
2620 break;
2621 }
d6a28714
JH
2622 sayNO;
2623 case MBOL:
12d33761
HS
2624 if (locinput == PL_bostr ||
2625 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2626 {
b8c5462f
JH
2627 break;
2628 }
d6a28714
JH
2629 sayNO;
2630 case SBOL:
c2a73568 2631 if (locinput == PL_bostr)
b8c5462f 2632 break;
d6a28714
JH
2633 sayNO;
2634 case GPOS:
2635 if (locinput == PL_reg_ganch)
2636 break;
2637 sayNO;
2638 case EOL:
d6a28714
JH
2639 goto seol;
2640 case MEOL:
d6a28714 2641 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2642 sayNO;
b8c5462f 2643 break;
d6a28714
JH
2644 case SEOL:
2645 seol:
2646 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2647 sayNO;
d6a28714 2648 if (PL_regeol - locinput > 1)
b8c5462f 2649 sayNO;
b8c5462f 2650 break;
d6a28714
JH
2651 case EOS:
2652 if (PL_regeol != locinput)
b8c5462f 2653 sayNO;
d6a28714 2654 break;
ffc61ed2 2655 case SANY:
d6a28714 2656 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2657 sayNO;
f33976b4
DB
2658 if (do_utf8) {
2659 locinput += PL_utf8skip[nextchr];
2660 if (locinput > PL_regeol)
2661 sayNO;
2662 nextchr = UCHARAT(locinput);
2663 }
2664 else
2665 nextchr = UCHARAT(++locinput);
2666 break;
2667 case CANY:
2668 if (!nextchr && locinput >= PL_regeol)
2669 sayNO;
b8c5462f 2670 nextchr = UCHARAT(++locinput);
a0d0e21e 2671 break;
ffc61ed2 2672 case REG_ANY:
1aa99e6b
IH
2673 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2674 sayNO;
2675 if (do_utf8) {
b8c5462f 2676 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2677 if (locinput > PL_regeol)
2678 sayNO;
a0ed51b3 2679 nextchr = UCHARAT(locinput);
a0ed51b3 2680 }
1aa99e6b
IH
2681 else
2682 nextchr = UCHARAT(++locinput);
a0ed51b3 2683 break;
a3621e74
YO
2684
2685
2686
2687 /*
2688 traverse the TRIE keeping track of all accepting states
2689 we transition through until we get to a failing node.
2690
a3621e74
YO
2691
2692 */
5b47454d 2693 case TRIE:
a3621e74
YO
2694 case TRIEF:
2695 case TRIEFL:
2696 {
a3621e74
YO
2697 U8 *uc = ( U8* )locinput;
2698 U32 state = 1;
2699 U16 charid = 0;
2700 U32 base = 0;
2701 UV uvc = 0;
2702 STRLEN len = 0;
2703 STRLEN foldlen = 0;
a3621e74
YO
2704 U8 *uscan = (U8*)NULL;
2705 STRLEN bufflen=0;
95b24440 2706 SV *sv_accept_buff = NULL;
5b47454d
DM
2707 const enum { trie_plain, trie_utf8, trie_uft8_fold }
2708 trie_type = do_utf8 ?
2709 (OP(scan) == TRIE ? trie_utf8 : trie_uft8_fold)
2710 : trie_plain;
2711
7087a21c
NC
2712 /* what trie are we using right now */
2713 reg_trie_data *trie
2714 = (reg_trie_data*)PL_regdata->data[ ARG( scan ) ];
95b24440
DM
2715 accepted = 0; /* how many accepting states we have seen */
2716 result = 0;
a3621e74
YO
2717
2718 while ( state && uc <= (U8*)PL_regeol ) {
2719
5b47454d
DM
2720 if (trie->states[ state ].wordnum) {
2721 if (!accepted ) {
2722 ENTER;
2723 SAVETMPS;
2724 bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
2725 sv_accept_buff=newSV(bufflen *
2726 sizeof(reg_trie_accepted) - 1);
2727 SvCUR_set(sv_accept_buff,
2728 sizeof(reg_trie_accepted));
2729 SvPOK_on(sv_accept_buff);
2730 sv_2mortal(sv_accept_buff);
2731 accept_buff =
2732 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
2733 }
2734 else {
2735 if (accepted >= bufflen) {
2736 bufflen *= 2;
2737 accept_buff =(reg_trie_accepted*)
2738 SvGROW(sv_accept_buff,
2739 bufflen * sizeof(reg_trie_accepted));
2740 }
2741 SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
2742 + sizeof(reg_trie_accepted));
2743 }
2744 accept_buff[accepted].wordnum = trie->states[state].wordnum;
2745 accept_buff[accepted].endpos = uc;
2746 ++accepted;
2747 }
a3621e74
YO
2748
2749 base = trie->states[ state ].trans.base;
2750
2751 DEBUG_TRIE_EXECUTE_r(
2752 PerlIO_printf( Perl_debug_log,
e4584336 2753 "%*s %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ",
a3621e74 2754 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
e4584336 2755 (UV)state, (UV)base, (UV)accepted );
a3621e74
YO
2756 );
2757
2758 if ( base ) {
5b47454d
DM
2759 switch (trie_type) {
2760 case trie_uft8_fold:
a3621e74
YO
2761 if ( foldlen>0 ) {
2762 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
2763 foldlen -= len;
2764 uscan += len;
2765 len=0;
2766 } else {
1df70142 2767 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
a3621e74
YO
2768 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2769 uvc = to_uni_fold( uvc, foldbuf, &foldlen );
2770 foldlen -= UNISKIP( uvc );
2771 uscan = foldbuf + UNISKIP( uvc );
2772 }
5b47454d
DM
2773 break;
2774 case trie_utf8:
2775 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN,
2776 &len, uniflags );
2777 break;
2778 case trie_plain:
e4584336 2779 uvc = (UV)*uc;
a3621e74
YO
2780 len = 1;
2781 }
2782
5b47454d
DM
2783 if (uvc < 256) {
2784 charid = trie->charmap[ uvc ];
2785 }
2786 else {
2787 charid = 0;
2788 if (trie->widecharmap) {
2789 SV** svpp = (SV**)NULL;
2790 svpp = hv_fetch(trie->widecharmap,
2791 (char*)&uvc, sizeof(UV), 0);
2792 if (svpp)
2793 charid = (U16)SvIV(*svpp);
2794 }
2795 }
a3621e74 2796
5b47454d
DM
2797 if (charid &&
2798 (base + charid > trie->uniquecharcount )
2799 && (base + charid - 1 - trie->uniquecharcount
2800 < trie->lasttrans)
2801 && trie->trans[base + charid - 1 -
2802 trie->uniquecharcount].check == state)
2803 {
2804 state = trie->trans[base + charid - 1 -
2805 trie->uniquecharcount ].next;
2806 }
2807 else {
2808 state = 0;
2809 }
2810 uc += len;
2811
2812 }
2813 else {
a3621e74
YO
2814 state = 0;
2815 }
2816 DEBUG_TRIE_EXECUTE_r(
e4584336
RB
2817 PerlIO_printf( Perl_debug_log,
2818 "Charid:%3x CV:%4"UVxf" After State: %4"UVxf"%s\n",
2819 charid, uvc, (UV)state, PL_colors[5] );
a3621e74
YO
2820 );
2821 }
5b47454d 2822 if (!accepted )
a3621e74 2823 sayNO;
a3621e74
YO
2824
2825 /*
2826 There was at least one accepting state that we
2827 transitioned through. Presumably the number of accepting
2828 states is going to be low, typically one or two. So we
2829 simply scan through to find the one with lowest wordnum.
2830 Once we find it, we swap the last state into its place
2831 and decrement the size. We then try to match the rest of
2832 the pattern at the point where the word ends, if we
2833 succeed then we end the loop, otherwise the loop
2834 eventually terminates once all of the accepting states
2835 have been tried.
2836 */
a3621e74
YO
2837
2838 if ( accepted == 1 ) {
2839 DEBUG_EXECUTE_r({
2840 SV **tmp = av_fetch( trie->words, accept_buff[ 0 ].wordnum-1, 0 );
2841 PerlIO_printf( Perl_debug_log,
2842 "%*s %sonly one match : #%d <%s>%s\n",
e4584336 2843 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
a3621e74 2844 accept_buff[ 0 ].wordnum,
cfd0369c 2845 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",
a3621e74
YO
2846 PL_colors[5] );
2847 });
cc601c31 2848 PL_reginput = (char *)accept_buff[ 0 ].endpos;
a3621e74
YO
2849 /* in this case we free tmps/leave before we call regmatch
2850 as we wont be using accept_buff again. */
2851 FREETMPS;
2852 LEAVE;
95b24440
DM
2853 REGMATCH(scan + NEXT_OFF(scan), TRIE1);
2854 /*** all unsaved local vars undefined at this point */
a3621e74
YO
2855 } else {
2856 DEBUG_EXECUTE_r(
e4584336
RB
2857 PerlIO_printf( Perl_debug_log,"%*s %sgot %"IVdf" possible matches%s\n",
2858 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], (IV)accepted,
a3621e74
YO
2859 PL_colors[5] );
2860 );
95b24440 2861 while ( !result && accepted-- ) {
a3621e74
YO
2862 U32 best = 0;
2863 U32 cur;
2864 for( cur = 1 ; cur <= accepted ; cur++ ) {
e4584336
RB
2865 DEBUG_TRIE_EXECUTE_r(
2866 PerlIO_printf( Perl_debug_log,
2867 "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
2868 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
2869 (IV)best, accept_buff[ best ].wordnum, (IV)cur,
2870 accept_buff[ cur ].wordnum, PL_colors[5] );
2871 );
a3621e74
YO
2872
2873 if ( accept_buff[ cur ].wordnum < accept_buff[ best ].wordnum )
2874 best = cur;
2875 }
2876 DEBUG_EXECUTE_r({
6136c704 2877 SV ** const tmp = av_fetch( trie->words, accept_buff[ best ].wordnum - 1, 0 );
a3621e74 2878 PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at 0x%p%s\n",
e4584336 2879 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
a3621e74 2880 accept_buff[best].wordnum,
cfd0369c 2881 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",scan,
a3621e74
YO
2882 PL_colors[5] );
2883 });
2884 if ( best<accepted ) {
2885 reg_trie_accepted tmp = accept_buff[ best ];
2886 accept_buff[ best ] = accept_buff[ accepted ];
2887 accept_buff[ accepted ] = tmp;
2888 best = accepted;
2889 }
cc601c31 2890 PL_reginput = (char *)accept_buff[ best ].endpos;
a3621e74
YO
2891
2892 /*
2893 as far as I can tell we only need the SAVETMPS/FREETMPS
2894 for re's with EVAL in them but I'm leaving them in for
2895 all until I can be sure.
2896 */
2897 SAVETMPS;
95b24440
DM
2898 REGMATCH(scan + NEXT_OFF(scan), TRIE2);
2899 /*** all unsaved local vars undefined at this point */
a3621e74
YO
2900 FREETMPS;
2901 }
2902 FREETMPS;
2903 LEAVE;
2904 }
2905
95b24440 2906 if (result) {
a3621e74
YO
2907 sayYES;
2908 } else {
2909 sayNO;
2910 }
2911 }
2912 /* unreached codepoint */
95b24440
DM
2913 case EXACT: {
2914 char *s = STRING(scan);
cd439c50 2915 ln = STR_LEN(scan);
eb160463 2916 if (do_utf8 != UTF) {
bc517b45 2917 /* The target and the pattern have differing utf8ness. */
1aa99e6b 2918 char *l = locinput;
a3b680e6 2919 const char *e = s + ln;
a72c7584 2920
5ff6fc6d
JH
2921 if (do_utf8) {
2922 /* The target is utf8, the pattern is not utf8. */
1aa99e6b 2923 while (s < e) {
a3b680e6 2924 STRLEN ulen;
1aa99e6b 2925 if (l >= PL_regeol)
5ff6fc6d
JH
2926 sayNO;
2927 if (NATIVE_TO_UNI(*(U8*)s) !=
89ebb4a3 2928 utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
041457d9 2929 uniflags))
5ff6fc6d 2930 sayNO;
bc517b45 2931 l += ulen;
5ff6fc6d 2932 s ++;
1aa99e6b 2933 }
5ff6fc6d
JH
2934 }
2935 else {
2936 /* The target is not utf8, the pattern is utf8. */
1aa99e6b 2937 while (s < e) {
a3b680e6 2938 STRLEN ulen;
1aa99e6b
IH
2939 if (l >= PL_regeol)
2940 sayNO;
5ff6fc6d 2941 if (NATIVE_TO_UNI(*((U8*)l)) !=
89ebb4a3 2942 utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
041457d9 2943 uniflags))
1aa99e6b 2944 sayNO;
bc517b45 2945 s += ulen;
a72c7584 2946 l ++;
1aa99e6b 2947 }
5ff6fc6d 2948 }
1aa99e6b
IH
2949 locinput = l;
2950 nextchr = UCHARAT(locinput);
2951 break;
2952 }
bc517b45 2953 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2954 /* Inline the first character, for speed. */
2955 if (UCHARAT(s) != nextchr)
2956 sayNO;
2957 if (PL_regeol - locinput < ln)
2958 sayNO;
2959 if (ln > 1 && memNE(s, locinput, ln))
2960 sayNO;
2961 locinput += ln;
2962 nextchr = UCHARAT(locinput);
2963 break;
95b24440 2964 }
d6a28714 2965 case EXACTFL:
b8c5462f
JH
2966 PL_reg_flags |= RF_tainted;
2967 /* FALL THROUGH */
95b24440
DM
2968 case EXACTF: {
2969 char *s = STRING(scan);
cd439c50 2970 ln = STR_LEN(scan);
d6a28714 2971
d07ddd77
JH
2972 if (do_utf8 || UTF) {
2973 /* Either target or the pattern are utf8. */
d6a28714 2974 char *l = locinput;
d07ddd77 2975 char *e = PL_regeol;
bc517b45 2976
eb160463 2977 if (ibcmp_utf8(s, 0, ln, (bool)UTF,
1feea2c7 2978 l, &e, 0, do_utf8)) {
5486206c
JH
2979 /* One more case for the sharp s:
2980 * pack("U0U*", 0xDF) =~ /ss/i,
2981 * the 0xC3 0x9F are the UTF-8
2982 * byte sequence for the U+00DF. */
2983 if (!(do_utf8 &&
2984 toLOWER(s[0]) == 's' &&
2985 ln >= 2 &&
2986 toLOWER(s[1]) == 's' &&
2987 (U8)l[0] == 0xC3 &&
2988 e - l >= 2 &&
2989 (U8)l[1] == 0x9F))
2990 sayNO;
2991 }
d07ddd77
JH
2992 locinput = e;
2993 nextchr = UCHARAT(locinput);
2994 break;
a0ed51b3 2995 }
d6a28714 2996
bc517b45
JH
2997 /* Neither the target and the pattern are utf8. */
2998
d6a28714
JH
2999 /* Inline the first character, for speed. */
3000 if (UCHARAT(s) != nextchr &&
3001 UCHARAT(s) != ((OP(scan) == EXACTF)
3002 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 3003 sayNO;
d6a28714 3004 if (PL_regeol - locinput < ln)
b8c5462f 3005 sayNO;
d6a28714
JH
3006 if (ln > 1 && (OP(scan) == EXACTF
3007 ? ibcmp(s, locinput, ln)
3008 : ibcmp_locale(s, locinput, ln)))
4633a7c4 3009 sayNO;
d6a28714
JH
3010 locinput += ln;
3011 nextchr = UCHARAT(locinput);
a0d0e21e 3012 break;
95b24440 3013 }
d6a28714 3014 case ANYOF:
ffc61ed2 3015 if (do_utf8) {
9e55ce06
JH
3016 STRLEN inclasslen = PL_regeol - locinput;
3017
ba7b4546 3018 if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
e0f9d4a8 3019 sayNO_ANYOF;
ffc61ed2
JH
3020 if (locinput >= PL_regeol)
3021 sayNO;
0f0076b4 3022 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
b8c5462f 3023 nextchr = UCHARAT(locinput);
e0f9d4a8 3024 break;
ffc61ed2
JH
3025 }
3026 else {
3027 if (nextchr < 0)
3028 nextchr = UCHARAT(locinput);
7d3e948e 3029 if (!REGINCLASS(scan, (U8*)locinput))
e0f9d4a8 3030 sayNO_ANYOF;
ffc61ed2
JH
3031 if (!nextchr && locinput >= PL_regeol)
3032 sayNO;
3033 nextchr = UCHARAT(++locinput);
e0f9d4a8
JH
3034 break;
3035 }
3036 no_anyof:
3037 /* If we might have the case of the German sharp s
3038 * in a casefolding Unicode character class. */
3039
ebc501f0
JH
3040 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3041 locinput += SHARP_S_SKIP;
e0f9d4a8 3042 nextchr = UCHARAT(locinput);
ffc61ed2 3043 }
e0f9d4a8
JH
3044 else
3045 sayNO;
b8c5462f 3046 break;
d6a28714 3047 case ALNUML:
b8c5462f
JH
3048 PL_reg_flags |= RF_tainted;
3049 /* FALL THROUGH */
d6a28714 3050 case ALNUM:
b8c5462f 3051 if (!nextchr)
4633a7c4 3052 sayNO;
ffc61ed2 3053 if (do_utf8) {
1a4fad37 3054 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3055 if (!(OP(scan) == ALNUM
3568d838 3056 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 3057 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
3058 {
3059 sayNO;
a0ed51b3 3060 }
b8c5462f 3061 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3062 nextchr = UCHARAT(locinput);
3063 break;
3064 }
ffc61ed2 3065 if (!(OP(scan) == ALNUM
d6a28714 3066 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 3067 sayNO;
b8c5462f 3068 nextchr = UCHARAT(++locinput);
a0d0e21e 3069 break;
d6a28714 3070 case NALNUML:
b8c5462f
JH
3071 PL_reg_flags |= RF_tainted;
3072 /* FALL THROUGH */
d6a28714
JH
3073 case NALNUM:
3074 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 3075 sayNO;
ffc61ed2 3076 if (do_utf8) {
1a4fad37 3077 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3078 if (OP(scan) == NALNUM
3568d838 3079 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
3080 : isALNUM_LC_utf8((U8*)locinput))
3081 {
b8c5462f 3082 sayNO;
d6a28714 3083 }
b8c5462f
JH
3084 locinput += PL_utf8skip[nextchr];
3085 nextchr = UCHARAT(locinput);
3086 break;
3087 }
ffc61ed2 3088 if (OP(scan) == NALNUM
d6a28714 3089 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 3090 sayNO;
76e3520e 3091 nextchr = UCHARAT(++locinput);
a0d0e21e 3092 break;
d6a28714
JH
3093 case BOUNDL:
3094 case NBOUNDL:
3280af22 3095 PL_reg_flags |= RF_tainted;
bbce6d69 3096 /* FALL THROUGH */
d6a28714
JH
3097 case BOUND:
3098 case NBOUND:
3099 /* was last char in word? */
ffc61ed2 3100 if (do_utf8) {
12d33761
HS
3101 if (locinput == PL_bostr)
3102 ln = '\n';
ffc61ed2 3103 else {
a3b680e6 3104 const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
9041c2e3 3105
b4f7163a 3106 ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
ffc61ed2
JH
3107 }
3108 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3109 ln = isALNUM_uni(ln);
1a4fad37 3110 LOAD_UTF8_CHARCLASS_ALNUM();
3568d838 3111 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
3112 }
3113 else {
9041c2e3 3114 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
3115 n = isALNUM_LC_utf8((U8*)locinput);
3116 }
a0ed51b3 3117 }
d6a28714 3118 else {
12d33761
HS
3119 ln = (locinput != PL_bostr) ?
3120 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
3121 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3122 ln = isALNUM(ln);
3123 n = isALNUM(nextchr);
3124 }
3125 else {
3126 ln = isALNUM_LC(ln);
3127 n = isALNUM_LC(nextchr);
3128 }
d6a28714 3129 }
ffc61ed2
JH
3130 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3131 OP(scan) == BOUNDL))
3132 sayNO;
a0ed51b3 3133 break;
d6a28714 3134 case SPACEL:
3280af22 3135 PL_reg_flags |= RF_tainted;
bbce6d69 3136 /* FALL THROUGH */
d6a28714 3137 case SPACE:
9442cb0e 3138 if (!nextchr)
4633a7c4 3139 sayNO;
1aa99e6b 3140 if (do_utf8) {
fd400ab9 3141 if (UTF8_IS_CONTINUED(nextchr)) {
1a4fad37 3142 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3143 if (!(OP(scan) == SPACE
3568d838 3144 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
3145 : isSPACE_LC_utf8((U8*)locinput)))
3146 {
3147 sayNO;
3148 }
3149 locinput += PL_utf8skip[nextchr];
3150 nextchr = UCHARAT(locinput);
3151 break;
d6a28714 3152 }
ffc61ed2
JH
3153 if (!(OP(scan) == SPACE
3154 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3155 sayNO;
3156 nextchr = UCHARAT(++locinput);
3157 }
3158 else {
3159 if (!(OP(scan) == SPACE
3160 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3161 sayNO;
3162 nextchr = UCHARAT(++locinput);
a0ed51b3 3163 }
a0ed51b3 3164 break;
d6a28714 3165 case NSPACEL:
3280af22 3166 PL_reg_flags |= RF_tainted;
bbce6d69 3167 /* FALL THROUGH */
d6a28714 3168 case NSPACE:
9442cb0e 3169 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3170 sayNO;
1aa99e6b 3171 if (do_utf8) {
1a4fad37 3172 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3173 if (OP(scan) == NSPACE
3568d838 3174 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 3175 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
3176 {
3177 sayNO;
3178 }
3179 locinput += PL_utf8skip[nextchr];
3180 nextchr = UCHARAT(locinput);
3181 break;
a0ed51b3 3182 }
ffc61ed2 3183 if (OP(scan) == NSPACE
d6a28714 3184 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 3185 sayNO;
76e3520e 3186 nextchr = UCHARAT(++locinput);
a0d0e21e 3187 break;
d6a28714 3188 case DIGITL:
a0ed51b3
LW
3189 PL_reg_flags |= RF_tainted;
3190 /* FALL THROUGH */
d6a28714 3191 case DIGIT:
9442cb0e 3192 if (!nextchr)
a0ed51b3 3193 sayNO;
1aa99e6b 3194 if (do_utf8) {
1a4fad37 3195 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3196 if (!(OP(scan) == DIGIT
3568d838 3197 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 3198 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 3199 {
a0ed51b3 3200 sayNO;
dfe13c55 3201 }
6f06b55f 3202 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3203 nextchr = UCHARAT(locinput);
3204 break;
3205 }
ffc61ed2 3206 if (!(OP(scan) == DIGIT
9442cb0e 3207 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
3208 sayNO;
3209 nextchr = UCHARAT(++locinput);
3210 break;
d6a28714 3211 case NDIGITL:
b8c5462f
JH
3212 PL_reg_flags |= RF_tainted;
3213 /* FALL THROUGH */
d6a28714 3214 case NDIGIT:
9442cb0e 3215 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3216 sayNO;
1aa99e6b 3217 if (do_utf8) {
1a4fad37 3218 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3219 if (OP(scan) == NDIGIT
3568d838 3220 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
3221 : isDIGIT_LC_utf8((U8*)locinput))
3222 {
a0ed51b3 3223 sayNO;
9442cb0e 3224 }
6f06b55f 3225 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3226 nextchr = UCHARAT(locinput);
3227 break;
3228 }
ffc61ed2 3229 if (OP(scan) == NDIGIT
9442cb0e 3230 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
3231 sayNO;
3232 nextchr = UCHARAT(++locinput);
3233 break;
3234 case CLUMP:
b7c83a7e 3235 if (locinput >= PL_regeol)
a0ed51b3 3236 sayNO;
b7c83a7e 3237 if (do_utf8) {
1a4fad37 3238 LOAD_UTF8_CHARCLASS_MARK();
b7c83a7e
JH
3239 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3240 sayNO;
3241 locinput += PL_utf8skip[nextchr];
3242 while (locinput < PL_regeol &&
3243 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3244 locinput += UTF8SKIP(locinput);
3245 if (locinput > PL_regeol)
3246 sayNO;
eb08e2da
JH
3247 }
3248 else
3249 locinput++;
a0ed51b3
LW
3250 nextchr = UCHARAT(locinput);
3251 break;
c8756f30 3252 case REFFL:
3280af22 3253 PL_reg_flags |= RF_tainted;
c8756f30 3254 /* FALL THROUGH */
c277df42 3255 case REF:
95b24440
DM
3256 case REFF: {
3257 char *s;
c277df42 3258 n = ARG(scan); /* which paren pair */
cf93c79d 3259 ln = PL_regstartp[n];
2c2d71f5 3260 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
eb160463 3261 if ((I32)*PL_reglastparen < n || ln == -1)
af3f8c16 3262 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 3263 if (ln == PL_regendp[n])
a0d0e21e 3264 break;
a0ed51b3 3265
cf93c79d 3266 s = PL_bostr + ln;
1aa99e6b 3267 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 3268 char *l = locinput;
a3b680e6 3269 const char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
3270 /*
3271 * Note that we can't do the "other character" lookup trick as
3272 * in the 8-bit case (no pun intended) because in Unicode we
3273 * have to map both upper and title case to lower case.
3274 */
3275 if (OP(scan) == REFF) {
3276 while (s < e) {
a3b680e6
AL
3277 STRLEN ulen1, ulen2;
3278 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3279 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3280
a0ed51b3
LW
3281 if (l >= PL_regeol)
3282 sayNO;
a2a2844f
JH
3283 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3284 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 3285 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 3286 sayNO;
a2a2844f
JH
3287 s += ulen1;
3288 l += ulen2;
a0ed51b3
LW
3289 }
3290 }
3291 locinput = l;
3292 nextchr = UCHARAT(locinput);
3293 break;
3294 }
3295
a0d0e21e 3296 /* Inline the first character, for speed. */
76e3520e 3297 if (UCHARAT(s) != nextchr &&
c8756f30
AK
3298 (OP(scan) == REF ||
3299 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 3300 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 3301 sayNO;
cf93c79d 3302 ln = PL_regendp[n] - ln;
3280af22 3303 if (locinput + ln > PL_regeol)
4633a7c4 3304 sayNO;
c8756f30
AK
3305 if (ln > 1 && (OP(scan) == REF
3306 ? memNE(s, locinput, ln)
3307 : (OP(scan) == REFF
3308 ? ibcmp(s, locinput, ln)
3309 : ibcmp_locale(s, locinput, ln))))
4633a7c4 3310 sayNO;
a0d0e21e 3311 locinput += ln;
76e3520e 3312 nextchr = UCHARAT(locinput);
a0d0e21e 3313 break;
95b24440 3314 }
a0d0e21e
LW
3315
3316 case NOTHING:
c277df42 3317 case TAIL:
a0d0e21e
LW
3318 break;
3319 case BACK:
3320 break;
c277df42
IZ
3321 case EVAL:
3322 {
3323 dSP;
6136c704
AL
3324 OP_4tree * const oop = PL_op;
3325 COP * const ocurcop = PL_curcop;
f3548bdc 3326 PAD *old_comppad;
c277df42 3327 SV *ret;
6136c704 3328 struct regexp * const oreg = PL_reg_re;
9041c2e3 3329
c277df42 3330 n = ARG(scan);
533c011a 3331 PL_op = (OP_4tree*)PL_regdata->data[n];
a3621e74 3332 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
f3548bdc 3333 PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_regdata->data[n + 2]);
cf93c79d 3334 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 3335
8e5e9ebe 3336 {
6136c704 3337 SV ** const before = SP;
8e5e9ebe
RGS
3338 CALLRUNOPS(aTHX); /* Scalar context. */
3339 SPAGAIN;
3340 if (SP == before)
075aa684 3341 ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
8e5e9ebe
RGS
3342 else {
3343 ret = POPs;
3344 PUTBACK;
3345 }
3346 }
3347
0f5d15d6 3348 PL_op = oop;
f3548bdc 3349 PAD_RESTORE_LOCAL(old_comppad);
0f5d15d6 3350 PL_curcop = ocurcop;
c277df42 3351 if (logical) {
0f5d15d6
IZ
3352 if (logical == 2) { /* Postponed subexpression. */
3353 regexp *re;
6136c704 3354 MAGIC *mg = NULL;
0f5d15d6 3355 re_cc_state state;
cb50f42d 3356 int toggleutf;
faf82a0b 3357 register SV *sv;
0f5d15d6 3358
faf82a0b
AE
3359 if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3360 mg = mg_find(sv, PERL_MAGIC_qr);
3361 else if (SvSMAGICAL(ret)) {
3362 if (SvGMAGICAL(ret))
3363 sv_unmagic(ret, PERL_MAGIC_qr);
3364 else
3365 mg = mg_find(ret, PERL_MAGIC_qr);
0f5d15d6 3366 }
faf82a0b 3367
0f5d15d6
IZ
3368 if (mg) {
3369 re = (regexp *)mg->mg_obj;
df0003d4 3370 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
3371 }
3372 else {
3373 STRLEN len;
6136c704 3374 const char * const t = SvPV_const(ret, len);
0f5d15d6 3375 PMOP pm;
a3b680e6
AL
3376 char * const oprecomp = PL_regprecomp;
3377 const I32 osize = PL_regsize;
3378 const I32 onpar = PL_regnpar;
0f5d15d6 3379
5fcd1c1b 3380 Zero(&pm, 1, PMOP);
cb50f42d 3381 if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
83003860 3382 re = CALLREGCOMP(aTHX_ (char*)t, (char*)t + len, &pm);
9041c2e3 3383 if (!(SvFLAGS(ret)
faf82a0b
AE
3384 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3385 | SVs_GMG)))
14befaf4
DM
3386 sv_magic(ret,(SV*)ReREFCNT_inc(re),
3387 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
3388 PL_regprecomp = oprecomp;
3389 PL_regsize = osize;
3390 PL_regnpar = onpar;
3391 }
a3621e74 3392 DEBUG_EXECUTE_r(
9041c2e3 3393 PerlIO_printf(Perl_debug_log,
a0288114 3394 "Entering embedded \"%s%.60s%s%s\"\n",
0f5d15d6
IZ
3395 PL_colors[0],
3396 re->precomp,
3397 PL_colors[1],
3398 (strlen(re->precomp) > 60 ? "..." : ""))
3399 );
3400 state.node = next;
3401 state.prev = PL_reg_call_cc;
95b24440 3402 state.cc = cc;
0f5d15d6
IZ
3403 state.re = PL_reg_re;
3404
95b24440 3405 cc = 0;
9041c2e3 3406
0f5d15d6 3407 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3408 REGCP_SET(lastcp);
0f5d15d6
IZ
3409 cache_re(re);
3410 state.ss = PL_savestack_ix;
3411 *PL_reglastparen = 0;
a01268b5 3412 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
3413 PL_reg_call_cc = &state;
3414 PL_reginput = locinput;
cb50f42d
YST
3415 toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
3416 ((re->reganch & ROPT_UTF8) != 0);
3417 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c2d71f5
JH
3418
3419 /* XXXX This is too dramatic a measure... */
3420 PL_reg_maxiter = 0;
3421
95b24440 3422 /* XXX the only recursion left in regmatch() */
0f5d15d6 3423 if (regmatch(re->program + 1)) {
2c914db6
IZ
3424 /* Even though we succeeded, we need to restore
3425 global variables, since we may be wrapped inside
3426 SUSPEND, thus the match may be not finished yet. */
3427
3428 /* XXXX Do this only if SUSPENDed? */
3429 PL_reg_call_cc = state.prev;
95b24440 3430 cc = state.cc;
2c914db6
IZ
3431 PL_reg_re = state.re;
3432 cache_re(PL_reg_re);
cb50f42d 3433 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c914db6
IZ
3434
3435 /* XXXX This is too dramatic a measure... */
3436 PL_reg_maxiter = 0;
3437
3438 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
3439 ReREFCNT_dec(re);
3440 regcpblow(cp);
3441 sayYES;
3442 }
0f5d15d6 3443 ReREFCNT_dec(re);
02db2b7b 3444 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3445 regcppop();
3446 PL_reg_call_cc = state.prev;
95b24440 3447 cc = state.cc;
0f5d15d6 3448 PL_reg_re = state.re;
d3790889 3449 cache_re(PL_reg_re);
cb50f42d 3450 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c2d71f5
JH
3451
3452 /* XXXX This is too dramatic a measure... */
3453 PL_reg_maxiter = 0;
3454
8e514ae6 3455 logical = 0;
0f5d15d6
IZ
3456 sayNO;
3457 }
c277df42 3458 sw = SvTRUE(ret);
0f5d15d6 3459 logical = 0;
a0ed51b3 3460 }
080c2dec 3461 else {
3280af22 3462 sv_setsv(save_scalar(PL_replgv), ret);
080c2dec
AE
3463 cache_re(oreg);
3464 }
c277df42
IZ
3465 break;
3466 }
a0d0e21e 3467 case OPEN:
c277df42 3468 n = ARG(scan); /* which paren pair */
3280af22
NIS
3469 PL_reg_start_tmp[n] = locinput;
3470 if (n > PL_regsize)
3471 PL_regsize = n;
a0d0e21e
LW
3472 break;
3473 case CLOSE:
c277df42 3474 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
3475 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3476 PL_regendp[n] = locinput - PL_bostr;
eb160463 3477 if (n > (I32)*PL_reglastparen)
3280af22 3478 *PL_reglastparen = n;
a01268b5 3479 *PL_reglastcloseparen = n;
a0d0e21e 3480 break;
c277df42
IZ
3481 case GROUPP:
3482 n = ARG(scan); /* which paren pair */
eb160463 3483 sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
3484 break;
3485 case IFTHEN:
2c2d71f5 3486 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
3487 if (sw)
3488 next = NEXTOPER(NEXTOPER(scan));
3489 else {
3490 next = scan + ARG(scan);
3491 if (OP(next) == IFTHEN) /* Fake one. */
3492 next = NEXTOPER(NEXTOPER(next));
3493 }
3494 break;
3495 case LOGICAL:
0f5d15d6 3496 logical = scan->flags;
c277df42 3497 break;
2ab05381 3498/*******************************************************************
95b24440 3499 cc contains infoblock about the innermost (...)* loop, and
2ab05381
IZ
3500 a pointer to the next outer infoblock.
3501
3502 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3503
95b24440 3504 1) After matching Y, regnode for CURLYX is processed;
2ab05381 3505
95b24440
DM
3506 2) This regnode mallocs an infoblock, and calls regmatch() recursively
3507 with the starting point at WHILEM node;
2ab05381
IZ
3508
3509 3) Each hit of WHILEM node tries to match A and Z (in the order
3510 depending on the current iteration, min/max of {min,max} and
3511 greediness). The information about where are nodes for "A"
3512 and "Z" is read from the infoblock, as is info on how many times "A"
3513 was already matched, and greediness.
3514
3515 4) After A matches, the same WHILEM node is hit again.
3516
95b24440 3517 5) Each time WHILEM is hit, cc is the infoblock created by CURLYX
2ab05381 3518 of the same pair. Thus when WHILEM tries to match Z, it temporarily
95b24440 3519 resets cc, since this Y(A)*Z can be a part of some other loop:
2ab05381
IZ
3520 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
3521 of the external loop.
3522
3523 Currently present infoblocks form a tree with a stem formed by PL_curcc
3524 and whatever it mentions via ->next, and additional attached trees
3525 corresponding to temporarily unset infoblocks as in "5" above.
3526
95b24440 3527 In the following picture, infoblocks for outer loop of
2ab05381
IZ
3528 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
3529 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
3530 infoblocks are drawn below the "reset" infoblock.
3531
3532 In fact in the picture below we do not show failed matches for Z and T
3533 by WHILEM blocks. [We illustrate minimal matches, since for them it is
3534 more obvious *why* one needs to *temporary* unset infoblocks.]
3535
3536 Matched REx position InfoBlocks Comment
3537 (Y(A)*?Z)*?T x
3538 Y(A)*?Z)*?T x <- O
3539 Y (A)*?Z)*?T x <- O
3540 Y A)*?Z)*?T x <- O <- I
3541 YA )*?Z)*?T x <- O <- I
3542 YA A)*?Z)*?T x <- O <- I
3543 YAA )*?Z)*?T x <- O <- I
3544 YAA Z)*?T x <- O # Temporary unset I
3545 I
3546
3547 YAAZ Y(A)*?Z)*?T x <- O
3548 I
3549
3550 YAAZY (A)*?Z)*?T x <- O
3551 I
3552
3553 YAAZY A)*?Z)*?T x <- O <- I
3554 I
3555
3556 YAAZYA )*?Z)*?T x <- O <- I
3557 I
3558
3559 YAAZYA Z)*?T x <- O # Temporary unset I
3560 I,I
3561
3562 YAAZYAZ )*?T x <- O
3563 I,I
3564
3565 YAAZYAZ T x # Temporary unset O
3566 O
3567 I,I
3568
3569 YAAZYAZT x
3570 O
3571 I,I
3572 *******************************************************************/
95b24440 3573
a0d0e21e 3574 case CURLYX: {
cb434fcc
IZ
3575 /* No need to save/restore up to this paren */
3576 I32 parenfloor = scan->flags;
c277df42 3577
95b24440
DM
3578 {
3579 CURCUR *newcc;
3580 Newx(newcc, 1, CURCUR);
3581 oldcc = cc;
3582 newcc->oldcc = cc;
3583 cc = newcc;
3584 }
3585 cp = PL_savestack_ix;
c277df42
IZ
3586 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3587 next += ARG(next);
cb434fcc
IZ
3588 /* XXXX Probably it is better to teach regpush to support
3589 parenfloor > PL_regsize... */
eb160463 3590 if (parenfloor > (I32)*PL_reglastparen)
cb434fcc 3591 parenfloor = *PL_reglastparen; /* Pessimization... */
95b24440
DM
3592 cc->parenfloor = parenfloor;
3593 cc->cur = -1;
3594 cc->min = ARG1(scan);
3595 cc->max = ARG2(scan);
3596 cc->scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3597 cc->next = next;
3598 cc->minmod = minmod;
3599 cc->lastloc = 0;
3280af22 3600 PL_reginput = locinput;
95b24440
DM
3601 REGMATCH(PREVOPER(next), CURLYX); /* start on the WHILEM */
3602 /*** all unsaved local vars undefined at this point */
a0d0e21e 3603 regcpblow(cp);
95b24440
DM
3604 Safefree(cc);
3605 cc = oldcc;
3606 saySAME(result);
a0d0e21e 3607 }
5f66b61c 3608 /* NOTREACHED */
a0d0e21e
LW
3609 case WHILEM: {
3610 /*
3611 * This is really hard to understand, because after we match
3612 * what we're trying to match, we must make sure the rest of
2c2d71f5 3613 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
3614 * to go back UP the parse tree by recursing ever deeper. And
3615 * if it fails, we have to reset our parent's current state
3616 * that we can try again after backing off.
3617 */
3618
95b24440
DM
3619 lastloc = cc->lastloc; /* Detection of 0-len. */
3620 cache_offset = 0;
3621 cache_bit = 0;
c277df42 3622
4633a7c4 3623 n = cc->cur + 1; /* how many we know we matched */
3280af22 3624 PL_reginput = locinput;
a0d0e21e 3625
a3621e74 3626 DEBUG_EXECUTE_r(
9041c2e3 3627 PerlIO_printf(Perl_debug_log,
91f3b821 3628 "%*s %ld out of %ld..%ld cc=%"UVxf"\n",
3280af22 3629 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 3630 (long)n, (long)cc->min,
2797576d 3631 (long)cc->max, PTR2UV(cc))
c277df42 3632 );
4633a7c4 3633
a0d0e21e
LW
3634 /* If degenerate scan matches "", assume scan done. */
3635
579cf2c3 3636 if (locinput == cc->lastloc && n >= cc->min) {
95b24440
DM
3637 oldcc = cc;
3638 cc = cc->oldcc;
3639 if (cc)
3640 ln = cc->cur;
a3621e74 3641 DEBUG_EXECUTE_r(
c3464db5
DD
3642 PerlIO_printf(Perl_debug_log,
3643 "%*s empty match detected, try continuation...\n",
3280af22 3644 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3645 );
95b24440
DM
3646 REGMATCH(oldcc->next, WHILEM1);
3647 /*** all unsaved local vars undefined at this point */
3648 cc = oldcc;
3649 if (result)
4633a7c4 3650 sayYES;
95b24440
DM
3651 if (cc->oldcc)
3652 cc->oldcc->cur = ln;
4633a7c4 3653 sayNO;
a0d0e21e
LW
3654 }
3655
3656 /* First just match a string of min scans. */
3657
3658 if (n < cc->min) {
3659 cc->cur = n;
3660 cc->lastloc = locinput;
95b24440
DM
3661 REGMATCH(cc->scan, WHILEM2);
3662 /*** all unsaved local vars undefined at this point */
3663 if (result)
4633a7c4
LW
3664 sayYES;
3665 cc->cur = n - 1;
c277df42 3666 cc->lastloc = lastloc;
4633a7c4 3667 sayNO;
a0d0e21e
LW
3668 }
3669
2c2d71f5
JH
3670 if (scan->flags) {
3671 /* Check whether we already were at this position.
3672 Postpone detection until we know the match is not
3673 *that* much linear. */
3674 if (!PL_reg_maxiter) {
3675 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3676 PL_reg_leftiter = PL_reg_maxiter;
3677 }
3678 if (PL_reg_leftiter-- == 0) {
a3b680e6 3679 const I32 size = (PL_reg_maxiter + 7 + POSCACHE_START)/8;
2c2d71f5 3680 if (PL_reg_poscache) {
eb160463 3681 if ((I32)PL_reg_poscache_size < size) {
2c2d71f5
JH
3682 Renew(PL_reg_poscache, size, char);
3683 PL_reg_poscache_size = size;
3684 }
3685 Zero(PL_reg_poscache, size, char);
3686 }
3687 else {
3688 PL_reg_poscache_size = size;
a02a5408 3689 Newxz(PL_reg_poscache, size, char);
2c2d71f5 3690 }
a3621e74 3691 DEBUG_EXECUTE_r(
2c2d71f5
JH
3692 PerlIO_printf(Perl_debug_log,
3693 "%sDetected a super-linear match, switching on caching%s...\n",
3694 PL_colors[4], PL_colors[5])
3695 );
3696 }
3697 if (PL_reg_leftiter < 0) {
3ab3c9b4 3698 cache_offset = locinput - PL_bostr;
2c2d71f5 3699
3ab3c9b4
HS
3700 cache_offset = (scan->flags & 0xf) - 1 + POSCACHE_START
3701 + cache_offset * (scan->flags>>4);
3702 cache_bit = cache_offset % 8;
3703 cache_offset /= 8;
3704 if (PL_reg_poscache[cache_offset] & (1<<cache_bit)) {
a3621e74 3705 DEBUG_EXECUTE_r(
2c2d71f5
JH
3706 PerlIO_printf(Perl_debug_log,
3707 "%*s already tried at this position...\n",
3708 REPORT_CODE_OFF+PL_regindent*2, "")
3709 );
3ab3c9b4
HS
3710 if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))
3711 /* cache records success */
c2b0868c
HS
3712 sayYES;
3713 else
3ab3c9b4 3714 /* cache records failure */
c2b0868c 3715 sayNO_SILENT;
2c2d71f5 3716 }
3ab3c9b4 3717 PL_reg_poscache[cache_offset] |= (1<<cache_bit);
2c2d71f5
JH
3718 }
3719 }
3720
a0d0e21e
LW
3721 /* Prefer next over scan for minimal matching. */
3722
3723 if (cc->minmod) {
95b24440
DM
3724 oldcc = cc;
3725 cc = cc->oldcc;
3726 if (cc)
3727 ln = cc->cur;
3728 cp = regcppush(oldcc->parenfloor);
02db2b7b 3729 REGCP_SET(lastcp);
95b24440
DM
3730 REGMATCH(oldcc->next, WHILEM3);
3731 /*** all unsaved local vars undefined at this point */
3732 cc = oldcc;
3733 if (result) {
c277df42 3734 regcpblow(cp);
3ab3c9b4 3735 CACHEsayYES; /* All done. */
5f05dabc 3736 }
02db2b7b 3737 REGCP_UNWIND(lastcp);
5f05dabc 3738 regcppop();
95b24440
DM
3739 if (cc->oldcc)
3740 cc->oldcc->cur = ln;
a0d0e21e 3741
c277df42 3742 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 3743 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3744 && !(PL_reg_flags & RF_warned)) {
3745 PL_reg_flags |= RF_warned;
9014280d 3746 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
2f3ca594
GS
3747 "Complex regular subexpression recursion",
3748 REG_INFTY - 1);
c277df42 3749 }
3ab3c9b4 3750 CACHEsayNO;
c277df42 3751 }
a687059c 3752
a3621e74 3753 DEBUG_EXECUTE_r(
c3464db5
DD
3754 PerlIO_printf(Perl_debug_log,
3755 "%*s trying longer...\n",
3280af22 3756 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3757 );
a0d0e21e 3758 /* Try scanning more and see if it helps. */
3280af22 3759 PL_reginput = locinput;
a0d0e21e
LW
3760 cc->cur = n;
3761 cc->lastloc = locinput;
5f05dabc 3762 cp = regcppush(cc->parenfloor);
02db2b7b 3763 REGCP_SET(lastcp);
95b24440
DM
3764 REGMATCH(cc->scan, WHILEM4);
3765 /*** all unsaved local vars undefined at this point */
3766 if (result) {
c277df42 3767 regcpblow(cp);
3ab3c9b4 3768 CACHEsayYES;
5f05dabc 3769 }
02db2b7b 3770 REGCP_UNWIND(lastcp);
5f05dabc 3771 regcppop();
4633a7c4 3772 cc->cur = n - 1;
c277df42 3773 cc->lastloc = lastloc;
3ab3c9b4 3774 CACHEsayNO;
a0d0e21e
LW
3775 }
3776
3777 /* Prefer scan over next for maximal matching. */
3778
3779 if (n < cc->max) { /* More greed allowed? */
5f05dabc 3780 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
3781 cc->cur = n;
3782 cc->lastloc = locinput;
02db2b7b 3783 REGCP_SET(lastcp);
95b24440
DM
3784 REGMATCH(cc->scan, WHILEM5);
3785 /*** all unsaved local vars undefined at this point */
3786 if (result) {
c277df42 3787 regcpblow(cp);
3ab3c9b4 3788 CACHEsayYES;
5f05dabc 3789 }
02db2b7b 3790 REGCP_UNWIND(lastcp);
a0d0e21e 3791 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3792 PL_reginput = locinput;
a3621e74 3793 DEBUG_EXECUTE_r(
c3464db5
DD
3794 PerlIO_printf(Perl_debug_log,
3795 "%*s failed, try continuation...\n",
3280af22 3796 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3797 );
3798 }
9041c2e3 3799 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3800 && !(PL_reg_flags & RF_warned)) {
3280af22 3801 PL_reg_flags |= RF_warned;
9014280d 3802 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
cb5d145d
GS
3803 "Complex regular subexpression recursion",
3804 REG_INFTY - 1);
a0d0e21e
LW
3805 }
3806
3807 /* Failed deeper matches of scan, so see if this one works. */
95b24440
DM
3808 oldcc = cc;
3809 cc = cc->oldcc;
3810 if (cc)
3811 ln = cc->cur;
3812 REGMATCH(oldcc->next, WHILEM6);
3813 /*** all unsaved local vars undefined at this point */
3814 cc = oldcc;
3815 if (result)
3ab3c9b4 3816 CACHEsayYES;
95b24440
DM
3817 if (cc->oldcc)
3818 cc->oldcc->cur = ln;
4633a7c4 3819 cc->cur = n - 1;
c277df42 3820 cc->lastloc = lastloc;
3ab3c9b4 3821 CACHEsayNO;
a0d0e21e 3822 }
5f66b61c 3823 /* NOTREACHED */
9041c2e3 3824 case BRANCHJ:
c277df42
IZ
3825 next = scan + ARG(scan);
3826 if (next == scan)
3827 next = NULL;
3828 inner = NEXTOPER(NEXTOPER(scan));
3829 goto do_branch;
9041c2e3 3830 case BRANCH:
c277df42
IZ
3831 inner = NEXTOPER(scan);
3832 do_branch:
3833 {
c277df42
IZ
3834 c1 = OP(scan);
3835 if (OP(next) != c1) /* No choice. */
3836 next = inner; /* Avoid recursion. */
a0d0e21e 3837 else {
a3b680e6 3838 const I32 lastparen = *PL_reglastparen;
02db2b7b 3839 /* Put unwinding data on stack */
6136c704
AL
3840 const I32 unwind1 = SSNEWt(1,re_unwind_branch_t);
3841 re_unwind_branch_t * const uw = SSPTRt(unwind1,re_unwind_branch_t);
3842
02db2b7b
IZ
3843 uw->prev = unwind;
3844 unwind = unwind1;
3845 uw->type = ((c1 == BRANCH)
3846 ? RE_UNWIND_BRANCH
3847 : RE_UNWIND_BRANCHJ);
3848 uw->lastparen = lastparen;
3849 uw->next = next;
3850 uw->locinput = locinput;
3851 uw->nextchr = nextchr;
3852#ifdef DEBUGGING
3853 uw->regindent = ++PL_regindent;
3854#endif
c277df42 3855
02db2b7b
IZ
3856 REGCP_SET(uw->lastcp);
3857
3858 /* Now go into the first branch */
3859 next = inner;
a687059c 3860 }
a0d0e21e
LW
3861 }
3862 break;
3863 case MINMOD:
3864 minmod = 1;
3865 break;
c277df42
IZ
3866 case CURLYM:
3867 {
95b24440 3868 curlym_l = matches = 0;
9041c2e3 3869
c277df42 3870 /* We suppose that the next guy does not need
0e788c72 3871 backtracking: in particular, it is of constant non-zero length,
c277df42
IZ
3872 and has no parenths to influence future backrefs. */
3873 ln = ARG1(scan); /* min to match */
3874 n = ARG2(scan); /* max to match */
c277df42
IZ
3875 paren = scan->flags;
3876 if (paren) {
3280af22
NIS
3877 if (paren > PL_regsize)
3878 PL_regsize = paren;
eb160463 3879 if (paren > (I32)*PL_reglastparen)
3280af22 3880 *PL_reglastparen = paren;
c277df42 3881 }
dc45a647 3882 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3883 if (paren)
3884 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3885 PL_reginput = locinput;
6407bf3b
DM
3886 maxwanted = minmod ? ln : n;
3887 if (maxwanted) {
3888 while (PL_reginput < PL_regeol && matches < maxwanted) {
95b24440
DM
3889 REGMATCH(scan, CURLYM1);
3890 /*** all unsaved local vars undefined at this point */
3891 if (!result)
6407bf3b 3892 break;
95b24440 3893 /* on first match, determine length, curlym_l */
6407bf3b
DM
3894 if (!matches++) {
3895 if (PL_reg_match_utf8) {
3896 char *s = locinput;
3897 while (s < PL_reginput) {
95b24440 3898 curlym_l++;
6407bf3b
DM
3899 s += UTF8SKIP(s);
3900 }
3901 }
3902 else {
95b24440 3903 curlym_l = PL_reginput - locinput;
6407bf3b 3904 }
95b24440 3905 if (curlym_l == 0) {
6407bf3b
DM
3906 matches = maxwanted;
3907 break;
3908 }
3909 }
3910 locinput = PL_reginput;
3911 }
3912 }
3913
3914 PL_reginput = locinput;
3915
c277df42
IZ
3916 if (minmod) {
3917 minmod = 0;
6407bf3b 3918 if (ln && matches < ln)
c277df42 3919 sayNO;
cca55fe3 3920 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3921 regnode *text_node = next;
3922
cca55fe3 3923 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3924
cca55fe3 3925 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3926 else {
cca55fe3 3927 if (PL_regkind[(U8)OP(text_node)] == REF) {
44a68960
JH
3928 c1 = c2 = -1000;
3929 goto assume_ok_MM;
cca55fe3
JP
3930 }
3931 else { c1 = (U8)*STRING(text_node); }
af5decee 3932 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3933 c2 = PL_fold[c1];
af5decee 3934 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3935 c2 = PL_fold_locale[c1];
3936 else
3937 c2 = c1;
3938 }
a0ed51b3
LW
3939 }
3940 else
c277df42 3941 c1 = c2 = -1000;
cca55fe3 3942 assume_ok_MM:
02db2b7b 3943 REGCP_SET(lastcp);
0e788c72 3944 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
c277df42
IZ
3945 /* If it could work, try it. */
3946 if (c1 == -1000 ||
3280af22
NIS
3947 UCHARAT(PL_reginput) == c1 ||
3948 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3949 {
3950 if (paren) {
f31a99c8 3951 if (ln) {
cf93c79d 3952 PL_regstartp[paren] =
95b24440 3953 HOPc(PL_reginput, -curlym_l) - PL_bostr;
cf93c79d 3954 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3955 }
3956 else
cf93c79d 3957 PL_regendp[paren] = -1;
c277df42 3958 }
95b24440
DM
3959 REGMATCH(next, CURLYM2);
3960 /*** all unsaved local vars undefined at this point */
3961 if (result)
c277df42 3962 sayYES;
02db2b7b 3963 REGCP_UNWIND(lastcp);
c277df42
IZ
3964 }
3965 /* Couldn't or didn't -- move forward. */
3280af22 3966 PL_reginput = locinput;
95b24440
DM
3967 REGMATCH(scan, CURLYM3);
3968 /*** all unsaved local vars undefined at this point */
3969 if (result) {
c277df42 3970 ln++;
3280af22 3971 locinput = PL_reginput;
c277df42
IZ
3972 }
3973 else
3974 sayNO;
3975 }
a0ed51b3
LW
3976 }
3977 else {
a3621e74 3978 DEBUG_EXECUTE_r(
5c0ca799 3979 PerlIO_printf(Perl_debug_log,
95b24440
DM
3980 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
3981 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
3982 (IV) matches, (IV)curlym_l)
c277df42 3983 );
6407bf3b 3984 if (matches >= ln) {
cca55fe3 3985 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3986 regnode *text_node = next;
3987
cca55fe3 3988 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3989
cca55fe3 3990 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3991 else {
cca55fe3 3992 if (PL_regkind[(U8)OP(text_node)] == REF) {
44a68960
JH
3993 c1 = c2 = -1000;
3994 goto assume_ok_REG;
cca55fe3
JP
3995 }
3996 else { c1 = (U8)*STRING(text_node); }
3997
af5decee 3998 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3999 c2 = PL_fold[c1];
af5decee 4000 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
4001 c2 = PL_fold_locale[c1];
4002 else
4003 c2 = c1;
4004 }
a0ed51b3
LW
4005 }
4006 else
c277df42
IZ
4007 c1 = c2 = -1000;
4008 }
cca55fe3 4009 assume_ok_REG:
02db2b7b 4010 REGCP_SET(lastcp);
6407bf3b 4011 while (matches >= ln) {
c277df42
IZ
4012 /* If it could work, try it. */
4013 if (c1 == -1000 ||
3280af22
NIS
4014 UCHARAT(PL_reginput) == c1 ||
4015 UCHARAT(PL_reginput) == c2)
a0ed51b3 4016 {
a3621e74 4017 DEBUG_EXECUTE_r(
6407bf3b
DM
4018 PerlIO_printf(Perl_debug_log,
4019 "%*s trying tail with matches=%"IVdf"...\n",
4020 (int)(REPORT_CODE_OFF+PL_regindent*2),
4021 "", (IV)matches)
a0ed51b3
LW
4022 );
4023 if (paren) {
6407bf3b 4024 if (matches) {
95b24440
DM
4025 PL_regstartp[paren]
4026 = HOPc(PL_reginput, -curlym_l) - PL_bostr;
cf93c79d 4027 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 4028 }
a0ed51b3 4029 else
cf93c79d 4030 PL_regendp[paren] = -1;
c277df42 4031 }
95b24440
DM
4032 REGMATCH(next, CURLYM4);
4033 /*** all unsaved local vars undefined at this point */
4034 if (result)
a0ed51b3 4035 sayYES;
02db2b7b 4036 REGCP_UNWIND(lastcp);
a0ed51b3 4037 }
c277df42 4038 /* Couldn't or didn't -- back up. */
6407bf3b 4039 matches--;
95b24440 4040 locinput = HOPc(locinput, -curlym_l);
3280af22 4041 PL_reginput = locinput;
c277df42
IZ
4042 }
4043 }
4044 sayNO;
5f66b61c 4045 /* NOTREACHED */
c277df42
IZ
4046 break;
4047 }
4048 case CURLYN:
4049 paren = scan->flags; /* Which paren to set */
3280af22
NIS
4050 if (paren > PL_regsize)
4051 PL_regsize = paren;
eb160463 4052 if (paren > (I32)*PL_reglastparen)
3280af22 4053 *PL_reglastparen = paren;
c277df42
IZ
4054 ln = ARG1(scan); /* min to match */
4055 n = ARG2(scan); /* max to match */
dc45a647 4056 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 4057 goto repeat;
a0d0e21e 4058 case CURLY:
c277df42 4059 paren = 0;
a0d0e21e
LW
4060 ln = ARG1(scan); /* min to match */
4061 n = ARG2(scan); /* max to match */
dc45a647 4062 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
4063 goto repeat;
4064 case STAR:
4065 ln = 0;
c277df42 4066 n = REG_INFTY;
a0d0e21e 4067 scan = NEXTOPER(scan);
c277df42 4068 paren = 0;
a0d0e21e
LW
4069 goto repeat;
4070 case PLUS:
c277df42
IZ
4071 ln = 1;
4072 n = REG_INFTY;
4073 scan = NEXTOPER(scan);
4074 paren = 0;
4075 repeat:
a0d0e21e
LW
4076 /*
4077 * Lookahead to avoid useless match attempts
4078 * when we know what character comes next.
4079 */
5f80c4cf
JP
4080
4081 /*
4082 * Used to only do .*x and .*?x, but now it allows
4083 * for )'s, ('s and (?{ ... })'s to be in the way
4084 * of the quantifier and the EXACT-like node. -- japhy
4085 */
4086
cca55fe3 4087 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
4088 U8 *s;
4089 regnode *text_node = next;
4090
cca55fe3 4091 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 4092
cca55fe3 4093 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 4094 else {
cca55fe3 4095 if (PL_regkind[(U8)OP(text_node)] == REF) {
44a68960
JH
4096 c1 = c2 = -1000;
4097 goto assume_ok_easy;
cca55fe3
JP
4098 }
4099 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
4100
4101 if (!UTF) {
4102 c2 = c1 = *s;
f65d3ee7 4103 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 4104 c2 = PL_fold[c1];
f65d3ee7 4105 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf 4106 c2 = PL_fold_locale[c1];
1aa99e6b 4107 }
5f80c4cf 4108 else { /* UTF */
f65d3ee7 4109 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 4110 STRLEN ulen1, ulen2;
89ebb4a3
JH
4111 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4112 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
a2a2844f
JH
4113
4114 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4115 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4116
89ebb4a3 4117 c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
041457d9 4118 uniflags);
89ebb4a3 4119 c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
041457d9 4120 uniflags);
5f80c4cf
JP
4121 }
4122 else {
89ebb4a3 4123 c2 = c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
041457d9 4124 uniflags);
5f80c4cf 4125 }
1aa99e6b
IH
4126 }
4127 }
bbce6d69 4128 }
a0d0e21e 4129 else
bbce6d69 4130 c1 = c2 = -1000;
cca55fe3 4131 assume_ok_easy:
3280af22 4132 PL_reginput = locinput;
a0d0e21e
LW
4133 if (minmod) {
4134 minmod = 0;
4135 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 4136 sayNO;
a0ed51b3 4137 locinput = PL_reginput;
02db2b7b 4138 REGCP_SET(lastcp);
0fe9bf95 4139 if (c1 != -1000) {
95b24440
DM
4140 old = locinput;
4141 count = 0;
0fe9bf95 4142
1aa99e6b 4143 if (n == REG_INFTY) {
0fe9bf95 4144 e = PL_regeol - 1;
1aa99e6b
IH
4145 if (do_utf8)
4146 while (UTF8_IS_CONTINUATION(*(U8*)e))
4147 e--;
4148 }
4149 else if (do_utf8) {
4150 int m = n - ln;
4151 for (e = locinput;
4152 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
4153 e += UTF8SKIP(e);
4154 }
4155 else {
4156 e = locinput + n - ln;
4157 if (e >= PL_regeol)
4158 e = PL_regeol - 1;
4159 }
0fe9bf95
IZ
4160 while (1) {
4161 /* Find place 'next' could work */
1aa99e6b
IH
4162 if (!do_utf8) {
4163 if (c1 == c2) {
a8e8ab15
JH
4164 while (locinput <= e &&
4165 UCHARAT(locinput) != c1)
1aa99e6b
IH
4166 locinput++;
4167 } else {
9041c2e3 4168 while (locinput <= e
a8e8ab15
JH
4169 && UCHARAT(locinput) != c1
4170 && UCHARAT(locinput) != c2)
1aa99e6b
IH
4171 locinput++;
4172 }
4173 count = locinput - old;
4174 }
4175 else {
1aa99e6b 4176 if (c1 == c2) {
a3b680e6 4177 STRLEN len;
872c91ae
JH
4178 /* count initialised to
4179 * utf8_distance(old, locinput) */
b2f2f093 4180 while (locinput <= e &&
872c91ae 4181 utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4182 UTF8_MAXBYTES, &len,
041457d9 4183 uniflags) != (UV)c1) {
1aa99e6b 4184 locinput += len;
b2f2f093
JH
4185 count++;
4186 }
1aa99e6b 4187 } else {
a3b680e6 4188 STRLEN len;
872c91ae
JH
4189 /* count initialised to
4190 * utf8_distance(old, locinput) */
b2f2f093 4191 while (locinput <= e) {
872c91ae 4192 UV c = utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4193 UTF8_MAXBYTES, &len,
041457d9 4194 uniflags);
eb160463 4195 if (c == (UV)c1 || c == (UV)c2)
1aa99e6b 4196 break;
b2f2f093
JH
4197 locinput += len;
4198 count++;
1aa99e6b
IH
4199 }
4200 }
0fe9bf95 4201 }
9041c2e3 4202 if (locinput > e)
0fe9bf95
IZ
4203 sayNO;
4204 /* PL_reginput == old now */
4205 if (locinput != old) {
4206 ln = 1; /* Did some */
1aa99e6b 4207 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
4208 sayNO;
4209 }
4210 /* PL_reginput == locinput now */
95b24440
DM
4211 TRYPAREN(paren, ln, locinput, PLUS1);
4212 /*** all unsaved local vars undefined at this point */
0fe9bf95 4213 PL_reginput = locinput; /* Could be reset... */
02db2b7b 4214 REGCP_UNWIND(lastcp);
0fe9bf95 4215 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
4216 old = locinput;
4217 if (do_utf8)
4218 locinput += UTF8SKIP(locinput);
4219 else
4220 locinput++;
b2f2f093 4221 count = 1;
0fe9bf95
IZ
4222 }
4223 }
4224 else
c277df42 4225 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
4226 UV c;
4227 if (c1 != -1000) {
4228 if (do_utf8)
872c91ae 4229 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4230 UTF8_MAXBYTES, 0,
041457d9 4231 uniflags);
1aa99e6b 4232 else
9041c2e3 4233 c = UCHARAT(PL_reginput);
2390ecbc 4234 /* If it could work, try it. */
eb160463 4235 if (c == (UV)c1 || c == (UV)c2)
2390ecbc 4236 {
95b24440
DM
4237 TRYPAREN(paren, ln, PL_reginput, PLUS2);
4238 /*** all unsaved local vars undefined at this point */
2390ecbc
PP
4239 REGCP_UNWIND(lastcp);
4240 }
1aa99e6b 4241 }
a0d0e21e 4242 /* If it could work, try it. */
2390ecbc 4243 else if (c1 == -1000)
bbce6d69 4244 {
95b24440
DM
4245 TRYPAREN(paren, ln, PL_reginput, PLUS3);
4246 /*** all unsaved local vars undefined at this point */
02db2b7b 4247 REGCP_UNWIND(lastcp);
bbce6d69 4248 }
c277df42 4249 /* Couldn't or didn't -- move forward. */
a0ed51b3 4250 PL_reginput = locinput;
a0d0e21e
LW
4251 if (regrepeat(scan, 1)) {
4252 ln++;
a0ed51b3
LW
4253 locinput = PL_reginput;
4254 }
4255 else
4633a7c4 4256 sayNO;
a0d0e21e
LW
4257 }
4258 }
4259 else {
4260 n = regrepeat(scan, n);
a0ed51b3 4261 locinput = PL_reginput;
22c35a8c 4262 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
7fba1cd6 4263 (OP(next) != MEOL ||
15272685
HS
4264 OP(next) == SEOL || OP(next) == EOS))
4265 {
a0d0e21e 4266 ln = n; /* why back off? */
1aeab75a
GS
4267 /* ...because $ and \Z can match before *and* after
4268 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
4269 We should back off by one in this case. */
4270 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
4271 ln--;
4272 }
02db2b7b 4273 REGCP_SET(lastcp);
1d5c262f 4274 {
8fa7f367 4275 UV c = 0;
c277df42 4276 while (n >= ln) {
1aa99e6b
IH
4277 if (c1 != -1000) {
4278 if (do_utf8)
872c91ae 4279 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4280 UTF8_MAXBYTES, 0,
041457d9 4281 uniflags);
1aa99e6b 4282 else
9041c2e3 4283 c = UCHARAT(PL_reginput);
1aa99e6b 4284 }
c277df42 4285 /* If it could work, try it. */
eb160463 4286 if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
c277df42 4287 {
95b24440
DM
4288 TRYPAREN(paren, n, PL_reginput, PLUS4);
4289 /*** all unsaved local vars undefined at this point */
02db2b7b 4290 REGCP_UNWIND(lastcp);
c277df42
IZ
4291 }
4292 /* Couldn't or didn't -- back up. */
4293 n--;
dfe13c55 4294 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 4295 }
a0d0e21e
LW
4296 }
4297 }
4633a7c4 4298 sayNO;
c277df42 4299 break;
a0d0e21e 4300 case END:
0f5d15d6 4301 if (PL_reg_call_cc) {
95b24440
DM
4302 cur_call_cc = PL_reg_call_cc;
4303 end_re = PL_reg_re;
6136c704
AL
4304
4305 /* Save *all* the positions. */
95b24440 4306 cp = regcppush(0);
02db2b7b 4307 REGCP_SET(lastcp);
6136c704
AL
4308
4309 /* Restore parens of the caller. */
95b24440
DM
4310 {
4311 I32 tmp = PL_savestack_ix;
4312 PL_savestack_ix = PL_reg_call_cc->ss;
4313 regcppop();
4314 PL_savestack_ix = tmp;
4315 }
6136c704
AL
4316
4317 /* Make position available to the callcc. */
4318 PL_reginput = locinput;
4319
0f5d15d6 4320 cache_re(PL_reg_call_cc->re);
95b24440
DM
4321 oldcc = cc;
4322 cc = PL_reg_call_cc->cc;
0f5d15d6 4323 PL_reg_call_cc = PL_reg_call_cc->prev;
95b24440
DM
4324 REGMATCH(cur_call_cc->node, END);
4325 /*** all unsaved local vars undefined at this point */
4326 if (result) {
0f5d15d6
IZ
4327 PL_reg_call_cc = cur_call_cc;
4328 regcpblow(cp);
4329 sayYES;
4330 }
02db2b7b 4331 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
4332 regcppop();
4333 PL_reg_call_cc = cur_call_cc;
95b24440
DM
4334 cc = oldcc;
4335 PL_reg_re = end_re;
4336 cache_re(end_re);
0f5d15d6 4337
a3621e74 4338 DEBUG_EXECUTE_r(
0f5d15d6
IZ
4339 PerlIO_printf(Perl_debug_log,
4340 "%*s continuation failed...\n",
4341 REPORT_CODE_OFF+PL_regindent*2, "")
4342 );
7821416a 4343 sayNO_SILENT;
0f5d15d6 4344 }
7821416a 4345 if (locinput < PL_regtill) {
a3621e74 4346 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
7821416a
IZ
4347 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4348 PL_colors[4],
4349 (long)(locinput - PL_reg_starttry),
4350 (long)(PL_regtill - PL_reg_starttry),
4351 PL_colors[5]));
4352 sayNO_FINAL; /* Cannot match: too short. */
4353 }
4354 PL_reginput = locinput; /* put where regtry can find it */
4355 sayYES_FINAL; /* Success! */
7e5428c5 4356 case SUCCEED:
3280af22 4357 PL_reginput = locinput; /* put where regtry can find it */
7821416a 4358 sayYES_LOUD; /* Success! */
c277df42
IZ
4359 case SUSPEND:
4360 n = 1;
9fe1d20c 4361 PL_reginput = locinput;
9041c2e3 4362 goto do_ifmatch;
a0d0e21e 4363 case UNLESSM:
c277df42 4364 n = 0;
a0ed51b3 4365 if (scan->flags) {
95b24440 4366 char *s = HOPBACKc(locinput, scan->flags);
efb30f32
HS
4367 if (!s)
4368 goto say_yes;
4369 PL_reginput = s;
a0ed51b3
LW
4370 }
4371 else
4372 PL_reginput = locinput;
c277df42
IZ
4373 goto do_ifmatch;
4374 case IFMATCH:
4375 n = 1;
a0ed51b3 4376 if (scan->flags) {
95b24440 4377 char *s = HOPBACKc(locinput, scan->flags);
efb30f32
HS
4378 if (!s)
4379 goto say_no;
4380 PL_reginput = s;
a0ed51b3
LW
4381 }
4382 else
4383 PL_reginput = locinput;
4384
c277df42 4385 do_ifmatch:
95b24440
DM
4386 REGMATCH(NEXTOPER(NEXTOPER(scan)), IFMATCH);
4387 /*** all unsaved local vars undefined at this point */
4388 if (result != n) {
c277df42
IZ
4389 say_no:
4390 if (logical) {
4391 logical = 0;
4392 sw = 0;
4393 goto do_longjump;
a0ed51b3
LW
4394 }
4395 else
c277df42
IZ
4396 sayNO;
4397 }
4398 say_yes:
4399 if (logical) {
4400 logical = 0;
4401 sw = 1;
4402 }
fe44a5e8 4403 if (OP(scan) == SUSPEND) {
3280af22 4404 locinput = PL_reginput;
565764a8 4405 nextchr = UCHARAT(locinput);
fe44a5e8 4406 }
c277df42
IZ
4407 /* FALL THROUGH. */
4408 case LONGJMP:
4409 do_longjump:
4410 next = scan + ARG(scan);
4411 if (next == scan)
4412 next = NULL;
a0d0e21e
LW
4413 break;
4414 default:
b900a521 4415 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 4416 PTR2UV(scan), OP(scan));
cea2e8a9 4417 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 4418 }
95b24440 4419
02db2b7b 4420 reenter:
a0d0e21e 4421 scan = next;
95b24440
DM
4422 continue;
4423 /* NOTREACHED */
4424
4425 /* simulate recursively calling regmatch(), but without actually
4426 * recursing - ie save the current state on the heap rather than on
4427 * the stack, then re-enter the loop. This avoids complex regexes
4428 * blowing the processor stack */
4429
4430 start_recurse:
4431 {
4432 /* save existing local variables */
4433 struct regmatch_state *p;
4434
4435 Newx(p, 1, struct regmatch_state);
4436 p->prev_state = prev_state;
4437 prev_state = p;
4438 p->resume_state = resume_state;
4439 p->scan = scan;
4440 p->next = next;
4441 p->minmod = minmod;
4442 p->sw = sw;
4443 p->logical = logical;
4444 p->unwind = unwind;
4445 p->cc = cc;
4446 p->locinput = locinput;
4447 p->n = n;
4448 p->ln = ln;
4449 p->c1 = c1;
4450 p->c2 = c2;
4451 p->paren = paren;
4452 p->cp = cp;
4453 p->lastcp = lastcp;
4454 p->oldcc = oldcc;
4455 p->lastloc = lastloc;
4456 p->cache_offset = cache_offset;
4457 p->cache_bit = cache_bit;
4458 p->curlym_l = curlym_l;
4459 p->matches = matches;
4460 p->maxwanted = maxwanted;
4461 p->e = e;
4462 p->old = old;
4463 p->count = count;
4464 p->cur_call_cc = cur_call_cc;
4465 p->end_re = end_re;
4466 p->accept_buff = accept_buff;
4467 p->accepted = accepted;
4468 p->reg_call_cc = PL_reg_call_cc;
4469
4470 scan = new_scan;
4471 locinput = PL_reginput;
4472 nextchr = UCHARAT(locinput);
4473 minmod = 0;
4474 sw = 0;
4475 logical = 0;
4476 unwind = 0;
4477#ifdef DEBUGGING
4478 PL_regindent++;
4479#endif
4480 }
a0d0e21e 4481 }
a687059c 4482
a0d0e21e
LW
4483 /*
4484 * We get here only if there's trouble -- normally "case END" is
4485 * the terminating point.
4486 */
cea2e8a9 4487 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 4488 /*NOTREACHED*/
4633a7c4
LW
4489 sayNO;
4490
7821416a 4491yes_loud:
a3621e74 4492 DEBUG_EXECUTE_r(
7821416a
IZ
4493 PerlIO_printf(Perl_debug_log,
4494 "%*s %scould match...%s\n",
e4584336 4495 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
7821416a
IZ
4496 );
4497 goto yes;
4498yes_final:
a3621e74 4499 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
e4584336 4500 PL_colors[4], PL_colors[5]));
4633a7c4
LW
4501yes:
4502#ifdef DEBUGGING
3280af22 4503 PL_regindent--;
4633a7c4 4504#endif
02db2b7b 4505
95b24440
DM
4506 result = 1;
4507 goto exit_level;
4633a7c4
LW
4508
4509no:
a3621e74 4510 DEBUG_EXECUTE_r(
7821416a
IZ
4511 PerlIO_printf(Perl_debug_log,
4512 "%*s %sfailed...%s\n",
e4584336 4513 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
7821416a
IZ
4514 );
4515 goto do_no;
4516no_final:
4517do_no:
02db2b7b 4518 if (unwind) {
6136c704 4519 re_unwind_t * const uw = SSPTRt(unwind,re_unwind_t);
02db2b7b
IZ
4520
4521 switch (uw->type) {
4522 case RE_UNWIND_BRANCH:
4523 case RE_UNWIND_BRANCHJ:
4524 {
6136c704 4525 re_unwind_branch_t * const uwb = &(uw->branch);
a3b680e6 4526 const I32 lastparen = uwb->lastparen;
9041c2e3 4527
02db2b7b
IZ
4528 REGCP_UNWIND(uwb->lastcp);
4529 for (n = *PL_reglastparen; n > lastparen; n--)
4530 PL_regendp[n] = -1;
4531 *PL_reglastparen = n;
4532 scan = next = uwb->next;
9041c2e3
NIS
4533 if ( !scan ||
4534 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
4535 ? BRANCH : BRANCHJ) ) { /* Failure */
4536 unwind = uwb->prev;
4537#ifdef DEBUGGING
4538 PL_regindent--;
4539#endif
4540 goto do_no;
4541 }
4542 /* Have more choice yet. Reuse the same uwb. */
02db2b7b
IZ
4543 if ((n = (uwb->type == RE_UNWIND_BRANCH
4544 ? NEXT_OFF(next) : ARG(next))))
4545 next += n;
4546 else
4547 next = NULL; /* XXXX Needn't unwinding in this case... */
4548 uwb->next = next;
4549 next = NEXTOPER(scan);
4550 if (uwb->type == RE_UNWIND_BRANCHJ)
4551 next = NEXTOPER(next);
4552 locinput = uwb->locinput;
4553 nextchr = uwb->nextchr;
4554#ifdef DEBUGGING
4555 PL_regindent = uwb->regindent;
4556#endif
4557
4558 goto reenter;
4559 }
5f66b61c 4560 /* NOTREACHED */
02db2b7b
IZ
4561 default:
4562 Perl_croak(aTHX_ "regexp unwind memory corruption");
4563 }
5f66b61c 4564 /* NOTREACHED */
02db2b7b 4565 }
4633a7c4 4566#ifdef DEBUGGING
3280af22 4567 PL_regindent--;
4633a7c4 4568#endif
95b24440
DM
4569 result = 0;
4570exit_level:
4571 if (prev_state) {
4572 /* restore previous state and re-enter */
4573
4574 struct regmatch_state *p = prev_state;
4575 resume_state = p->resume_state;
4576 scan = p->scan;
4577 next = p->next;
4578 minmod = p->minmod;
4579 sw = p->sw;
4580 logical = p->logical;
4581 unwind = p->unwind;
4582 cc = p->cc;
4583 locinput = p->locinput;
4584 nextchr = UCHARAT(locinput);
4585 n = p->n;
4586 ln = p->ln;
4587 c1 = p->c1;
4588 c2 = p->c2;
4589 paren = p->paren;
4590 cp = p->cp;
4591 lastcp = p->lastcp;
4592 oldcc = p->oldcc;
4593 lastloc = p->lastloc;
4594 cache_offset = p->cache_offset;
4595 cache_bit = p->cache_bit;
4596 curlym_l = p->curlym_l;
4597 matches = p->matches;
4598 maxwanted = p->maxwanted;
4599 e = p->e;
4600 old = p->old;
4601 count = p->count;
4602 cur_call_cc = p->cur_call_cc;
4603 end_re = p->end_re;
4604 accept_buff = p->accept_buff;
4605 accepted = p->accepted;
4606 PL_reg_call_cc = p->reg_call_cc;
4607 prev_state = p->prev_state;
82255c49 4608 Safefree(p);
95b24440
DM
4609
4610 switch (resume_state) {
4611 case resume_TRIE1:
4612 goto resume_point_TRIE1;
4613 case resume_TRIE2:
4614 goto resume_point_TRIE2;
4615 case resume_CURLYX:
4616 goto resume_point_CURLYX;
4617 case resume_WHILEM1:
4618 goto resume_point_WHILEM1;
4619 case resume_WHILEM2:
4620 goto resume_point_WHILEM2;
4621 case resume_WHILEM3:
4622 goto resume_point_WHILEM3;
4623 case resume_WHILEM4:
4624 goto resume_point_WHILEM4;
4625 case resume_WHILEM5:
4626 goto resume_point_WHILEM5;
4627 case resume_WHILEM6:
4628 goto resume_point_WHILEM6;
4629 case resume_CURLYM1:
4630 goto resume_point_CURLYM1;
4631 case resume_CURLYM2:
4632 goto resume_point_CURLYM2;
4633 case resume_CURLYM3:
4634 goto resume_point_CURLYM3;
4635 case resume_CURLYM4:
4636 goto resume_point_CURLYM4;
4637 case resume_IFMATCH:
4638 goto resume_point_IFMATCH;
4639 case resume_PLUS1:
4640 goto resume_point_PLUS1;
4641 case resume_PLUS2:
4642 goto resume_point_PLUS2;
4643 case resume_PLUS3:
4644 goto resume_point_PLUS3;
4645 case resume_PLUS4:
4646 goto resume_point_PLUS4;
4647 case resume_END:
4648 goto resume_point_END;
4649 default:
4650 Perl_croak(aTHX_ "regexp resume memory corruption");
4651 }
4652 /* NOTREACHED */
4653 }
4654 return result;
4655
a687059c
LW
4656}
4657
4658/*
4659 - regrepeat - repeatedly match something simple, report how many
4660 */
4661/*
4662 * [This routine now assumes that it will only match on things of length 1.
4663 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 4664 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 4665 */
76e3520e 4666STATIC I32
a3b680e6 4667S_regrepeat(pTHX_ const regnode *p, I32 max)
a687059c 4668{
27da23d5 4669 dVAR;
a0d0e21e 4670 register char *scan;
a0d0e21e 4671 register I32 c;
3280af22 4672 register char *loceol = PL_regeol;
a0ed51b3 4673 register I32 hardcount = 0;
53c4c00c 4674 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 4675
3280af22 4676 scan = PL_reginput;
faf11cac
HS
4677 if (max == REG_INFTY)
4678 max = I32_MAX;
4679 else if (max < loceol - scan)
a0d0e21e 4680 loceol = scan + max;
a0d0e21e 4681 switch (OP(p)) {
22c35a8c 4682 case REG_ANY:
1aa99e6b 4683 if (do_utf8) {
ffc61ed2 4684 loceol = PL_regeol;
1aa99e6b 4685 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
4686 scan += UTF8SKIP(scan);
4687 hardcount++;
4688 }
4689 } else {
4690 while (scan < loceol && *scan != '\n')
4691 scan++;
a0ed51b3
LW
4692 }
4693 break;
ffc61ed2 4694 case SANY:
def8e4ea
JH
4695 if (do_utf8) {
4696 loceol = PL_regeol;
a0804c9e 4697 while (scan < loceol && hardcount < max) {
def8e4ea
JH
4698 scan += UTF8SKIP(scan);
4699 hardcount++;
4700 }
4701 }
4702 else
4703 scan = loceol;
a0ed51b3 4704 break;
f33976b4
DB
4705 case CANY:
4706 scan = loceol;
4707 break;
090f7165
JH
4708 case EXACT: /* length of string is 1 */
4709 c = (U8)*STRING(p);
4710 while (scan < loceol && UCHARAT(scan) == c)
4711 scan++;
bbce6d69 4712 break;
4713 case EXACTF: /* length of string is 1 */
cd439c50 4714 c = (U8)*STRING(p);
bbce6d69 4715 while (scan < loceol &&
22c35a8c 4716 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 4717 scan++;
4718 break;
4719 case EXACTFL: /* length of string is 1 */
3280af22 4720 PL_reg_flags |= RF_tainted;
cd439c50 4721 c = (U8)*STRING(p);
bbce6d69 4722 while (scan < loceol &&
22c35a8c 4723 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
4724 scan++;
4725 break;
4726 case ANYOF:
ffc61ed2
JH
4727 if (do_utf8) {
4728 loceol = PL_regeol;
cfc92286
JH
4729 while (hardcount < max && scan < loceol &&
4730 reginclass(p, (U8*)scan, 0, do_utf8)) {
ffc61ed2
JH
4731 scan += UTF8SKIP(scan);
4732 hardcount++;
4733 }
4734 } else {
7d3e948e 4735 while (scan < loceol && REGINCLASS(p, (U8*)scan))
ffc61ed2
JH
4736 scan++;
4737 }
a0d0e21e
LW
4738 break;
4739 case ALNUM:
1aa99e6b 4740 if (do_utf8) {
ffc61ed2 4741 loceol = PL_regeol;
1a4fad37 4742 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4743 while (hardcount < max && scan < loceol &&
3568d838 4744 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4745 scan += UTF8SKIP(scan);
4746 hardcount++;
4747 }
4748 } else {
4749 while (scan < loceol && isALNUM(*scan))
4750 scan++;
a0ed51b3
LW
4751 }
4752 break;
bbce6d69 4753 case ALNUML:
3280af22 4754 PL_reg_flags |= RF_tainted;
1aa99e6b 4755 if (do_utf8) {
ffc61ed2 4756 loceol = PL_regeol;
1aa99e6b
IH
4757 while (hardcount < max && scan < loceol &&
4758 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4759 scan += UTF8SKIP(scan);
4760 hardcount++;
4761 }
4762 } else {
4763 while (scan < loceol && isALNUM_LC(*scan))
4764 scan++;
a0ed51b3
LW
4765 }
4766 break;
a0d0e21e 4767 case NALNUM:
1aa99e6b 4768 if (do_utf8) {
ffc61ed2 4769 loceol = PL_regeol;
1a4fad37 4770 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4771 while (hardcount < max && scan < loceol &&
3568d838 4772 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4773 scan += UTF8SKIP(scan);
4774 hardcount++;
4775 }
4776 } else {
4777 while (scan < loceol && !isALNUM(*scan))
4778 scan++;
a0ed51b3
LW
4779 }
4780 break;
bbce6d69 4781 case NALNUML:
3280af22 4782 PL_reg_flags |= RF_tainted;
1aa99e6b 4783 if (do_utf8) {
ffc61ed2 4784 loceol = PL_regeol;
1aa99e6b
IH
4785 while (hardcount < max && scan < loceol &&
4786 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4787 scan += UTF8SKIP(scan);
4788 hardcount++;
4789 }
4790 } else {
4791 while (scan < loceol && !isALNUM_LC(*scan))
4792 scan++;
a0ed51b3
LW
4793 }
4794 break;
a0d0e21e 4795 case SPACE:
1aa99e6b 4796 if (do_utf8) {
ffc61ed2 4797 loceol = PL_regeol;
1a4fad37 4798 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4799 while (hardcount < max && scan < loceol &&
3568d838
JH
4800 (*scan == ' ' ||
4801 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4802 scan += UTF8SKIP(scan);
4803 hardcount++;
4804 }
4805 } else {
4806 while (scan < loceol && isSPACE(*scan))
4807 scan++;
a0ed51b3
LW
4808 }
4809 break;
bbce6d69 4810 case SPACEL:
3280af22 4811 PL_reg_flags |= RF_tainted;
1aa99e6b 4812 if (do_utf8) {
ffc61ed2 4813 loceol = PL_regeol;
1aa99e6b 4814 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4815 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4816 scan += UTF8SKIP(scan);
4817 hardcount++;
4818 }
4819 } else {
4820 while (scan < loceol && isSPACE_LC(*scan))
4821 scan++;
a0ed51b3
LW
4822 }
4823 break;
a0d0e21e 4824 case NSPACE:
1aa99e6b 4825 if (do_utf8) {
ffc61ed2 4826 loceol = PL_regeol;
1a4fad37 4827 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4828 while (hardcount < max && scan < loceol &&
3568d838
JH
4829 !(*scan == ' ' ||
4830 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4831 scan += UTF8SKIP(scan);
4832 hardcount++;
4833 }
4834 } else {
4835 while (scan < loceol && !isSPACE(*scan))
4836 scan++;
4837 break;
a0ed51b3 4838 }
bbce6d69 4839 case NSPACEL:
3280af22 4840 PL_reg_flags |= RF_tainted;
1aa99e6b 4841 if (do_utf8) {
ffc61ed2 4842 loceol = PL_regeol;
1aa99e6b 4843 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4844 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4845 scan += UTF8SKIP(scan);
4846 hardcount++;
4847 }
4848 } else {
4849 while (scan < loceol && !isSPACE_LC(*scan))
4850 scan++;
a0ed51b3
LW
4851 }
4852 break;
a0d0e21e 4853 case DIGIT:
1aa99e6b 4854 if (do_utf8) {
ffc61ed2 4855 loceol = PL_regeol;
1a4fad37 4856 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4857 while (hardcount < max && scan < loceol &&
3568d838 4858 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4859 scan += UTF8SKIP(scan);
4860 hardcount++;
4861 }
4862 } else {
4863 while (scan < loceol && isDIGIT(*scan))
4864 scan++;
a0ed51b3
LW
4865 }
4866 break;
a0d0e21e 4867 case NDIGIT:
1aa99e6b 4868 if (do_utf8) {
ffc61ed2 4869 loceol = PL_regeol;
1a4fad37 4870 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4871 while (hardcount < max && scan < loceol &&
3568d838 4872 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4873 scan += UTF8SKIP(scan);
4874 hardcount++;
4875 }
4876 } else {
4877 while (scan < loceol && !isDIGIT(*scan))
4878 scan++;
a0ed51b3
LW
4879 }
4880 break;
a0d0e21e
LW
4881 default: /* Called on something of 0 width. */
4882 break; /* So match right here or not at all. */
4883 }
a687059c 4884
a0ed51b3
LW
4885 if (hardcount)
4886 c = hardcount;
4887 else
4888 c = scan - PL_reginput;
3280af22 4889 PL_reginput = scan;
a687059c 4890
a3621e74 4891 DEBUG_r({
ab74612d 4892 SV *re_debug_flags = NULL;
6136c704 4893 SV * const prop = sv_newmortal();
a3621e74
YO
4894 GET_RE_DEBUG_FLAGS;
4895 DEBUG_EXECUTE_r({
c277df42 4896 regprop(prop, p);
9041c2e3
NIS
4897 PerlIO_printf(Perl_debug_log,
4898 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3f7c398e 4899 REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max);
c277df42 4900 });
a3621e74 4901 });
9041c2e3 4902
a0d0e21e 4903 return(c);
a687059c
LW
4904}
4905
c277df42
IZ
4906
4907/*
ffc61ed2
JH
4908- regclass_swash - prepare the utf8 swash
4909*/
4910
4911SV *
a3b680e6 4912Perl_regclass_swash(pTHX_ register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
ffc61ed2 4913{
97aff369 4914 dVAR;
9e55ce06
JH
4915 SV *sw = NULL;
4916 SV *si = NULL;
4917 SV *alt = NULL;
ffc61ed2
JH
4918
4919 if (PL_regdata && PL_regdata->count) {
a3b680e6 4920 const U32 n = ARG(node);
ffc61ed2
JH
4921
4922 if (PL_regdata->what[n] == 's') {
890ce7af
AL
4923 SV * const rv = (SV*)PL_regdata->data[n];
4924 AV * const av = (AV*)SvRV((SV*)rv);
2d03de9c 4925 SV **const ary = AvARRAY(av);
9e55ce06 4926 SV **a, **b;
9041c2e3 4927
711a919c 4928 /* See the end of regcomp.c:S_regclass() for
9e55ce06
JH
4929 * documentation of these array elements. */
4930
b11f357e 4931 si = *ary;
8f7f7219 4932 a = SvROK(ary[1]) ? &ary[1] : 0;
b11f357e
JH
4933 b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4934
ffc61ed2
JH
4935 if (a)
4936 sw = *a;
4937 else if (si && doinit) {
4938 sw = swash_init("utf8", "", si, 1, 0);
4939 (void)av_store(av, 1, sw);
4940 }
9e55ce06
JH
4941 if (b)
4942 alt = *b;
ffc61ed2
JH
4943 }
4944 }
4945
9e55ce06
JH
4946 if (listsvp)
4947 *listsvp = si;
4948 if (altsvp)
4949 *altsvp = alt;
ffc61ed2
JH
4950
4951 return sw;
4952}
4953
4954/*
ba7b4546 4955 - reginclass - determine if a character falls into a character class
832705d4
JH
4956
4957 The n is the ANYOF regnode, the p is the target string, lenp
4958 is pointer to the maximum length of how far to go in the p
4959 (if the lenp is zero, UTF8SKIP(p) is used),
4960 do_utf8 tells whether the target string is in UTF-8.
4961
bbce6d69 4962 */
4963
76e3520e 4964STATIC bool
a3b680e6 4965S_reginclass(pTHX_ register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
bbce6d69 4966{
27da23d5 4967 dVAR;
a3b680e6 4968 const char flags = ANYOF_FLAGS(n);
bbce6d69 4969 bool match = FALSE;
cc07378b 4970 UV c = *p;
ae9ddab8 4971 STRLEN len = 0;
9e55ce06 4972 STRLEN plen;
1aa99e6b 4973
19f67299
TS
4974 if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
4975 c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
4976 ckWARN(WARN_UTF8) ? UTF8_CHECK_ONLY :
4977 UTF8_ALLOW_ANYUV|UTF8_CHECK_ONLY);
4978 if (len == (STRLEN)-1)
4979 Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
4980 }
bbce6d69 4981
0f0076b4 4982 plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
ffc61ed2 4983 if (do_utf8 || (flags & ANYOF_UNICODE)) {
9e55ce06
JH
4984 if (lenp)
4985 *lenp = 0;
ffc61ed2 4986 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
4987 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4988 match = TRUE;
bbce6d69 4989 }
3568d838 4990 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 4991 match = TRUE;
ffc61ed2 4992 if (!match) {
9e55ce06 4993 AV *av;
890ce7af 4994 SV * const sw = regclass_swash(n, TRUE, 0, (SV**)&av);
ffc61ed2
JH
4995
4996 if (sw) {
3568d838 4997 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
4998 match = TRUE;
4999 else if (flags & ANYOF_FOLD) {
9e55ce06
JH
5000 if (!match && lenp && av) {
5001 I32 i;
9e55ce06 5002 for (i = 0; i <= av_len(av); i++) {
890ce7af 5003 SV* const sv = *av_fetch(av, i, FALSE);
9e55ce06 5004 STRLEN len;
890ce7af 5005 const char * const s = SvPV_const(sv, len);
9e55ce06 5006
061b10df 5007 if (len <= plen && memEQ(s, (char*)p, len)) {
9e55ce06
JH
5008 *lenp = len;
5009 match = TRUE;
5010 break;
5011 }
5012 }
5013 }
5014 if (!match) {
89ebb4a3 5015 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4a623e43
JH
5016 STRLEN tmplen;
5017
9e55ce06
JH
5018 to_utf8_fold(p, tmpbuf, &tmplen);
5019 if (swash_fetch(sw, tmpbuf, do_utf8))
5020 match = TRUE;
5021 }
ffc61ed2
JH
5022 }
5023 }
bbce6d69 5024 }
9e55ce06 5025 if (match && lenp && *lenp == 0)
0f0076b4 5026 *lenp = UNISKIP(NATIVE_TO_UNI(c));
bbce6d69 5027 }
1aa99e6b 5028 if (!match && c < 256) {
ffc61ed2
JH
5029 if (ANYOF_BITMAP_TEST(n, c))
5030 match = TRUE;
5031 else if (flags & ANYOF_FOLD) {
eb160463 5032 U8 f;
a0ed51b3 5033
ffc61ed2
JH
5034 if (flags & ANYOF_LOCALE) {
5035 PL_reg_flags |= RF_tainted;
5036 f = PL_fold_locale[c];
5037 }
5038 else
5039 f = PL_fold[c];
5040 if (f != c && ANYOF_BITMAP_TEST(n, f))
5041 match = TRUE;
5042 }
5043
5044 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 5045 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
5046 if (
5047 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
5048 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
5049 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
5050 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
5051 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
5052 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
5053 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
5054 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5055 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
5056 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
5057 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
5058 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
5059 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
5060 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
5061 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
5062 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
5063 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
5064 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
5065 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
5066 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
5067 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
5068 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
5069 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
5070 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
5071 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
5072 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
5073 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
5074 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
5075 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
5076 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
5077 ) /* How's that for a conditional? */
5078 {
5079 match = TRUE;
5080 }
a0ed51b3 5081 }
a0ed51b3
LW
5082 }
5083
a0ed51b3
LW
5084 return (flags & ANYOF_INVERT) ? !match : match;
5085}
161b471a 5086
dfe13c55 5087STATIC U8 *
cea2e8a9 5088S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 5089{
97aff369 5090 dVAR;
5f66b61c 5091 return S_reghop3(s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
1aa99e6b
IH
5092}
5093
5094STATIC U8 *
5f66b61c 5095S_reghop3(U8 *s, I32 off, U8* lim)
9041c2e3 5096{
97aff369 5097 dVAR;
a0ed51b3 5098 if (off >= 0) {
1aa99e6b 5099 while (off-- && s < lim) {
ffc61ed2 5100 /* XXX could check well-formedness here */
a0ed51b3 5101 s += UTF8SKIP(s);
ffc61ed2 5102 }
a0ed51b3
LW
5103 }
5104 else {
5105 while (off++) {
1aa99e6b 5106 if (s > lim) {
a0ed51b3 5107 s--;
ffc61ed2 5108 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 5109 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 5110 s--;
ffc61ed2
JH
5111 }
5112 /* XXX could check well-formedness here */
a0ed51b3
LW
5113 }
5114 }
5115 }
5116 return s;
5117}
161b471a 5118
dfe13c55 5119STATIC U8 *
1aa99e6b 5120S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 5121{
97aff369 5122 dVAR;
5f66b61c 5123 return S_reghopmaybe3(s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
1aa99e6b
IH
5124}
5125
5126STATIC U8 *
5f66b61c 5127S_reghopmaybe3(U8* s, I32 off, U8* lim)
a0ed51b3 5128{
97aff369 5129 dVAR;
a0ed51b3 5130 if (off >= 0) {
1aa99e6b 5131 while (off-- && s < lim) {
ffc61ed2 5132 /* XXX could check well-formedness here */
a0ed51b3 5133 s += UTF8SKIP(s);
ffc61ed2 5134 }
a0ed51b3
LW
5135 if (off >= 0)
5136 return 0;
5137 }
5138 else {
5139 while (off++) {
1aa99e6b 5140 if (s > lim) {
a0ed51b3 5141 s--;
ffc61ed2 5142 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 5143 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 5144 s--;
ffc61ed2
JH
5145 }
5146 /* XXX could check well-formedness here */
a0ed51b3
LW
5147 }
5148 else
5149 break;
5150 }
5151 if (off <= 0)
5152 return 0;
5153 }
5154 return s;
5155}
51371543 5156
51371543 5157static void
acfe0abc 5158restore_pos(pTHX_ void *arg)
51371543 5159{
97aff369 5160 dVAR;
9d4ba2ae 5161 PERL_UNUSED_ARG(arg);
51371543
GS
5162 if (PL_reg_eval_set) {
5163 if (PL_reg_oldsaved) {
5164 PL_reg_re->subbeg = PL_reg_oldsaved;
5165 PL_reg_re->sublen = PL_reg_oldsavedlen;
f8c7b90f 5166#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
5167 PL_reg_re->saved_copy = PL_nrs;
5168#endif
51371543
GS
5169 RX_MATCH_COPIED_on(PL_reg_re);
5170 }
5171 PL_reg_magic->mg_len = PL_reg_oldpos;
5172 PL_reg_eval_set = 0;
5173 PL_curpm = PL_reg_oldcurpm;
5174 }
5175}
33b8afdf
JH
5176
5177STATIC void
5178S_to_utf8_substr(pTHX_ register regexp *prog)
5179{
33b8afdf 5180 if (prog->float_substr && !prog->float_utf8) {
9d4ba2ae 5181 SV* sv;
f2b990bf 5182 prog->float_utf8 = sv = newSVsv(prog->float_substr);
33b8afdf
JH
5183 sv_utf8_upgrade(sv);
5184 if (SvTAIL(prog->float_substr))
5185 SvTAIL_on(sv);
5186 if (prog->float_substr == prog->check_substr)
5187 prog->check_utf8 = sv;
5188 }
5189 if (prog->anchored_substr && !prog->anchored_utf8) {
9d4ba2ae 5190 SV* sv;
f2b990bf 5191 prog->anchored_utf8 = sv = newSVsv(prog->anchored_substr);
33b8afdf
JH
5192 sv_utf8_upgrade(sv);
5193 if (SvTAIL(prog->anchored_substr))
5194 SvTAIL_on(sv);
5195 if (prog->anchored_substr == prog->check_substr)
5196 prog->check_utf8 = sv;
5197 }
5198}
5199
5200STATIC void
5201S_to_byte_substr(pTHX_ register regexp *prog)
5202{
97aff369 5203 dVAR;
33b8afdf 5204 if (prog->float_utf8 && !prog->float_substr) {
9d4ba2ae 5205 SV* sv;
f2b990bf 5206 prog->float_substr = sv = newSVsv(prog->float_utf8);
33b8afdf
JH
5207 if (sv_utf8_downgrade(sv, TRUE)) {
5208 if (SvTAIL(prog->float_utf8))
5209 SvTAIL_on(sv);
5210 } else {
5211 SvREFCNT_dec(sv);
5212 prog->float_substr = sv = &PL_sv_undef;
5213 }
5214 if (prog->float_utf8 == prog->check_utf8)
5215 prog->check_substr = sv;
5216 }
5217 if (prog->anchored_utf8 && !prog->anchored_substr) {
9d4ba2ae 5218 SV* sv;
f2b990bf 5219 prog->anchored_substr = sv = newSVsv(prog->anchored_utf8);
33b8afdf
JH
5220 if (sv_utf8_downgrade(sv, TRUE)) {
5221 if (SvTAIL(prog->anchored_utf8))
5222 SvTAIL_on(sv);
5223 } else {
5224 SvREFCNT_dec(sv);
5225 prog->anchored_substr = sv = &PL_sv_undef;
5226 }
5227 if (prog->anchored_utf8 == prog->check_utf8)
5228 prog->check_substr = sv;
5229 }
5230}
66610fdd
RGS
5231
5232/*
5233 * Local variables:
5234 * c-indentation-style: bsd
5235 * c-basic-offset: 4
5236 * indent-tabs-mode: t
5237 * End:
5238 *
37442d52
RGS
5239 * ex: set ts=8 sts=4 sw=4 noet:
5240 */