This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Continuing threads sync
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
61296642
DM
8/* This file contains functions for executing a regular expression. See
9 * also regcomp.c which funnily enough, contains functions for compiling
166f8a29 10 * a regular expression.
e4a054ea
DM
11 *
12 * This file is also copied at build time to ext/re/re_exec.c, where
13 * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14 * This causes the main functions to be compiled under new names and with
15 * debugging support added, which makes "use re 'debug'" work.
16
166f8a29
DM
17 */
18
a687059c
LW
19/* NOTE: this is derived from Henry Spencer's regexp code, and should not
20 * confused with the original package (see point 3 below). Thanks, Henry!
21 */
22
23/* Additional note: this code is very heavily munged from Henry's version
24 * in places. In some spots I've traded clarity for efficiency, so don't
25 * blame Henry for some of the lack of readability.
26 */
27
e50aee73
AD
28/* The names of the functions have been changed from regcomp and
29 * regexec to pregcomp and pregexec in order to avoid conflicts
30 * with the POSIX routines of the same names.
31*/
32
b9d5759e 33#ifdef PERL_EXT_RE_BUILD
54df2634 34#include "re_top.h"
9041c2e3 35#endif
56953603 36
a687059c 37/*
e50aee73 38 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
39 *
40 * Copyright (c) 1986 by University of Toronto.
41 * Written by Henry Spencer. Not derived from licensed software.
42 *
43 * Permission is granted to anyone to use this software for any
44 * purpose on any computer system, and to redistribute it freely,
45 * subject to the following restrictions:
46 *
47 * 1. The author is not responsible for the consequences of use of
48 * this software, no matter how awful, even if they arise
49 * from defects in it.
50 *
51 * 2. The origin of this software must not be misrepresented, either
52 * by explicit claim or by omission.
53 *
54 * 3. Altered versions must be plainly marked as such, and must not
55 * be misrepresented as being the original software.
56 *
57 **** Alterations to Henry's code are...
58 ****
4bb101f2 59 **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 60 **** 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 61 ****
9ef589d8
LW
62 **** You may distribute under the terms of either the GNU General Public
63 **** License or the Artistic License, as specified in the README file.
a687059c
LW
64 *
65 * Beware that some of this code is subtly aware of the way operator
66 * precedence is structured in regular expressions. Serious changes in
67 * regular-expression syntax might require a total rethink.
68 */
69#include "EXTERN.h"
864dbfa3 70#define PERL_IN_REGEXEC_C
a687059c 71#include "perl.h"
0f5d15d6 72
54df2634
NC
73#ifdef PERL_IN_XSUB_RE
74# include "re_comp.h"
75#else
76# include "regcomp.h"
77#endif
a687059c 78
c277df42
IZ
79#define RF_tainted 1 /* tainted information used? */
80#define RF_warned 2 /* warned about big count? */
ce862d02 81#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
82#define RF_utf8 8 /* String contains multibyte chars? */
83
eb160463 84#define UTF ((PL_reg_flags & RF_utf8) != 0)
ce862d02
IZ
85
86#define RS_init 1 /* eval environment created */
87#define RS_set 2 /* replsv value is set */
c277df42 88
a687059c
LW
89#ifndef STATIC
90#define STATIC static
91#endif
92
32fc9b6a 93#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
7d3e948e 94
c277df42
IZ
95/*
96 * Forwards.
97 */
98
33b8afdf 99#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 100#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 101
52657f30
AL
102#define HOPc(pos,off) ((char *)(PL_reg_match_utf8 \
103 ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
104 : (U8*)(pos + off)))
105#define HOPBACKc(pos, off) ((char*) \
106 ((PL_reg_match_utf8) \
107 ? reghopmaybe3((U8*)pos, -off, ((U8*)(off < 0 ? PL_regeol : PL_bostr))) \
efb30f32
HS
108 : (pos - off >= PL_bostr) \
109 ? (U8*)(pos - off) \
52657f30 110 : (U8*)NULL) \
efb30f32 111)
efb30f32 112
1aa99e6b 113#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c 114#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b 115#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
1aa99e6b 116
1a4fad37
AL
117#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
118 if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
119#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
120#define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
121#define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
122#define LOAD_UTF8_CHARCLASS_MARK() LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
51371543 123
5f80c4cf 124/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
125#define JUMPABLE(rn) ( \
126 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
127 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
128 OP(rn) == PLUS || OP(rn) == MINMOD || \
129 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
130)
131
cca55fe3
JP
132#define HAS_TEXT(rn) ( \
133 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
134)
e2d8ce26 135
a84d97b6
HS
136/*
137 Search for mandatory following text node; for lookahead, the text must
138 follow but for lookbehind (rn->flags != 0) we skip to the next step.
139*/
cca55fe3 140#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 141 while (JUMPABLE(rn)) \
a84d97b6 142 if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 143 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
144 else if (OP(rn) == PLUS) \
145 rn = NEXTOPER(rn); \
a84d97b6
HS
146 else if (OP(rn) == IFMATCH) \
147 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
e2d8ce26 148 else rn += NEXT_OFF(rn); \
5f80c4cf 149} STMT_END
74750237 150
acfe0abc 151static void restore_pos(pTHX_ void *arg);
51371543 152
76e3520e 153STATIC CHECKPOINT
cea2e8a9 154S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 155{
97aff369 156 dVAR;
a3b680e6 157 const int retval = PL_savestack_ix;
b1ce53c5 158#define REGCP_PAREN_ELEMS 4
a3b680e6 159 const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
160 int p;
161
e49a9654
IH
162 if (paren_elems_to_push < 0)
163 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
164
a01268b5 165#define REGCP_OTHER_ELEMS 6
4b3c1a47 166 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 167 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 168/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
169 SSPUSHINT(PL_regendp[p]);
170 SSPUSHINT(PL_regstartp[p]);
3280af22 171 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
172 SSPUSHINT(p);
173 }
b1ce53c5 174/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
175 SSPUSHINT(PL_regsize);
176 SSPUSHINT(*PL_reglastparen);
a01268b5 177 SSPUSHINT(*PL_reglastcloseparen);
3280af22 178 SSPUSHPTR(PL_reginput);
41123dfd
JH
179#define REGCP_FRAME_ELEMS 2
180/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
181 * are needed for the regexp context stack bookkeeping. */
182 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 183 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 184
a0d0e21e
LW
185 return retval;
186}
187
c277df42 188/* These are needed since we do not localize EVAL nodes: */
a3621e74 189# define REGCP_SET(cp) DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, \
faccc32b 190 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 191 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 192
a3621e74 193# define REGCP_UNWIND(cp) DEBUG_EXECUTE_r(cp != PL_savestack_ix ? \
c3464db5 194 PerlIO_printf(Perl_debug_log, \
faccc32b 195 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 196 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 197
76e3520e 198STATIC char *
097eb12c 199S_regcppop(pTHX_ const regexp *rex)
a0d0e21e 200{
97aff369 201 dVAR;
b1ce53c5 202 I32 i;
a0d0e21e 203 char *input;
b1ce53c5 204
a3621e74
YO
205 GET_RE_DEBUG_FLAGS_DECL;
206
b1ce53c5 207 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 208 i = SSPOPINT;
b1ce53c5
JH
209 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
210 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 211 input = (char *) SSPOPPTR;
a01268b5 212 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
213 *PL_reglastparen = SSPOPINT;
214 PL_regsize = SSPOPINT;
b1ce53c5
JH
215
216 /* Now restore the parentheses context. */
41123dfd
JH
217 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
218 i > 0; i -= REGCP_PAREN_ELEMS) {
1df70142 219 I32 tmps;
097eb12c 220 U32 paren = (U32)SSPOPINT;
3280af22 221 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
222 PL_regstartp[paren] = SSPOPINT;
223 tmps = SSPOPINT;
3280af22
NIS
224 if (paren <= *PL_reglastparen)
225 PL_regendp[paren] = tmps;
a3621e74 226 DEBUG_EXECUTE_r(
c3464db5 227 PerlIO_printf(Perl_debug_log,
b900a521 228 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 229 (UV)paren, (IV)PL_regstartp[paren],
b900a521 230 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 231 (IV)PL_regendp[paren],
3280af22 232 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 233 );
a0d0e21e 234 }
a3621e74 235 DEBUG_EXECUTE_r(
4f639d21 236 if ((I32)(*PL_reglastparen + 1) <= rex->nparens) {
c3464db5 237 PerlIO_printf(Perl_debug_log,
faccc32b 238 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
4f639d21 239 (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
c277df42
IZ
240 }
241 );
daf18116 242#if 1
dafc8851
JH
243 /* It would seem that the similar code in regtry()
244 * already takes care of this, and in fact it is in
245 * a better location to since this code can #if 0-ed out
246 * but the code in regtry() is needed or otherwise tests
247 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
248 * (as of patchlevel 7877) will fail. Then again,
249 * this code seems to be necessary or otherwise
250 * building DynaLoader will fail:
251 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
252 * --jhi */
097eb12c
AL
253 for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
254 if (i > PL_regsize)
255 PL_regstartp[i] = -1;
256 PL_regendp[i] = -1;
a0d0e21e 257 }
dafc8851 258#endif
a0d0e21e
LW
259 return input;
260}
261
02db2b7b 262#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 263
95b24440 264#define TRYPAREN(paren, n, input, where) { \
29d1e993
HS
265 if (paren) { \
266 if (n) { \
267 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
268 PL_regendp[paren] = input - PL_bostr; \
269 } \
270 else \
271 PL_regendp[paren] = -1; \
272 } \
95b24440
DM
273 REGMATCH(next, where); \
274 if (result) \
29d1e993
HS
275 sayYES; \
276 if (paren && n) \
277 PL_regendp[paren] = -1; \
278}
279
280
a687059c 281/*
e50aee73 282 * pregexec and friends
a687059c
LW
283 */
284
76234dfb 285#ifndef PERL_IN_XSUB_RE
a687059c 286/*
c277df42 287 - pregexec - match a regexp against a string
a687059c 288 */
c277df42 289I32
864dbfa3 290Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 291 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
292/* strend: pointer to null at end of string */
293/* strbeg: real beginning of string */
294/* minend: end of match must be >=minend after stringarg. */
295/* nosave: For optimizations. */
296{
297 return
9041c2e3 298 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
299 nosave ? 0 : REXEC_COPY_STR);
300}
76234dfb 301#endif
22e551b9 302
9041c2e3 303/*
cad2e5aa
JH
304 * Need to implement the following flags for reg_anch:
305 *
306 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
307 * USE_INTUIT_ML
308 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
309 * INTUIT_AUTORITATIVE_ML
310 * INTUIT_ONCE_NOML - Intuit can match in one location only.
311 * INTUIT_ONCE_ML
312 *
313 * Another flag for this function: SECOND_TIME (so that float substrs
314 * with giant delta may be not rechecked).
315 */
316
317/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
318
3f7c398e 319/* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
cad2e5aa
JH
320 Otherwise, only SvCUR(sv) is used to get strbeg. */
321
322/* XXXX We assume that strpos is strbeg unless sv. */
323
6eb5f6b9
JH
324/* XXXX Some places assume that there is a fixed substring.
325 An update may be needed if optimizer marks as "INTUITable"
326 RExen without fixed substrings. Similarly, it is assumed that
327 lengths of all the strings are no more than minlen, thus they
328 cannot come from lookahead.
329 (Or minlen should take into account lookahead.) */
330
2c2d71f5
JH
331/* A failure to find a constant substring means that there is no need to make
332 an expensive call to REx engine, thus we celebrate a failure. Similarly,
333 finding a substring too deep into the string means that less calls to
30944b6d
IZ
334 regtry() should be needed.
335
336 REx compiler's optimizer found 4 possible hints:
337 a) Anchored substring;
338 b) Fixed substring;
339 c) Whether we are anchored (beginning-of-line or \G);
340 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 341 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
342 string which does not contradict any of them.
343 */
2c2d71f5 344
6eb5f6b9
JH
345/* Most of decisions we do here should have been done at compile time.
346 The nodes of the REx which we used for the search should have been
347 deleted from the finite automaton. */
348
cad2e5aa
JH
349char *
350Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
351 char *strend, U32 flags, re_scream_pos_data *data)
352{
97aff369 353 dVAR;
b7953727 354 register I32 start_shift = 0;
cad2e5aa 355 /* Should be nonnegative! */
b7953727 356 register I32 end_shift = 0;
2c2d71f5
JH
357 register char *s;
358 register SV *check;
a1933d95 359 char *strbeg;
cad2e5aa 360 char *t;
a3b680e6 361 const int do_utf8 = sv ? SvUTF8(sv) : 0; /* if no sv we have to assume bytes */
cad2e5aa 362 I32 ml_anch;
bd61b366
SS
363 register char *other_last = NULL; /* other substr checked before this */
364 char *check_at = NULL; /* check substr found at this pos */
1df70142 365 const I32 multiline = prog->reganch & PMf_MULTILINE;
30944b6d 366#ifdef DEBUGGING
890ce7af
AL
367 const char * const i_strpos = strpos;
368 SV * const dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 369#endif
a3621e74
YO
370
371 GET_RE_DEBUG_FLAGS_DECL;
372
a30b2f1f 373 RX_MATCH_UTF8_set(prog,do_utf8);
cad2e5aa 374
b8d68ded 375 if (prog->reganch & ROPT_UTF8) {
a3621e74 376 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded
JH
377 "UTF-8 regex...\n"));
378 PL_reg_flags |= RF_utf8;
379 }
380
a3621e74 381 DEBUG_EXECUTE_r({
1df70142 382 const char *s = PL_reg_match_utf8 ?
c728cb41
JH
383 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
384 strpos;
1df70142 385 const int len = PL_reg_match_utf8 ?
b8d68ded 386 strlen(s) : strend - strpos;
2a782b5b
JH
387 if (!PL_colorset)
388 reginitcolors();
b8d68ded 389 if (PL_reg_match_utf8)
a3621e74 390 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded 391 "UTF-8 target...\n"));
2a782b5b 392 PerlIO_printf(Perl_debug_log,
a0288114 393 "%sGuessing start of match, REx%s \"%s%.60s%s%s\" against \"%s%.*s%s%s\"...\n",
e4584336 394 PL_colors[4], PL_colors[5], PL_colors[0],
2a782b5b
JH
395 prog->precomp,
396 PL_colors[1],
397 (strlen(prog->precomp) > 60 ? "..." : ""),
398 PL_colors[0],
399 (int)(len > 60 ? 60 : len),
400 s, PL_colors[1],
401 (len > 60 ? "..." : "")
402 );
403 });
cad2e5aa 404
c344f387
JH
405 /* CHR_DIST() would be more correct here but it makes things slow. */
406 if (prog->minlen > strend - strpos) {
a3621e74 407 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584 408 "String too short... [re_intuit_start]\n"));
cad2e5aa 409 goto fail;
2c2d71f5 410 }
a1933d95 411 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 412 PL_regeol = strend;
33b8afdf
JH
413 if (do_utf8) {
414 if (!prog->check_utf8 && prog->check_substr)
415 to_utf8_substr(prog);
416 check = prog->check_utf8;
417 } else {
418 if (!prog->check_substr && prog->check_utf8)
419 to_byte_substr(prog);
420 check = prog->check_substr;
421 }
422 if (check == &PL_sv_undef) {
a3621e74 423 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
424 "Non-utf string cannot match utf check string\n"));
425 goto fail;
426 }
2c2d71f5 427 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
428 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
429 || ( (prog->reganch & ROPT_ANCH_BOL)
7fba1cd6 430 && !multiline ) ); /* Check after \n? */
cad2e5aa 431
7e25d62c
JH
432 if (!ml_anch) {
433 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
434 | ROPT_IMPLICIT)) /* not a real BOL */
3f7c398e 435 /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
7e25d62c
JH
436 && sv && !SvROK(sv)
437 && (strpos != strbeg)) {
a3621e74 438 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
7e25d62c
JH
439 goto fail;
440 }
441 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 442 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 443 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
444 I32 slen;
445
1aa99e6b 446 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
447 if (SvTAIL(check)) {
448 slen = SvCUR(check); /* >= 1 */
cad2e5aa 449
9041c2e3 450 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5 451 || (strend - s == slen && strend[-1] != '\n')) {
a3621e74 452 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
2c2d71f5 453 goto fail_finish;
cad2e5aa
JH
454 }
455 /* Now should match s[0..slen-2] */
456 slen--;
3f7c398e 457 if (slen && (*SvPVX_const(check) != *s
cad2e5aa 458 || (slen > 1
3f7c398e 459 && memNE(SvPVX_const(check), s, slen)))) {
2c2d71f5 460 report_neq:
a3621e74 461 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
2c2d71f5
JH
462 goto fail_finish;
463 }
cad2e5aa 464 }
3f7c398e 465 else if (*SvPVX_const(check) != *s
653099ff 466 || ((slen = SvCUR(check)) > 1
3f7c398e 467 && memNE(SvPVX_const(check), s, slen)))
2c2d71f5 468 goto report_neq;
c315bfe8 469 check_at = s;
2c2d71f5 470 goto success_at_start;
7e25d62c 471 }
cad2e5aa 472 }
2c2d71f5 473 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 474 s = strpos;
2c2d71f5 475 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 476 end_shift = prog->minlen - start_shift -
653099ff 477 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 478 if (!ml_anch) {
a3b680e6 479 const I32 end = prog->check_offset_max + CHR_SVLEN(check)
653099ff 480 - (SvTAIL(check) != 0);
a3b680e6 481 const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
482
483 if (end_shift < eshift)
484 end_shift = eshift;
485 }
cad2e5aa 486 }
2c2d71f5 487 else { /* Can match at random position */
cad2e5aa
JH
488 ml_anch = 0;
489 s = strpos;
2c2d71f5
JH
490 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
491 /* Should be nonnegative! */
492 end_shift = prog->minlen - start_shift -
653099ff 493 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
494 }
495
2c2d71f5 496#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 497 if (end_shift < 0)
6bbae5e6 498 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
499#endif
500
2c2d71f5
JH
501 restart:
502 /* Find a possible match in the region s..strend by looking for
503 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 504 if (flags & REXEC_SCREAM) {
cad2e5aa 505 I32 p = -1; /* Internal iterator of scream. */
a3b680e6 506 I32 * const pp = data ? data->scream_pos : &p;
cad2e5aa 507
2c2d71f5
JH
508 if (PL_screamfirst[BmRARE(check)] >= 0
509 || ( BmRARE(check) == '\n'
510 && (BmPREVIOUS(check) == SvCUR(check) - 1)
511 && SvTAIL(check) ))
9041c2e3 512 s = screaminstr(sv, check,
2c2d71f5 513 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 514 else
2c2d71f5 515 goto fail_finish;
4addbd3b
HS
516 /* we may be pointing at the wrong string */
517 if (s && RX_MATCH_COPIED(prog))
3f7c398e 518 s = strbeg + (s - SvPVX_const(sv));
cad2e5aa
JH
519 if (data)
520 *data->scream_olds = s;
521 }
f33976b4 522 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
523 s = fbm_instr((U8*)(s + start_shift),
524 (U8*)(strend - end_shift),
7fba1cd6 525 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 526 else
1aa99e6b
IH
527 s = fbm_instr(HOP3(s, start_shift, strend),
528 HOP3(strend, -end_shift, strbeg),
7fba1cd6 529 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
530
531 /* Update the count-of-usability, remove useless subpatterns,
532 unshift s. */
2c2d71f5 533
a0288114 534 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s %s substr \"%s%.*s%s\"%s%s",
2c2d71f5 535 (s ? "Found" : "Did not find"),
33b8afdf 536 (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
2c2d71f5 537 PL_colors[0],
7b0972df 538 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
3f7c398e 539 SvPVX_const(check),
2c2d71f5
JH
540 PL_colors[1], (SvTAIL(check) ? "$" : ""),
541 (s ? " at offset " : "...\n") ) );
542
543 if (!s)
544 goto fail_finish;
545
6eb5f6b9
JH
546 check_at = s;
547
2c2d71f5 548 /* Finish the diagnostic message */
a3621e74 549 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
550
551 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
552 Start with the other substr.
553 XXXX no SCREAM optimization yet - and a very coarse implementation
a0288114 554 XXXX /ttx+/ results in anchored="ttx", floating="x". floating will
2c2d71f5
JH
555 *always* match. Probably should be marked during compile...
556 Probably it is right to do no SCREAM here...
557 */
558
33b8afdf 559 if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
30944b6d 560 /* Take into account the "other" substring. */
2c2d71f5
JH
561 /* XXXX May be hopelessly wrong for UTF... */
562 if (!other_last)
6eb5f6b9 563 other_last = strpos;
33b8afdf 564 if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
30944b6d
IZ
565 do_other_anchored:
566 {
890ce7af
AL
567 char * const last = HOP3c(s, -start_shift, strbeg);
568 char *last1, *last2;
2c2d71f5 569 char *s1 = s;
33b8afdf 570 SV* must;
2c2d71f5 571
2c2d71f5
JH
572 t = s - prog->check_offset_max;
573 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 574 && (!do_utf8
1aa99e6b 575 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 576 && t > strpos)))
30944b6d 577 /* EMPTY */;
2c2d71f5
JH
578 else
579 t = strpos;
1aa99e6b 580 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
581 if (t < other_last) /* These positions already checked */
582 t = other_last;
1aa99e6b 583 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
584 if (last < last1)
585 last1 = last;
586 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
587 /* On end-of-str: see comment below. */
33b8afdf
JH
588 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
589 if (must == &PL_sv_undef) {
590 s = (char*)NULL;
a3621e74 591 DEBUG_EXECUTE_r(must = prog->anchored_utf8); /* for debug */
33b8afdf
JH
592 }
593 else
594 s = fbm_instr(
595 (unsigned char*)t,
596 HOP3(HOP3(last1, prog->anchored_offset, strend)
597 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
598 must,
7fba1cd6 599 multiline ? FBMrf_MULTILINE : 0
33b8afdf 600 );
a3621e74 601 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a0288114 602 "%s anchored substr \"%s%.*s%s\"%s",
2c2d71f5
JH
603 (s ? "Found" : "Contradicts"),
604 PL_colors[0],
33b8afdf
JH
605 (int)(SvCUR(must)
606 - (SvTAIL(must)!=0)),
3f7c398e 607 SvPVX_const(must),
33b8afdf 608 PL_colors[1], (SvTAIL(must) ? "$" : "")));
2c2d71f5
JH
609 if (!s) {
610 if (last1 >= last2) {
a3621e74 611 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5
JH
612 ", giving up...\n"));
613 goto fail_finish;
614 }
a3621e74 615 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 616 ", trying floating at offset %ld...\n",
1aa99e6b
IH
617 (long)(HOP3c(s1, 1, strend) - i_strpos)));
618 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
619 s = HOP3c(last, 1, strend);
2c2d71f5
JH
620 goto restart;
621 }
622 else {
a3621e74 623 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 624 (long)(s - i_strpos)));
1aa99e6b
IH
625 t = HOP3c(s, -prog->anchored_offset, strbeg);
626 other_last = HOP3c(s, 1, strend);
30944b6d 627 s = s1;
2c2d71f5
JH
628 if (t == strpos)
629 goto try_at_start;
2c2d71f5
JH
630 goto try_at_offset;
631 }
30944b6d 632 }
2c2d71f5
JH
633 }
634 else { /* Take into account the floating substring. */
33b8afdf
JH
635 char *last, *last1;
636 char *s1 = s;
637 SV* must;
638
639 t = HOP3c(s, -start_shift, strbeg);
640 last1 = last =
641 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
642 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
643 last = HOP3c(t, prog->float_max_offset, strend);
644 s = HOP3c(t, prog->float_min_offset, strend);
645 if (s < other_last)
646 s = other_last;
2c2d71f5 647 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
33b8afdf
JH
648 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
649 /* fbm_instr() takes into account exact value of end-of-str
650 if the check is SvTAIL(ed). Since false positives are OK,
651 and end-of-str is not later than strend we are OK. */
652 if (must == &PL_sv_undef) {
653 s = (char*)NULL;
a3621e74 654 DEBUG_EXECUTE_r(must = prog->float_utf8); /* for debug message */
33b8afdf
JH
655 }
656 else
2c2d71f5 657 s = fbm_instr((unsigned char*)s,
33b8afdf
JH
658 (unsigned char*)last + SvCUR(must)
659 - (SvTAIL(must)!=0),
7fba1cd6 660 must, multiline ? FBMrf_MULTILINE : 0);
a0288114 661 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s floating substr \"%s%.*s%s\"%s",
33b8afdf
JH
662 (s ? "Found" : "Contradicts"),
663 PL_colors[0],
664 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 665 SvPVX_const(must),
33b8afdf
JH
666 PL_colors[1], (SvTAIL(must) ? "$" : "")));
667 if (!s) {
668 if (last1 == last) {
a3621e74 669 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
670 ", giving up...\n"));
671 goto fail_finish;
2c2d71f5 672 }
a3621e74 673 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
674 ", trying anchored starting at offset %ld...\n",
675 (long)(s1 + 1 - i_strpos)));
676 other_last = last;
677 s = HOP3c(t, 1, strend);
678 goto restart;
679 }
680 else {
a3621e74 681 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
33b8afdf
JH
682 (long)(s - i_strpos)));
683 other_last = s; /* Fix this later. --Hugo */
684 s = s1;
685 if (t == strpos)
686 goto try_at_start;
687 goto try_at_offset;
688 }
2c2d71f5 689 }
cad2e5aa 690 }
2c2d71f5
JH
691
692 t = s - prog->check_offset_max;
2c2d71f5 693 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 694 && (!do_utf8
1aa99e6b
IH
695 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
696 && t > strpos))) {
2c2d71f5
JH
697 /* Fixed substring is found far enough so that the match
698 cannot start at strpos. */
699 try_at_offset:
cad2e5aa 700 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
701 /* Eventually fbm_*() should handle this, but often
702 anchored_offset is not 0, so this check will not be wasted. */
703 /* XXXX In the code below we prefer to look for "^" even in
704 presence of anchored substrings. And we search even
705 beyond the found float position. These pessimizations
706 are historical artefacts only. */
707 find_anchor:
2c2d71f5 708 while (t < strend - prog->minlen) {
cad2e5aa 709 if (*t == '\n') {
4ee3650e 710 if (t < check_at - prog->check_offset_min) {
33b8afdf 711 if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
4ee3650e
GS
712 /* Since we moved from the found position,
713 we definitely contradict the found anchored
30944b6d
IZ
714 substr. Due to the above check we do not
715 contradict "check" substr.
716 Thus we can arrive here only if check substr
717 is float. Redo checking for "other"=="fixed".
718 */
9041c2e3 719 strpos = t + 1;
a3621e74 720 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
e4584336 721 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
30944b6d
IZ
722 goto do_other_anchored;
723 }
4ee3650e
GS
724 /* We don't contradict the found floating substring. */
725 /* XXXX Why not check for STCLASS? */
cad2e5aa 726 s = t + 1;
a3621e74 727 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
e4584336 728 PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
729 goto set_useful;
730 }
4ee3650e
GS
731 /* Position contradicts check-string */
732 /* XXXX probably better to look for check-string
733 than for "\n", so one should lower the limit for t? */
a3621e74 734 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
e4584336 735 PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 736 other_last = strpos = s = t + 1;
cad2e5aa
JH
737 goto restart;
738 }
739 t++;
740 }
a3621e74 741 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
e4584336 742 PL_colors[0], PL_colors[1]));
2c2d71f5 743 goto fail_finish;
cad2e5aa 744 }
f5952150 745 else {
a3621e74 746 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
e4584336 747 PL_colors[0], PL_colors[1]));
f5952150 748 }
cad2e5aa
JH
749 s = t;
750 set_useful:
33b8afdf 751 ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
cad2e5aa
JH
752 }
753 else {
f5952150 754 /* The found string does not prohibit matching at strpos,
2c2d71f5 755 - no optimization of calling REx engine can be performed,
f5952150
GS
756 unless it was an MBOL and we are not after MBOL,
757 or a future STCLASS check will fail this. */
2c2d71f5
JH
758 try_at_start:
759 /* Even in this situation we may use MBOL flag if strpos is offset
760 wrt the start of the string. */
05b4157f 761 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 762 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
763 /* May be due to an implicit anchor of m{.*foo} */
764 && !(prog->reganch & ROPT_IMPLICIT))
765 {
cad2e5aa
JH
766 t = strpos;
767 goto find_anchor;
768 }
a3621e74 769 DEBUG_EXECUTE_r( if (ml_anch)
f5952150 770 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
e4584336 771 (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
30944b6d 772 );
2c2d71f5 773 success_at_start:
30944b6d 774 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
33b8afdf
JH
775 && (do_utf8 ? (
776 prog->check_utf8 /* Could be deleted already */
777 && --BmUSEFUL(prog->check_utf8) < 0
778 && (prog->check_utf8 == prog->float_utf8)
779 ) : (
780 prog->check_substr /* Could be deleted already */
781 && --BmUSEFUL(prog->check_substr) < 0
782 && (prog->check_substr == prog->float_substr)
783 )))
66e933ab 784 {
cad2e5aa 785 /* If flags & SOMETHING - do not do it many times on the same match */
a3621e74 786 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
33b8afdf
JH
787 SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
788 if (do_utf8 ? prog->check_substr : prog->check_utf8)
789 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
a0714e2c
SS
790 prog->check_substr = prog->check_utf8 = NULL; /* disable */
791 prog->float_substr = prog->float_utf8 = NULL; /* clear */
792 check = NULL; /* abort */
cad2e5aa 793 s = strpos;
3cf5c195
IZ
794 /* XXXX This is a remnant of the old implementation. It
795 looks wasteful, since now INTUIT can use many
6eb5f6b9 796 other heuristics. */
cad2e5aa
JH
797 prog->reganch &= ~RE_USE_INTUIT;
798 }
799 else
800 s = strpos;
801 }
802
6eb5f6b9
JH
803 /* Last resort... */
804 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
805 if (prog->regstclass) {
806 /* minlen == 0 is possible if regstclass is \b or \B,
807 and the fixed substr is ''$.
808 Since minlen is already taken into account, s+1 is before strend;
809 accidentally, minlen >= 1 guaranties no false positives at s + 1
810 even for \b or \B. But (minlen? 1 : 0) below assumes that
811 regstclass does not come from lookahead... */
812 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
813 This leaves EXACTF only, which is dealt with in find_byclass(). */
890ce7af 814 const U8* const str = (U8*)STRING(prog->regstclass);
06b5626a 815 const int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 816 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 817 : 1);
a3b680e6 818 const char * const endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
1aa99e6b 819 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
33b8afdf 820 : (prog->float_substr || prog->float_utf8
1aa99e6b
IH
821 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
822 cl_l, strend)
823 : strend);
6eb5f6b9
JH
824
825 t = s;
3b0527fe 826 s = find_byclass(prog, prog->regstclass, s, endpos, NULL);
6eb5f6b9
JH
827 if (!s) {
828#ifdef DEBUGGING
cbbf8932 829 const char *what = NULL;
6eb5f6b9
JH
830#endif
831 if (endpos == strend) {
a3621e74 832 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
833 "Could not match STCLASS...\n") );
834 goto fail;
835 }
a3621e74 836 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab 837 "This position contradicts STCLASS...\n") );
653099ff
GS
838 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
839 goto fail;
6eb5f6b9 840 /* Contradict one of substrings */
33b8afdf
JH
841 if (prog->anchored_substr || prog->anchored_utf8) {
842 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
a3621e74 843 DEBUG_EXECUTE_r( what = "anchored" );
6eb5f6b9 844 hop_and_restart:
1aa99e6b 845 s = HOP3c(t, 1, strend);
66e933ab
GS
846 if (s + start_shift + end_shift > strend) {
847 /* XXXX Should be taken into account earlier? */
a3621e74 848 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab
GS
849 "Could not match STCLASS...\n") );
850 goto fail;
851 }
5e39e1e5
HS
852 if (!check)
853 goto giveup;
a3621e74 854 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 855 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
856 what, (long)(s + start_shift - i_strpos)) );
857 goto restart;
858 }
66e933ab 859 /* Have both, check_string is floating */
6eb5f6b9
JH
860 if (t + start_shift >= check_at) /* Contradicts floating=check */
861 goto retry_floating_check;
862 /* Recheck anchored substring, but not floating... */
9041c2e3 863 s = check_at;
5e39e1e5
HS
864 if (!check)
865 goto giveup;
a3621e74 866 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 867 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
868 (long)(other_last - i_strpos)) );
869 goto do_other_anchored;
870 }
60e71179
GS
871 /* Another way we could have checked stclass at the
872 current position only: */
873 if (ml_anch) {
874 s = t = t + 1;
5e39e1e5
HS
875 if (!check)
876 goto giveup;
a3621e74 877 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 878 "Looking for /%s^%s/m starting at offset %ld...\n",
e4584336 879 PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
60e71179 880 goto try_at_offset;
66e933ab 881 }
33b8afdf 882 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
60e71179 883 goto fail;
6eb5f6b9
JH
884 /* Check is floating subtring. */
885 retry_floating_check:
886 t = check_at - start_shift;
a3621e74 887 DEBUG_EXECUTE_r( what = "floating" );
6eb5f6b9
JH
888 goto hop_and_restart;
889 }
b7953727 890 if (t != s) {
a3621e74 891 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 892 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
893 (long)(t - i_strpos), (long)(s - i_strpos))
894 );
895 }
896 else {
a3621e74 897 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b7953727
JH
898 "Does not contradict STCLASS...\n");
899 );
900 }
6eb5f6b9 901 }
5e39e1e5 902 giveup:
a3621e74 903 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
5e39e1e5
HS
904 PL_colors[4], (check ? "Guessed" : "Giving up"),
905 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 906 return s;
2c2d71f5
JH
907
908 fail_finish: /* Substring not found */
33b8afdf
JH
909 if (prog->check_substr || prog->check_utf8) /* could be removed already */
910 BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
cad2e5aa 911 fail:
a3621e74 912 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
e4584336 913 PL_colors[4], PL_colors[5]));
bd61b366 914 return NULL;
cad2e5aa 915}
9661b544 916
6eb5f6b9 917/* We know what class REx starts with. Try to find this position... */
3b0527fe
DM
918/* if reginfo is NULL, its a dryrun */
919
3c3eec57 920STATIC char *
3b0527fe
DM
921S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char
922*strend, const regmatch_info *reginfo)
a687059c 923{
27da23d5 924 dVAR;
1df70142 925 const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
6eb5f6b9 926 char *m;
d8093b23 927 STRLEN ln;
5dab1207 928 STRLEN lnc;
078c425b 929 register STRLEN uskip;
d8093b23
G
930 unsigned int c1;
931 unsigned int c2;
6eb5f6b9
JH
932 char *e;
933 register I32 tmp = 1; /* Scratch variable? */
a3b680e6 934 register const bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 935
6eb5f6b9
JH
936 /* We know what class it must start with. */
937 switch (OP(c)) {
6eb5f6b9 938 case ANYOF:
388cc4de 939 if (do_utf8) {
078c425b 940 while (s + (uskip = UTF8SKIP(s)) <= strend) {
388cc4de
HS
941 if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
942 !UTF8_IS_INVARIANT((U8)s[0]) ?
32fc9b6a
DM
943 reginclass(prog, c, (U8*)s, 0, do_utf8) :
944 REGINCLASS(prog, c, (U8*)s)) {
3b0527fe 945 if (tmp && (!reginfo || regtry(reginfo, s)))
388cc4de
HS
946 goto got_it;
947 else
948 tmp = doevery;
949 }
950 else
951 tmp = 1;
078c425b 952 s += uskip;
388cc4de
HS
953 }
954 }
955 else {
956 while (s < strend) {
957 STRLEN skip = 1;
958
32fc9b6a 959 if (REGINCLASS(prog, c, (U8*)s) ||
388cc4de
HS
960 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
961 /* The assignment of 2 is intentional:
962 * for the folded sharp s, the skip is 2. */
963 (skip = SHARP_S_SKIP))) {
3b0527fe 964 if (tmp && (!reginfo || regtry(reginfo, s)))
388cc4de
HS
965 goto got_it;
966 else
967 tmp = doevery;
968 }
969 else
970 tmp = 1;
971 s += skip;
972 }
a0d0e21e 973 }
6eb5f6b9 974 break;
f33976b4
DB
975 case CANY:
976 while (s < strend) {
3b0527fe 977 if (tmp && (!reginfo || regtry(reginfo, s)))
f33976b4
DB
978 goto got_it;
979 else
980 tmp = doevery;
981 s++;
982 }
983 break;
6eb5f6b9 984 case EXACTF:
5dab1207
NIS
985 m = STRING(c);
986 ln = STR_LEN(c); /* length to match in octets/bytes */
987 lnc = (I32) ln; /* length to match in characters */
1aa99e6b 988 if (UTF) {
a2a2844f 989 STRLEN ulen1, ulen2;
5dab1207 990 U8 *sm = (U8 *) m;
89ebb4a3
JH
991 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
992 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4ad0818d 993 const U32 uniflags = UTF8_ALLOW_DEFAULT;
a2a2844f
JH
994
995 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
996 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
997
89ebb4a3 998 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
041457d9 999 0, uniflags);
89ebb4a3 1000 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
041457d9 1001 0, uniflags);
5dab1207
NIS
1002 lnc = 0;
1003 while (sm < ((U8 *) m + ln)) {
1004 lnc++;
1005 sm += UTF8SKIP(sm);
1006 }
1aa99e6b
IH
1007 }
1008 else {
1009 c1 = *(U8*)m;
1010 c2 = PL_fold[c1];
1011 }
6eb5f6b9
JH
1012 goto do_exactf;
1013 case EXACTFL:
5dab1207
NIS
1014 m = STRING(c);
1015 ln = STR_LEN(c);
1016 lnc = (I32) ln;
d8093b23 1017 c1 = *(U8*)m;
6eb5f6b9
JH
1018 c2 = PL_fold_locale[c1];
1019 do_exactf:
db12adc6 1020 e = HOP3c(strend, -((I32)lnc), s);
b3c9acc1 1021
3b0527fe 1022 if (!reginfo && e < s)
6eb5f6b9 1023 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 1024
60a8b682
JH
1025 /* The idea in the EXACTF* cases is to first find the
1026 * first character of the EXACTF* node and then, if
1027 * necessary, case-insensitively compare the full
1028 * text of the node. The c1 and c2 are the first
1029 * characters (though in Unicode it gets a bit
1030 * more complicated because there are more cases
7f16dd3d
JH
1031 * than just upper and lower: one needs to use
1032 * the so-called folding case for case-insensitive
1033 * matching (called "loose matching" in Unicode).
1034 * ibcmp_utf8() will do just that. */
60a8b682 1035
1aa99e6b 1036 if (do_utf8) {
575cac57 1037 UV c, f;
89ebb4a3 1038 U8 tmpbuf [UTF8_MAXBYTES+1];
575cac57 1039 STRLEN len, foldlen;
4ad0818d 1040 const U32 uniflags = UTF8_ALLOW_DEFAULT;
09091399 1041 if (c1 == c2) {
5dab1207
NIS
1042 /* Upper and lower of 1st char are equal -
1043 * probably not a "letter". */
1aa99e6b 1044 while (s <= e) {
89ebb4a3 1045 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1046 uniflags);
80aecb99
JH
1047 if ( c == c1
1048 && (ln == len ||
66423254 1049 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1050 m, (char **)0, ln, (bool)UTF))
3b0527fe 1051 && (!reginfo || regtry(reginfo, s)) )
1aa99e6b 1052 goto got_it;
80aecb99 1053 else {
1df70142 1054 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1055 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1056 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1057 if ( f != c
1058 && (f == c1 || f == c2)
1059 && (ln == foldlen ||
66423254
JH
1060 !ibcmp_utf8((char *) foldbuf,
1061 (char **)0, foldlen, do_utf8,
d07ddd77 1062 m,
eb160463 1063 (char **)0, ln, (bool)UTF))
3b0527fe 1064 && (!reginfo || regtry(reginfo, s)) )
80aecb99
JH
1065 goto got_it;
1066 }
1aa99e6b
IH
1067 s += len;
1068 }
09091399
JH
1069 }
1070 else {
1aa99e6b 1071 while (s <= e) {
89ebb4a3 1072 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
041457d9 1073 uniflags);
80aecb99 1074
60a8b682 1075 /* Handle some of the three Greek sigmas cases.
8c01da3c
JH
1076 * Note that not all the possible combinations
1077 * are handled here: some of them are handled
1078 * by the standard folding rules, and some of
1079 * them (the character class or ANYOF cases)
1080 * are handled during compiletime in
1081 * regexec.c:S_regclass(). */
880bd946
JH
1082 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1083 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1084 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99
JH
1085
1086 if ( (c == c1 || c == c2)
1087 && (ln == len ||
66423254 1088 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1089 m, (char **)0, ln, (bool)UTF))
3b0527fe 1090 && (!reginfo || regtry(reginfo, s)) )
1aa99e6b 1091 goto got_it;
80aecb99 1092 else {
1df70142 1093 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
80aecb99 1094 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1095 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1096 if ( f != c
1097 && (f == c1 || f == c2)
1098 && (ln == foldlen ||
a6872d42 1099 !ibcmp_utf8((char *) foldbuf,
66423254 1100 (char **)0, foldlen, do_utf8,
d07ddd77 1101 m,
eb160463 1102 (char **)0, ln, (bool)UTF))
3b0527fe 1103 && (!reginfo || regtry(reginfo, s)) )
80aecb99
JH
1104 goto got_it;
1105 }
1aa99e6b
IH
1106 s += len;
1107 }
09091399 1108 }
1aa99e6b
IH
1109 }
1110 else {
1111 if (c1 == c2)
1112 while (s <= e) {
1113 if ( *(U8*)s == c1
1114 && (ln == 1 || !(OP(c) == EXACTF
1115 ? ibcmp(s, m, ln)
1116 : ibcmp_locale(s, m, ln)))
3b0527fe 1117 && (!reginfo || regtry(reginfo, s)) )
1aa99e6b
IH
1118 goto got_it;
1119 s++;
1120 }
1121 else
1122 while (s <= e) {
1123 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1124 && (ln == 1 || !(OP(c) == EXACTF
1125 ? ibcmp(s, m, ln)
1126 : ibcmp_locale(s, m, ln)))
3b0527fe 1127 && (!reginfo || regtry(reginfo, s)) )
1aa99e6b
IH
1128 goto got_it;
1129 s++;
1130 }
b3c9acc1
IZ
1131 }
1132 break;
bbce6d69 1133 case BOUNDL:
3280af22 1134 PL_reg_flags |= RF_tainted;
bbce6d69 1135 /* FALL THROUGH */
a0d0e21e 1136 case BOUND:
ffc61ed2 1137 if (do_utf8) {
12d33761 1138 if (s == PL_bostr)
ffc61ed2
JH
1139 tmp = '\n';
1140 else {
6136c704 1141 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1142 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1143 }
1144 tmp = ((OP(c) == BOUND ?
9041c2e3 1145 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1146 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1147 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1148 if (tmp == !(OP(c) == BOUND ?
3568d838 1149 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1150 isALNUM_LC_utf8((U8*)s)))
1151 {
1152 tmp = !tmp;
3b0527fe 1153 if ((!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1154 goto got_it;
1155 }
078c425b 1156 s += uskip;
a687059c 1157 }
a0d0e21e 1158 }
667bb95a 1159 else {
12d33761 1160 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1161 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1162 while (s < strend) {
1163 if (tmp ==
1164 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1165 tmp = !tmp;
3b0527fe 1166 if ((!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1167 goto got_it;
1168 }
1169 s++;
a0ed51b3 1170 }
a0ed51b3 1171 }
3b0527fe 1172 if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, s)))
a0ed51b3
LW
1173 goto got_it;
1174 break;
bbce6d69 1175 case NBOUNDL:
3280af22 1176 PL_reg_flags |= RF_tainted;
bbce6d69 1177 /* FALL THROUGH */
a0d0e21e 1178 case NBOUND:
ffc61ed2 1179 if (do_utf8) {
12d33761 1180 if (s == PL_bostr)
ffc61ed2
JH
1181 tmp = '\n';
1182 else {
6136c704 1183 U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
4ad0818d 1184 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
ffc61ed2
JH
1185 }
1186 tmp = ((OP(c) == NBOUND ?
9041c2e3 1187 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1a4fad37 1188 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1189 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1190 if (tmp == !(OP(c) == NBOUND ?
3568d838 1191 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1192 isALNUM_LC_utf8((U8*)s)))
1193 tmp = !tmp;
3b0527fe 1194 else if ((!reginfo || regtry(reginfo, s)))
ffc61ed2 1195 goto got_it;
078c425b 1196 s += uskip;
ffc61ed2 1197 }
a0d0e21e 1198 }
667bb95a 1199 else {
12d33761 1200 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1201 tmp = ((OP(c) == NBOUND ?
1202 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1203 while (s < strend) {
1204 if (tmp ==
1205 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1206 tmp = !tmp;
3b0527fe 1207 else if ((!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1208 goto got_it;
1209 s++;
1210 }
a0ed51b3 1211 }
3b0527fe 1212 if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, s)))
a0ed51b3
LW
1213 goto got_it;
1214 break;
a0d0e21e 1215 case ALNUM:
ffc61ed2 1216 if (do_utf8) {
1a4fad37 1217 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1218 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1219 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
3b0527fe 1220 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1221 goto got_it;
1222 else
1223 tmp = doevery;
1224 }
bbce6d69 1225 else
ffc61ed2 1226 tmp = 1;
078c425b 1227 s += uskip;
bbce6d69 1228 }
bbce6d69 1229 }
ffc61ed2
JH
1230 else {
1231 while (s < strend) {
1232 if (isALNUM(*s)) {
3b0527fe 1233 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1234 goto got_it;
1235 else
1236 tmp = doevery;
1237 }
a0ed51b3 1238 else
ffc61ed2
JH
1239 tmp = 1;
1240 s++;
a0ed51b3 1241 }
a0ed51b3
LW
1242 }
1243 break;
bbce6d69 1244 case ALNUML:
3280af22 1245 PL_reg_flags |= RF_tainted;
ffc61ed2 1246 if (do_utf8) {
078c425b 1247 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1248 if (isALNUM_LC_utf8((U8*)s)) {
3b0527fe 1249 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1250 goto got_it;
1251 else
1252 tmp = doevery;
1253 }
a687059c 1254 else
ffc61ed2 1255 tmp = 1;
078c425b 1256 s += uskip;
a0d0e21e 1257 }
a0d0e21e 1258 }
ffc61ed2
JH
1259 else {
1260 while (s < strend) {
1261 if (isALNUM_LC(*s)) {
3b0527fe 1262 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1263 goto got_it;
1264 else
1265 tmp = doevery;
1266 }
a0ed51b3 1267 else
ffc61ed2
JH
1268 tmp = 1;
1269 s++;
a0ed51b3 1270 }
a0ed51b3
LW
1271 }
1272 break;
a0d0e21e 1273 case NALNUM:
ffc61ed2 1274 if (do_utf8) {
1a4fad37 1275 LOAD_UTF8_CHARCLASS_ALNUM();
078c425b 1276 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1277 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
3b0527fe 1278 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1279 goto got_it;
1280 else
1281 tmp = doevery;
1282 }
bbce6d69 1283 else
ffc61ed2 1284 tmp = 1;
078c425b 1285 s += uskip;
bbce6d69 1286 }
bbce6d69 1287 }
ffc61ed2
JH
1288 else {
1289 while (s < strend) {
1290 if (!isALNUM(*s)) {
3b0527fe 1291 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1292 goto got_it;
1293 else
1294 tmp = doevery;
1295 }
a0ed51b3 1296 else
ffc61ed2
JH
1297 tmp = 1;
1298 s++;
a0ed51b3 1299 }
a0ed51b3
LW
1300 }
1301 break;
bbce6d69 1302 case NALNUML:
3280af22 1303 PL_reg_flags |= RF_tainted;
ffc61ed2 1304 if (do_utf8) {
078c425b 1305 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1306 if (!isALNUM_LC_utf8((U8*)s)) {
3b0527fe 1307 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1308 goto got_it;
1309 else
1310 tmp = doevery;
1311 }
a687059c 1312 else
ffc61ed2 1313 tmp = 1;
078c425b 1314 s += uskip;
a687059c 1315 }
a0d0e21e 1316 }
ffc61ed2
JH
1317 else {
1318 while (s < strend) {
1319 if (!isALNUM_LC(*s)) {
3b0527fe 1320 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1321 goto got_it;
1322 else
1323 tmp = doevery;
1324 }
a0ed51b3 1325 else
ffc61ed2
JH
1326 tmp = 1;
1327 s++;
a0ed51b3 1328 }
a0ed51b3
LW
1329 }
1330 break;
a0d0e21e 1331 case SPACE:
ffc61ed2 1332 if (do_utf8) {
1a4fad37 1333 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1334 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1335 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
3b0527fe 1336 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1337 goto got_it;
1338 else
1339 tmp = doevery;
1340 }
a0d0e21e 1341 else
ffc61ed2 1342 tmp = 1;
078c425b 1343 s += uskip;
2304df62 1344 }
a0d0e21e 1345 }
ffc61ed2
JH
1346 else {
1347 while (s < strend) {
1348 if (isSPACE(*s)) {
3b0527fe 1349 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1350 goto got_it;
1351 else
1352 tmp = doevery;
1353 }
a0ed51b3 1354 else
ffc61ed2
JH
1355 tmp = 1;
1356 s++;
a0ed51b3 1357 }
a0ed51b3
LW
1358 }
1359 break;
bbce6d69 1360 case SPACEL:
3280af22 1361 PL_reg_flags |= RF_tainted;
ffc61ed2 1362 if (do_utf8) {
078c425b 1363 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1364 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
3b0527fe 1365 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1366 goto got_it;
1367 else
1368 tmp = doevery;
1369 }
bbce6d69 1370 else
ffc61ed2 1371 tmp = 1;
078c425b 1372 s += uskip;
bbce6d69 1373 }
bbce6d69 1374 }
ffc61ed2
JH
1375 else {
1376 while (s < strend) {
1377 if (isSPACE_LC(*s)) {
3b0527fe 1378 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1379 goto got_it;
1380 else
1381 tmp = doevery;
1382 }
a0ed51b3 1383 else
ffc61ed2
JH
1384 tmp = 1;
1385 s++;
a0ed51b3 1386 }
a0ed51b3
LW
1387 }
1388 break;
a0d0e21e 1389 case NSPACE:
ffc61ed2 1390 if (do_utf8) {
1a4fad37 1391 LOAD_UTF8_CHARCLASS_SPACE();
078c425b 1392 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1393 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
3b0527fe 1394 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1395 goto got_it;
1396 else
1397 tmp = doevery;
1398 }
a0d0e21e 1399 else
ffc61ed2 1400 tmp = 1;
078c425b 1401 s += uskip;
a687059c 1402 }
a0d0e21e 1403 }
ffc61ed2
JH
1404 else {
1405 while (s < strend) {
1406 if (!isSPACE(*s)) {
3b0527fe 1407 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1408 goto got_it;
1409 else
1410 tmp = doevery;
1411 }
a0ed51b3 1412 else
ffc61ed2
JH
1413 tmp = 1;
1414 s++;
a0ed51b3 1415 }
a0ed51b3
LW
1416 }
1417 break;
bbce6d69 1418 case NSPACEL:
3280af22 1419 PL_reg_flags |= RF_tainted;
ffc61ed2 1420 if (do_utf8) {
078c425b 1421 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1422 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
3b0527fe 1423 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1424 goto got_it;
1425 else
1426 tmp = doevery;
1427 }
bbce6d69 1428 else
ffc61ed2 1429 tmp = 1;
078c425b 1430 s += uskip;
bbce6d69 1431 }
bbce6d69 1432 }
ffc61ed2
JH
1433 else {
1434 while (s < strend) {
1435 if (!isSPACE_LC(*s)) {
3b0527fe 1436 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1437 goto got_it;
1438 else
1439 tmp = doevery;
1440 }
a0ed51b3 1441 else
ffc61ed2
JH
1442 tmp = 1;
1443 s++;
a0ed51b3 1444 }
a0ed51b3
LW
1445 }
1446 break;
a0d0e21e 1447 case DIGIT:
ffc61ed2 1448 if (do_utf8) {
1a4fad37 1449 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1450 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1451 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
3b0527fe 1452 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1453 goto got_it;
1454 else
1455 tmp = doevery;
1456 }
a0d0e21e 1457 else
ffc61ed2 1458 tmp = 1;
078c425b 1459 s += uskip;
2b69d0c2 1460 }
a0d0e21e 1461 }
ffc61ed2
JH
1462 else {
1463 while (s < strend) {
1464 if (isDIGIT(*s)) {
3b0527fe 1465 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1466 goto got_it;
1467 else
1468 tmp = doevery;
1469 }
a0ed51b3 1470 else
ffc61ed2
JH
1471 tmp = 1;
1472 s++;
a0ed51b3 1473 }
a0ed51b3
LW
1474 }
1475 break;
b8c5462f
JH
1476 case DIGITL:
1477 PL_reg_flags |= RF_tainted;
ffc61ed2 1478 if (do_utf8) {
078c425b 1479 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1480 if (isDIGIT_LC_utf8((U8*)s)) {
3b0527fe 1481 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1482 goto got_it;
1483 else
1484 tmp = doevery;
1485 }
b8c5462f 1486 else
ffc61ed2 1487 tmp = 1;
078c425b 1488 s += uskip;
b8c5462f 1489 }
b8c5462f 1490 }
ffc61ed2
JH
1491 else {
1492 while (s < strend) {
1493 if (isDIGIT_LC(*s)) {
3b0527fe 1494 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1495 goto got_it;
1496 else
1497 tmp = doevery;
1498 }
b8c5462f 1499 else
ffc61ed2
JH
1500 tmp = 1;
1501 s++;
b8c5462f 1502 }
b8c5462f
JH
1503 }
1504 break;
a0d0e21e 1505 case NDIGIT:
ffc61ed2 1506 if (do_utf8) {
1a4fad37 1507 LOAD_UTF8_CHARCLASS_DIGIT();
078c425b 1508 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1509 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
3b0527fe 1510 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1511 goto got_it;
1512 else
1513 tmp = doevery;
1514 }
a0d0e21e 1515 else
ffc61ed2 1516 tmp = 1;
078c425b 1517 s += uskip;
a687059c 1518 }
a0d0e21e 1519 }
ffc61ed2
JH
1520 else {
1521 while (s < strend) {
1522 if (!isDIGIT(*s)) {
3b0527fe 1523 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1524 goto got_it;
1525 else
1526 tmp = doevery;
1527 }
a0ed51b3 1528 else
ffc61ed2
JH
1529 tmp = 1;
1530 s++;
a0ed51b3 1531 }
a0ed51b3
LW
1532 }
1533 break;
b8c5462f
JH
1534 case NDIGITL:
1535 PL_reg_flags |= RF_tainted;
ffc61ed2 1536 if (do_utf8) {
078c425b 1537 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1538 if (!isDIGIT_LC_utf8((U8*)s)) {
3b0527fe 1539 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1540 goto got_it;
1541 else
1542 tmp = doevery;
1543 }
b8c5462f 1544 else
ffc61ed2 1545 tmp = 1;
078c425b 1546 s += uskip;
b8c5462f 1547 }
a0ed51b3 1548 }
ffc61ed2
JH
1549 else {
1550 while (s < strend) {
1551 if (!isDIGIT_LC(*s)) {
3b0527fe 1552 if (tmp && (!reginfo || regtry(reginfo, s)))
ffc61ed2
JH
1553 goto got_it;
1554 else
1555 tmp = doevery;
1556 }
cf93c79d 1557 else
ffc61ed2
JH
1558 tmp = 1;
1559 s++;
b8c5462f 1560 }
b8c5462f
JH
1561 }
1562 break;
b3c9acc1 1563 default:
3c3eec57
GS
1564 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1565 break;
d6a28714 1566 }
6eb5f6b9
JH
1567 return 0;
1568 got_it:
1569 return s;
1570}
1571
1572/*
1573 - regexec_flags - match a regexp against a string
1574 */
1575I32
1576Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1577 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1578/* strend: pointer to null at end of string */
1579/* strbeg: real beginning of string */
1580/* minend: end of match must be >=minend after stringarg. */
1581/* data: May be used for some additional optimizations. */
1582/* nosave: For optimizations. */
1583{
97aff369 1584 dVAR;
6eb5f6b9
JH
1585 register char *s;
1586 register regnode *c;
1587 register char *startpos = stringarg;
6eb5f6b9
JH
1588 I32 minlen; /* must match at least this many chars */
1589 I32 dontbother = 0; /* how many characters not to try at end */
6eb5f6b9
JH
1590 I32 end_shift = 0; /* Same for the end. */ /* CC */
1591 I32 scream_pos = -1; /* Internal iterator of scream. */
ccac19ea 1592 char *scream_olds = NULL;
6eb5f6b9 1593 SV* oreplsv = GvSV(PL_replgv);
1df70142 1594 const bool do_utf8 = DO_UTF8(sv);
2757e526 1595 I32 multiline;
2a782b5b 1596#ifdef DEBUGGING
2757e526
JH
1597 SV* dsv0;
1598 SV* dsv1;
2a782b5b 1599#endif
3b0527fe 1600 regmatch_info reginfo; /* create some info to pass to regtry etc */
a3621e74
YO
1601
1602 GET_RE_DEBUG_FLAGS_DECL;
1603
9d4ba2ae 1604 PERL_UNUSED_ARG(data);
6eb5f6b9
JH
1605
1606 /* Be paranoid... */
1607 if (prog == NULL || startpos == NULL) {
1608 Perl_croak(aTHX_ "NULL regexp parameter");
1609 return 0;
1610 }
1611
2757e526 1612 multiline = prog->reganch & PMf_MULTILINE;
3b0527fe 1613 reginfo.prog = prog;
2757e526
JH
1614
1615#ifdef DEBUGGING
1616 dsv0 = PERL_DEBUG_PAD_ZERO(0);
1617 dsv1 = PERL_DEBUG_PAD_ZERO(1);
1618#endif
1619
bac06658
JH
1620 RX_MATCH_UTF8_set(prog, do_utf8);
1621
6eb5f6b9 1622 minlen = prog->minlen;
61a36c01 1623 if (strend - startpos < minlen) {
a3621e74 1624 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584
JH
1625 "String too short [regexec_flags]...\n"));
1626 goto phooey;
1aa99e6b 1627 }
6eb5f6b9 1628
6eb5f6b9
JH
1629 /* Check validity of program. */
1630 if (UCHARAT(prog->program) != REG_MAGIC) {
1631 Perl_croak(aTHX_ "corrupted regexp program");
1632 }
1633
1634 PL_reg_flags = 0;
1635 PL_reg_eval_set = 0;
1636 PL_reg_maxiter = 0;
1637
1638 if (prog->reganch & ROPT_UTF8)
1639 PL_reg_flags |= RF_utf8;
1640
1641 /* Mark beginning of line for ^ and lookbehind. */
3b0527fe 1642 reginfo.bol = startpos; /* XXX not used ??? */
6eb5f6b9 1643 PL_bostr = strbeg;
3b0527fe 1644 reginfo.sv = sv;
6eb5f6b9
JH
1645
1646 /* Mark end of line for $ (and such) */
1647 PL_regeol = strend;
1648
1649 /* see how far we have to get to not match where we matched before */
3b0527fe 1650 reginfo.till = startpos+minend;
6eb5f6b9 1651
6eb5f6b9
JH
1652 /* If there is a "must appear" string, look for it. */
1653 s = startpos;
1654
3b0527fe 1655 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to set reginfo->ganch */
6eb5f6b9
JH
1656 MAGIC *mg;
1657
1658 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
3b0527fe 1659 reginfo.ganch = startpos;
6eb5f6b9
JH
1660 else if (sv && SvTYPE(sv) >= SVt_PVMG
1661 && SvMAGIC(sv)
14befaf4
DM
1662 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1663 && mg->mg_len >= 0) {
3b0527fe 1664 reginfo.ganch = strbeg + mg->mg_len; /* Defined pos() */
6eb5f6b9 1665 if (prog->reganch & ROPT_ANCH_GPOS) {
3b0527fe 1666 if (s > reginfo.ganch)
6eb5f6b9 1667 goto phooey;
3b0527fe 1668 s = reginfo.ganch;
6eb5f6b9
JH
1669 }
1670 }
1671 else /* pos() not defined */
3b0527fe 1672 reginfo.ganch = strbeg;
6eb5f6b9
JH
1673 }
1674
a0714e2c 1675 if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
6eb5f6b9
JH
1676 re_scream_pos_data d;
1677
1678 d.scream_olds = &scream_olds;
1679 d.scream_pos = &scream_pos;
1680 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7 1681 if (!s) {
a3621e74 1682 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1683 goto phooey; /* not present */
3fa9c3d7 1684 }
6eb5f6b9
JH
1685 }
1686
a3621e74 1687 DEBUG_EXECUTE_r({
1df70142
AL
1688 const char * const s0 = UTF
1689 ? pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
1690 UNI_DISPLAY_REGEX)
1691 : prog->precomp;
1692 const int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1693 const char * const s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
c728cb41 1694 UNI_DISPLAY_REGEX) : startpos;
1df70142 1695 const int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
2a782b5b
JH
1696 if (!PL_colorset)
1697 reginitcolors();
1698 PerlIO_printf(Perl_debug_log,
a0288114 1699 "%sMatching REx%s \"%s%*.*s%s%s\" against \"%s%.*s%s%s\"\n",
e4584336 1700 PL_colors[4], PL_colors[5], PL_colors[0],
9e55ce06 1701 len0, len0, s0,
2a782b5b 1702 PL_colors[1],
9e55ce06 1703 len0 > 60 ? "..." : "",
2a782b5b 1704 PL_colors[0],
9e55ce06
JH
1705 (int)(len1 > 60 ? 60 : len1),
1706 s1, PL_colors[1],
1707 (len1 > 60 ? "..." : "")
2a782b5b
JH
1708 );
1709 });
6eb5f6b9
JH
1710
1711 /* Simplest case: anchored match need be tried only once. */
1712 /* [unless only anchor is BOL and multiline is set] */
1713 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
3b0527fe 1714 if (s == startpos && regtry(&reginfo, startpos))
6eb5f6b9 1715 goto got_it;
7fba1cd6 1716 else if (multiline || (prog->reganch & ROPT_IMPLICIT)
6eb5f6b9
JH
1717 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1718 {
1719 char *end;
1720
1721 if (minlen)
1722 dontbother = minlen - 1;
1aa99e6b 1723 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9 1724 /* for multiline we only have to try after newlines */
33b8afdf 1725 if (prog->check_substr || prog->check_utf8) {
6eb5f6b9
JH
1726 if (s == startpos)
1727 goto after_try;
1728 while (1) {
3b0527fe 1729 if (regtry(&reginfo, s))
6eb5f6b9
JH
1730 goto got_it;
1731 after_try:
1732 if (s >= end)
1733 goto phooey;
1734 if (prog->reganch & RE_USE_INTUIT) {
1735 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1736 if (!s)
1737 goto phooey;
1738 }
1739 else
1740 s++;
1741 }
1742 } else {
1743 if (s > startpos)
1744 s--;
1745 while (s < end) {
1746 if (*s++ == '\n') { /* don't need PL_utf8skip here */
3b0527fe 1747 if (regtry(&reginfo, s))
6eb5f6b9
JH
1748 goto got_it;
1749 }
1750 }
1751 }
1752 }
1753 goto phooey;
1754 } else if (prog->reganch & ROPT_ANCH_GPOS) {
3b0527fe 1755 if (regtry(&reginfo, reginfo.ganch))
6eb5f6b9
JH
1756 goto got_it;
1757 goto phooey;
1758 }
1759
1760 /* Messy cases: unanchored match. */
33b8afdf 1761 if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1762 /* we have /x+whatever/ */
1763 /* it must be a one character string (XXXX Except UTF?) */
33b8afdf 1764 char ch;
bf93d4cc
GS
1765#ifdef DEBUGGING
1766 int did_match = 0;
1767#endif
33b8afdf
JH
1768 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1769 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
3f7c398e 1770 ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
bf93d4cc 1771
1aa99e6b 1772 if (do_utf8) {
6eb5f6b9
JH
1773 while (s < strend) {
1774 if (*s == ch) {
a3621e74 1775 DEBUG_EXECUTE_r( did_match = 1 );
3b0527fe 1776 if (regtry(&reginfo, s)) goto got_it;
6eb5f6b9
JH
1777 s += UTF8SKIP(s);
1778 while (s < strend && *s == ch)
1779 s += UTF8SKIP(s);
1780 }
1781 s += UTF8SKIP(s);
1782 }
1783 }
1784 else {
1785 while (s < strend) {
1786 if (*s == ch) {
a3621e74 1787 DEBUG_EXECUTE_r( did_match = 1 );
3b0527fe 1788 if (regtry(&reginfo, s)) goto got_it;
6eb5f6b9
JH
1789 s++;
1790 while (s < strend && *s == ch)
1791 s++;
1792 }
1793 s++;
1794 }
1795 }
a3621e74 1796 DEBUG_EXECUTE_r(if (!did_match)
bf93d4cc 1797 PerlIO_printf(Perl_debug_log,
b7953727
JH
1798 "Did not find anchored character...\n")
1799 );
6eb5f6b9 1800 }
a0714e2c
SS
1801 else if (prog->anchored_substr != NULL
1802 || prog->anchored_utf8 != NULL
1803 || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
33b8afdf
JH
1804 && prog->float_max_offset < strend - s)) {
1805 SV *must;
1806 I32 back_max;
1807 I32 back_min;
1808 char *last;
6eb5f6b9 1809 char *last1; /* Last position checked before */
bf93d4cc
GS
1810#ifdef DEBUGGING
1811 int did_match = 0;
1812#endif
33b8afdf
JH
1813 if (prog->anchored_substr || prog->anchored_utf8) {
1814 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1815 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1816 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1817 back_max = back_min = prog->anchored_offset;
1818 } else {
1819 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1820 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1821 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1822 back_max = prog->float_max_offset;
1823 back_min = prog->float_min_offset;
1824 }
1825 if (must == &PL_sv_undef)
1826 /* could not downgrade utf8 check substring, so must fail */
1827 goto phooey;
1828
1829 last = HOP3c(strend, /* Cannot start after this */
1830 -(I32)(CHR_SVLEN(must)
1831 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9
JH
1832
1833 if (s > PL_bostr)
1834 last1 = HOPc(s, -1);
1835 else
1836 last1 = s - 1; /* bogus */
1837
a0288114 1838 /* XXXX check_substr already used to find "s", can optimize if
6eb5f6b9
JH
1839 check_substr==must. */
1840 scream_pos = -1;
1841 dontbother = end_shift;
1842 strend = HOPc(strend, -dontbother);
1843 while ( (s <= last) &&
9041c2e3 1844 ((flags & REXEC_SCREAM)
1aa99e6b 1845 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1846 end_shift, &scream_pos, 0))
1aa99e6b 1847 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1848 (unsigned char*)strend, must,
7fba1cd6 1849 multiline ? FBMrf_MULTILINE : 0))) ) {
4addbd3b
HS
1850 /* we may be pointing at the wrong string */
1851 if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
3f7c398e 1852 s = strbeg + (s - SvPVX_const(sv));
a3621e74 1853 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1854 if (HOPc(s, -back_max) > last1) {
1855 last1 = HOPc(s, -back_min);
1856 s = HOPc(s, -back_max);
1857 }
1858 else {
52657f30 1859 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
6eb5f6b9
JH
1860
1861 last1 = HOPc(s, -back_min);
52657f30 1862 s = t;
6eb5f6b9 1863 }
1aa99e6b 1864 if (do_utf8) {
6eb5f6b9 1865 while (s <= last1) {
3b0527fe 1866 if (regtry(&reginfo, s))
6eb5f6b9
JH
1867 goto got_it;
1868 s += UTF8SKIP(s);
1869 }
1870 }
1871 else {
1872 while (s <= last1) {
3b0527fe 1873 if (regtry(&reginfo, s))
6eb5f6b9
JH
1874 goto got_it;
1875 s++;
1876 }
1877 }
1878 }
a3621e74 1879 DEBUG_EXECUTE_r(if (!did_match)
b7953727 1880 PerlIO_printf(Perl_debug_log,
a0288114 1881 "Did not find %s substr \"%s%.*s%s\"%s...\n",
33b8afdf 1882 ((must == prog->anchored_substr || must == prog->anchored_utf8)
bf93d4cc
GS
1883 ? "anchored" : "floating"),
1884 PL_colors[0],
1885 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
3f7c398e 1886 SvPVX_const(must),
b7953727
JH
1887 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1888 );
6eb5f6b9
JH
1889 goto phooey;
1890 }
155aba94 1891 else if ((c = prog->regstclass)) {
f14c76ed
RGS
1892 if (minlen) {
1893 I32 op = (U8)OP(prog->regstclass);
66e933ab 1894 /* don't bother with what can't match */
f14c76ed
RGS
1895 if (PL_regkind[op] != EXACT && op != CANY)
1896 strend = HOPc(strend, -(minlen - 1));
1897 }
a3621e74 1898 DEBUG_EXECUTE_r({
ffc61ed2 1899 SV *prop = sv_newmortal();
cfd0369c
NC
1900 const char *s0;
1901 const char *s1;
9e55ce06
JH
1902 int len0;
1903 int len1;
1904
32fc9b6a 1905 regprop(prog, prop, c);
9e55ce06 1906 s0 = UTF ?
3f7c398e 1907 pv_uni_display(dsv0, (U8*)SvPVX_const(prop), SvCUR(prop), 60,
c728cb41 1908 UNI_DISPLAY_REGEX) :
cfd0369c 1909 SvPVX_const(prop);
9e55ce06
JH
1910 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1911 s1 = UTF ?
c728cb41 1912 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
9e55ce06
JH
1913 len1 = UTF ? SvCUR(dsv1) : strend - s;
1914 PerlIO_printf(Perl_debug_log,
a0288114 1915 "Matching stclass \"%*.*s\" against \"%*.*s\"\n",
9e55ce06
JH
1916 len0, len0, s0,
1917 len1, len1, s1);
ffc61ed2 1918 });
3b0527fe 1919 if (find_byclass(prog, c, s, strend, &reginfo))
6eb5f6b9 1920 goto got_it;
a3621e74 1921 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1922 }
1923 else {
1924 dontbother = 0;
a0714e2c 1925 if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
33b8afdf 1926 /* Trim the end. */
d6a28714 1927 char *last;
33b8afdf
JH
1928 SV* float_real;
1929
1930 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1931 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1932 float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
d6a28714
JH
1933
1934 if (flags & REXEC_SCREAM) {
33b8afdf 1935 last = screaminstr(sv, float_real, s - strbeg,
d6a28714
JH
1936 end_shift, &scream_pos, 1); /* last one */
1937 if (!last)
ffc61ed2 1938 last = scream_olds; /* Only one occurrence. */
4addbd3b
HS
1939 /* we may be pointing at the wrong string */
1940 else if (RX_MATCH_COPIED(prog))
3f7c398e 1941 s = strbeg + (s - SvPVX_const(sv));
b8c5462f 1942 }
d6a28714
JH
1943 else {
1944 STRLEN len;
cfd0369c 1945 const char * const little = SvPV_const(float_real, len);
d6a28714 1946
33b8afdf 1947 if (SvTAIL(float_real)) {
d6a28714
JH
1948 if (memEQ(strend - len + 1, little, len - 1))
1949 last = strend - len + 1;
7fba1cd6 1950 else if (!multiline)
9041c2e3 1951 last = memEQ(strend - len, little, len)
bd61b366 1952 ? strend - len : NULL;
b8c5462f 1953 else
d6a28714
JH
1954 goto find_last;
1955 } else {
1956 find_last:
9041c2e3 1957 if (len)
d6a28714 1958 last = rninstr(s, strend, little, little + len);
b8c5462f 1959 else
a0288114 1960 last = strend; /* matching "$" */
b8c5462f 1961 }
b8c5462f 1962 }
bf93d4cc 1963 if (last == NULL) {
a3621e74 1964 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
bf93d4cc 1965 "%sCan't trim the tail, match fails (should not happen)%s\n",
e4584336 1966 PL_colors[4], PL_colors[5]));
bf93d4cc
GS
1967 goto phooey; /* Should not happen! */
1968 }
d6a28714
JH
1969 dontbother = strend - last + prog->float_min_offset;
1970 }
1971 if (minlen && (dontbother < minlen))
1972 dontbother = minlen - 1;
1973 strend -= dontbother; /* this one's always in bytes! */
1974 /* We don't know much -- general case. */
1aa99e6b 1975 if (do_utf8) {
d6a28714 1976 for (;;) {
3b0527fe 1977 if (regtry(&reginfo, s))
d6a28714
JH
1978 goto got_it;
1979 if (s >= strend)
1980 break;
b8c5462f 1981 s += UTF8SKIP(s);
d6a28714
JH
1982 };
1983 }
1984 else {
1985 do {
3b0527fe 1986 if (regtry(&reginfo, s))
d6a28714
JH
1987 goto got_it;
1988 } while (s++ < strend);
1989 }
1990 }
1991
1992 /* Failure. */
1993 goto phooey;
1994
1995got_it:
1996 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1997
1998 if (PL_reg_eval_set) {
1999 /* Preserve the current value of $^R */
2000 if (oreplsv != GvSV(PL_replgv))
2001 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2002 restored, the value remains
2003 the same. */
4f639d21 2004 restore_pos(aTHX_ prog);
d6a28714
JH
2005 }
2006
2007 /* make sure $`, $&, $', and $digit will work later */
2008 if ( !(flags & REXEC_NOT_FIRST) ) {
ed252734 2009 RX_MATCH_COPY_FREE(prog);
d6a28714
JH
2010 if (flags & REXEC_COPY_STR) {
2011 I32 i = PL_regeol - startpos + (stringarg - strbeg);
f8c7b90f 2012#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2013 if ((SvIsCOW(sv)
2014 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2015 if (DEBUG_C_TEST) {
2016 PerlIO_printf(Perl_debug_log,
2017 "Copy on write: regexp capture, type %d\n",
2018 (int) SvTYPE(sv));
2019 }
2020 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
d5263905 2021 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
ed252734
NC
2022 assert (SvPOKp(prog->saved_copy));
2023 } else
2024#endif
2025 {
2026 RX_MATCH_COPIED_on(prog);
2027 s = savepvn(strbeg, i);
2028 prog->subbeg = s;
2029 }
d6a28714 2030 prog->sublen = i;
d6a28714
JH
2031 }
2032 else {
2033 prog->subbeg = strbeg;
2034 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2035 }
2036 }
9041c2e3 2037
d6a28714
JH
2038 return 1;
2039
2040phooey:
a3621e74 2041 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
e4584336 2042 PL_colors[4], PL_colors[5]));
d6a28714 2043 if (PL_reg_eval_set)
4f639d21 2044 restore_pos(aTHX_ prog);
d6a28714
JH
2045 return 0;
2046}
2047
2048/*
2049 - regtry - try match at specific point
2050 */
2051STATIC I32 /* 0 failure, 1 success */
3b0527fe 2052S_regtry(pTHX_ const regmatch_info *reginfo, char *startpos)
d6a28714 2053{
97aff369 2054 dVAR;
d6a28714
JH
2055 register I32 *sp;
2056 register I32 *ep;
2057 CHECKPOINT lastcp;
3b0527fe 2058 regexp *prog = reginfo->prog;
a3621e74 2059 GET_RE_DEBUG_FLAGS_DECL;
d6a28714 2060
02db2b7b
IZ
2061#ifdef DEBUGGING
2062 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
2063#endif
d6a28714
JH
2064 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2065 MAGIC *mg;
2066
2067 PL_reg_eval_set = RS_init;
a3621e74 2068 DEBUG_EXECUTE_r(DEBUG_s(
b900a521
JH
2069 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2070 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 2071 ));
e8347627 2072 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
2073 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2074 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2075 SAVETMPS;
2076 /* Apparently this is not needed, judging by wantarray. */
e8347627 2077 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
2078 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2079
3b0527fe 2080 if (reginfo->sv) {
d6a28714 2081 /* Make $_ available to executed code. */
3b0527fe 2082 if (reginfo->sv != DEFSV) {
59f00321 2083 SAVE_DEFSV;
3b0527fe 2084 DEFSV = reginfo->sv;
b8c5462f 2085 }
d6a28714 2086
3b0527fe
DM
2087 if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2088 && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
d6a28714 2089 /* prepare for quick setting of pos */
d300d9fa
NC
2090#ifdef PERL_OLD_COPY_ON_WRITE
2091 if (SvIsCOW(sv))
2092 sv_force_normal_flags(sv, 0);
2093#endif
3b0527fe 2094 mg = sv_magicext(reginfo->sv, (SV*)0, PERL_MAGIC_regex_global,
d300d9fa 2095 &PL_vtbl_mglob, NULL, 0);
d6a28714 2096 mg->mg_len = -1;
b8c5462f 2097 }
d6a28714
JH
2098 PL_reg_magic = mg;
2099 PL_reg_oldpos = mg->mg_len;
4f639d21 2100 SAVEDESTRUCTOR_X(restore_pos, prog);
d6a28714 2101 }
09687e5a 2102 if (!PL_reg_curpm) {
a02a5408 2103 Newxz(PL_reg_curpm, 1, PMOP);
09687e5a
AB
2104#ifdef USE_ITHREADS
2105 {
2106 SV* repointer = newSViv(0);
577e12cc 2107 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2108 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2109 av_push(PL_regex_padav,repointer);
2110 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2111 PL_regex_pad = AvARRAY(PL_regex_padav);
2112 }
2113#endif
2114 }
aaa362c4 2115 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2116 PL_reg_oldcurpm = PL_curpm;
2117 PL_curpm = PL_reg_curpm;
2118 if (RX_MATCH_COPIED(prog)) {
2119 /* Here is a serious problem: we cannot rewrite subbeg,
2120 since it may be needed if this match fails. Thus
2121 $` inside (?{}) could fail... */
2122 PL_reg_oldsaved = prog->subbeg;
2123 PL_reg_oldsavedlen = prog->sublen;
f8c7b90f 2124#ifdef PERL_OLD_COPY_ON_WRITE
ed252734
NC
2125 PL_nrs = prog->saved_copy;
2126#endif
d6a28714
JH
2127 RX_MATCH_COPIED_off(prog);
2128 }
2129 else
bd61b366 2130 PL_reg_oldsaved = NULL;
d6a28714
JH
2131 prog->subbeg = PL_bostr;
2132 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2133 }
973dddac 2134 prog->startp[0] = startpos - PL_bostr;
d6a28714
JH
2135 PL_reginput = startpos;
2136 PL_regstartp = prog->startp;
2137 PL_regendp = prog->endp;
2138 PL_reglastparen = &prog->lastparen;
a01268b5 2139 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714 2140 prog->lastparen = 0;
03994de8 2141 prog->lastcloseparen = 0;
d6a28714 2142 PL_regsize = 0;
a3621e74 2143 DEBUG_EXECUTE_r(PL_reg_starttry = startpos);
d6a28714
JH
2144 if (PL_reg_start_tmpl <= prog->nparens) {
2145 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2146 if(PL_reg_start_tmp)
2147 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2148 else
a02a5408 2149 Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
d6a28714
JH
2150 }
2151
2152 /* XXXX What this code is doing here?!!! There should be no need
2153 to do this again and again, PL_reglastparen should take care of
3dd2943c 2154 this! --ilya*/
dafc8851
JH
2155
2156 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2157 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2158 * PL_reglastparen), is not needed at all by the test suite
2159 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2160 * enough, for building DynaLoader, or otherwise this
2161 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2162 * will happen. Meanwhile, this code *is* needed for the
2163 * above-mentioned test suite tests to succeed. The common theme
2164 * on those tests seems to be returning null fields from matches.
2165 * --jhi */
dafc8851 2166#if 1
d6a28714
JH
2167 sp = prog->startp;
2168 ep = prog->endp;
2169 if (prog->nparens) {
097eb12c 2170 register I32 i;
eb160463 2171 for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
d6a28714
JH
2172 *++sp = -1;
2173 *++ep = -1;
2174 }
2175 }
dafc8851 2176#endif
02db2b7b 2177 REGCP_SET(lastcp);
3b0527fe 2178 if (regmatch(reginfo, prog->program + 1)) {
d6a28714
JH
2179 prog->endp[0] = PL_reginput - PL_bostr;
2180 return 1;
2181 }
02db2b7b 2182 REGCP_UNWIND(lastcp);
d6a28714
JH
2183 return 0;
2184}
2185
02db2b7b
IZ
2186#define RE_UNWIND_BRANCH 1
2187#define RE_UNWIND_BRANCHJ 2
2188
2189union re_unwind_t;
2190
2191typedef struct { /* XX: makes sense to enlarge it... */
2192 I32 type;
2193 I32 prev;
2194 CHECKPOINT lastcp;
2195} re_unwind_generic_t;
2196
2197typedef struct {
2198 I32 type;
2199 I32 prev;
2200 CHECKPOINT lastcp;
2201 I32 lastparen;
2202 regnode *next;
2203 char *locinput;
2204 I32 nextchr;
3a2830be 2205 int minmod;
02db2b7b
IZ
2206#ifdef DEBUGGING
2207 int regindent;
2208#endif
2209} re_unwind_branch_t;
2210
2211typedef union re_unwind_t {
2212 I32 type;
2213 re_unwind_generic_t generic;
2214 re_unwind_branch_t branch;
2215} re_unwind_t;
2216
8ba1375e
MJD
2217#define sayYES goto yes
2218#define sayNO goto no
e0f9d4a8 2219#define sayNO_ANYOF goto no_anyof
8ba1375e 2220#define sayYES_FINAL goto yes_final
8ba1375e
MJD
2221#define sayNO_FINAL goto no_final
2222#define sayNO_SILENT goto do_no
2223#define saySAME(x) if (x) goto yes; else goto no
2224
3ab3c9b4
HS
2225#define POSCACHE_SUCCESS 0 /* caching success rather than failure */
2226#define POSCACHE_SEEN 1 /* we know what we're caching */
2227#define POSCACHE_START 2 /* the real cache: this bit maps to pos 0 */
7409bbd3 2228
3ab3c9b4 2229#define CACHEsayYES STMT_START { \
d8319b27 2230 if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \
7409bbd3
DM
2231 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) { \
2232 PL_reg_poscache[0] |= (1<<POSCACHE_SUCCESS) | (1<<POSCACHE_SEEN); \
2233 PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
2234 } \
2235 else if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS)) { \
2236 PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
2237 } \
2238 else { \
3ab3c9b4
HS
2239 /* cache records failure, but this is success */ \
2240 DEBUG_r( \
2241 PerlIO_printf(Perl_debug_log, \
2242 "%*s (remove success from failure cache)\n", \
2243 REPORT_CODE_OFF+PL_regindent*2, "") \
2244 ); \
d8319b27 2245 PL_reg_poscache[st->u.whilem.cache_offset] &= ~(1<<st->u.whilem.cache_bit); \
3ab3c9b4
HS
2246 } \
2247 } \
2248 sayYES; \
2249} STMT_END
7409bbd3 2250
3ab3c9b4 2251#define CACHEsayNO STMT_START { \
d8319b27 2252 if (st->u.whilem.cache_offset | st->u.whilem.cache_bit) { \
7409bbd3 2253 if (!(PL_reg_poscache[0] & (1<<POSCACHE_SEEN))) { \
3ab3c9b4 2254 PL_reg_poscache[0] |= (1<<POSCACHE_SEEN); \
7409bbd3
DM
2255 PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
2256 } \
2257 else if (!(PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))) { \
2258 PL_reg_poscache[st->u.whilem.cache_offset] |= (1<<st->u.whilem.cache_bit); \
2259 } \
2260 else { \
3ab3c9b4
HS
2261 /* cache records success, but this is failure */ \
2262 DEBUG_r( \
2263 PerlIO_printf(Perl_debug_log, \
2264 "%*s (remove failure from success cache)\n", \
2265 REPORT_CODE_OFF+PL_regindent*2, "") \
2266 ); \
d8319b27 2267 PL_reg_poscache[st->u.whilem.cache_offset] &= ~(1<<st->u.whilem.cache_bit); \
3ab3c9b4
HS
2268 } \
2269 } \
2270 sayNO; \
2271} STMT_END
2272
a3621e74
YO
2273/* this is used to determine how far from the left messages like
2274 'failed...' are printed. Currently 29 makes these messages line
2275 up with the opcode they refer to. Earlier perls used 25 which
2276 left these messages outdented making reviewing a debug output
2277 quite difficult.
2278*/
2279#define REPORT_CODE_OFF 29
2280
2281
2282/* Make sure there is a test for this +1 options in re_tests */
2283#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2284
9e137952
DM
2285/* this value indiciates that the c1/c2 "next char" test should be skipped */
2286#define CHRTEST_VOID -1000
2287
86545054
DM
2288#define SLAB_FIRST(s) (&(s)->states[0])
2289#define SLAB_LAST(s) (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2290
5d9a96ca
DM
2291/* grab a new slab and return the first slot in it */
2292
2293STATIC regmatch_state *
2294S_push_slab(pTHX)
2295{
54df2634
NC
2296#if PERL_VERSION < 9
2297 dMY_CXT;
2298#endif
5d9a96ca
DM
2299 regmatch_slab *s = PL_regmatch_slab->next;
2300 if (!s) {
2301 Newx(s, 1, regmatch_slab);
2302 s->prev = PL_regmatch_slab;
2303 s->next = NULL;
2304 PL_regmatch_slab->next = s;
2305 }
2306 PL_regmatch_slab = s;
86545054 2307 return SLAB_FIRST(s);
5d9a96ca 2308}
5b47454d 2309
95b24440
DM
2310/* simulate a recursive call to regmatch */
2311
2312#define REGMATCH(ns, where) \
5d9a96ca
DM
2313 st->scan = scan; \
2314 scan = (ns); \
2315 st->resume_state = resume_##where; \
95b24440
DM
2316 goto start_recurse; \
2317 resume_point_##where:
2318
aa283a38
DM
2319
2320/* push a new regex state. Set newst to point to it */
2321
2322#define PUSH_STATE(newst, resume) \
2323 depth++; \
2324 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "PUSH STATE(%d)\n", depth)); \
2325 st->scan = scan; \
2326 st->next = next; \
2327 st->n = n; \
2328 st->locinput = locinput; \
2329 st->resume_state = resume; \
2330 newst = st+1; \
86545054 2331 if (newst > SLAB_LAST(PL_regmatch_slab)) \
aa283a38
DM
2332 newst = S_push_slab(aTHX); \
2333 PL_regmatch_state = newst; \
2334 newst->cc = 0; \
2335 newst->minmod = 0; \
2336 newst->sw = 0; \
2337 newst->logical = 0; \
2338 newst->unwind = 0; \
2339 locinput = PL_reginput; \
2340 nextchr = UCHARAT(locinput);
2341
2342#define POP_STATE \
2343 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE(%d)\n", depth)); \
2344 depth--; \
2345 st--; \
86545054 2346 if (st < SLAB_FIRST(PL_regmatch_slab)) { \
aa283a38 2347 PL_regmatch_slab = PL_regmatch_slab->prev; \
86545054 2348 st = SLAB_LAST(PL_regmatch_slab); \
aa283a38
DM
2349 } \
2350 PL_regmatch_state = st; \
2351 scan = st->scan; \
2352 next = st->next; \
2353 n = st->n; \
2354 locinput = st->locinput; \
2355 nextchr = UCHARAT(locinput);
2356
d6a28714
JH
2357/*
2358 - regmatch - main matching routine
2359 *
2360 * Conceptually the strategy is simple: check to see whether the current
2361 * node matches, call self recursively to see whether the rest matches,
2362 * and then act accordingly. In practice we make some effort to avoid
2363 * recursion, in particular by going through "ordinary" nodes (that don't
2364 * need to know whether the rest of the match failed) by a loop instead of
2365 * by recursion.
2366 */
2367/* [lwall] I've hoisted the register declarations to the outer block in order to
2368 * maybe save a little bit of pushing and popping on the stack. It also takes
2369 * advantage of machines that use a register save mask on subroutine entry.
95b24440
DM
2370 *
2371 * This function used to be heavily recursive, but since this had the
2372 * effect of blowing the CPU stack on complex regexes, it has been
2373 * restructured to be iterative, and to save state onto the heap rather
2374 * than the stack. Essentially whereever regmatch() used to be called, it
2375 * pushes the current state, notes where to return, then jumps back into
2376 * the main loop.
2377 *
2378 * Originally the structure of this function used to look something like
2379
2380 S_regmatch() {
2381 int a = 1, b = 2;
2382 ...
2383 while (scan != NULL) {
5d9a96ca 2384 a++; // do stuff with a and b
95b24440
DM
2385 ...
2386 switch (OP(scan)) {
2387 case FOO: {
2388 int local = 3;
2389 ...
2390 if (regmatch(...)) // recurse
2391 goto yes;
2392 }
2393 ...
2394 }
2395 }
2396 yes:
2397 return 1;
2398 }
2399
2400 * Now it looks something like this:
2401
5d9a96ca 2402 typedef struct {
95b24440
DM
2403 int a, b, local;
2404 int resume_state;
5d9a96ca 2405 } regmatch_state;
95b24440
DM
2406
2407 S_regmatch() {
5d9a96ca
DM
2408 regmatch_state *st = new();
2409 int depth=0;
2410 st->a++; // do stuff with a and b
95b24440
DM
2411 ...
2412 while (scan != NULL) {
2413 ...
2414 switch (OP(scan)) {
2415 case FOO: {
5d9a96ca 2416 st->local = 3;
95b24440 2417 ...
5d9a96ca
DM
2418 st->scan = scan;
2419 scan = ...;
2420 st->resume_state = resume_FOO;
2421 goto start_recurse; // recurse
95b24440 2422
5d9a96ca
DM
2423 resume_point_FOO:
2424 if (result)
95b24440
DM
2425 goto yes;
2426 }
2427 ...
2428 }
5d9a96ca
DM
2429 start_recurse:
2430 st = new(); push a new state
2431 st->a = 1; st->b = 2;
2432 depth++;
95b24440 2433 }
5d9a96ca 2434 yes:
95b24440 2435 result = 1;
5d9a96ca
DM
2436 if (depth--) {
2437 st = pop();
95b24440
DM
2438 switch (resume_state) {
2439 case resume_FOO:
2440 goto resume_point_FOO;
2441 ...
2442 }
2443 }
2444 return result
2445 }
2446
2447 * WARNING: this means that any line in this function that contains a
2448 * REGMATCH() or TRYPAREN() is actually simulating a recursive call to
2449 * regmatch() using gotos instead. Thus the values of any local variables
2450 * not saved in the regmatch_state structure will have been lost when
2451 * execution resumes on the next line .
5d9a96ca
DM
2452 *
2453 * States (ie the st pointer) are allocated in slabs of about 4K in size.
2454 * PL_regmatch_state always points to the currently active state, and
2455 * PL_regmatch_slab points to the slab currently containing PL_regmatch_state.
2456 * The first time regmatch is called, the first slab is allocated, and is
2457 * never freed until interpreter desctruction. When the slab is full,
2458 * a new one is allocated chained to the end. At exit from regmatch, slabs
2459 * allocated since entry are freed.
d6a28714 2460 */
95b24440
DM
2461
2462
d6a28714 2463STATIC I32 /* 0 failure, 1 success */
3b0527fe 2464S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
d6a28714 2465{
54df2634
NC
2466#if PERL_VERSION < 9
2467 dMY_CXT;
2468#endif
27da23d5 2469 dVAR;
95b24440 2470 register const bool do_utf8 = PL_reg_match_utf8;
4ad0818d 2471 const U32 uniflags = UTF8_ALLOW_DEFAULT;
95b24440 2472
3b0527fe
DM
2473 regexp *rex = reginfo->prog;
2474
5d9a96ca
DM
2475 regmatch_slab *orig_slab;
2476 regmatch_state *orig_state;
a3621e74 2477
5d9a96ca
DM
2478 /* the current state. This is a cached copy of PL_regmatch_state */
2479 register regmatch_state *st;
95b24440 2480
5d9a96ca
DM
2481 /* cache heavy used fields of st in registers */
2482 register regnode *scan;
2483 register regnode *next;
2484 register I32 n = 0; /* initialize to shut up compiler warning */
2485 register char *locinput = PL_reginput;
95b24440 2486
5d9a96ca
DM
2487 /* these variables are NOT saved during a recusive RFEGMATCH: */
2488 register I32 nextchr; /* is always set to UCHARAT(locinput) */
2489 bool result; /* return value of S_regmatch */
2490 regnode *inner; /* Next node in internal branch. */
2491 int depth = 0; /* depth of recursion */
aa283a38 2492 regmatch_state *newst; /* when pushing a state, this is the new one */
77cb431f
DM
2493 regmatch_state *yes_state = NULL; /* state to pop to on success of
2494 subpattern */
95b24440
DM
2495
2496#ifdef DEBUGGING
ab74612d 2497 SV *re_debug_flags = NULL;
a3621e74 2498 GET_RE_DEBUG_FLAGS;
d6a28714
JH
2499 PL_regindent++;
2500#endif
2501
5d9a96ca
DM
2502 /* on first ever call to regmatch, allocate first slab */
2503 if (!PL_regmatch_slab) {
2504 Newx(PL_regmatch_slab, 1, regmatch_slab);
2505 PL_regmatch_slab->prev = NULL;
2506 PL_regmatch_slab->next = NULL;
86545054 2507 PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
5d9a96ca
DM
2508 }
2509
2510 /* remember current high-water mark for exit */
2511 /* XXX this should be done with SAVE* instead */
2512 orig_slab = PL_regmatch_slab;
2513 orig_state = PL_regmatch_state;
2514
2515 /* grab next free state slot */
2516 st = ++PL_regmatch_state;
86545054 2517 if (st > SLAB_LAST(PL_regmatch_slab))
5d9a96ca
DM
2518 st = PL_regmatch_state = S_push_slab(aTHX);
2519
2520 st->minmod = 0;
2521 st->sw = 0;
2522 st->logical = 0;
2523 st->unwind = 0;
2524 st->cc = NULL;
d6a28714
JH
2525 /* Note that nextchr is a byte even in UTF */
2526 nextchr = UCHARAT(locinput);
2527 scan = prog;
2528 while (scan != NULL) {
8ba1375e 2529
a3621e74 2530 DEBUG_EXECUTE_r( {
6136c704 2531 SV * const prop = sv_newmortal();
1df70142
AL
2532 const int docolor = *PL_colors[0];
2533 const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2534 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2535 /* The part of the string before starttry has one color
2536 (pref0_len chars), between starttry and current
2537 position another one (pref_len - pref0_len chars),
2538 after the current position the third one.
2539 We assume that pref0_len <= pref_len, otherwise we
2540 decrease pref0_len. */
9041c2e3 2541 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2542 ? (5 + taill) - l : locinput - PL_bostr;
2543 int pref0_len;
d6a28714 2544
df1ffd02 2545 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2546 pref_len++;
2547 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2548 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2549 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2550 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2551 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2552 l--;
d6a28714
JH
2553 if (pref0_len < 0)
2554 pref0_len = 0;
2555 if (pref0_len > pref_len)
2556 pref0_len = pref_len;
32fc9b6a 2557 regprop(rex, prop, scan);
2a782b5b 2558 {
1df70142 2559 const char * const s0 =
f14c76ed 2560 do_utf8 && OP(scan) != CANY ?
95b24440 2561 pv_uni_display(PERL_DEBUG_PAD(0), (U8*)(locinput - pref_len),
c728cb41 2562 pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2563 locinput - pref_len;
1df70142
AL
2564 const int len0 = do_utf8 ? strlen(s0) : pref0_len;
2565 const char * const s1 = do_utf8 && OP(scan) != CANY ?
95b24440
DM
2566 pv_uni_display(PERL_DEBUG_PAD(1),
2567 (U8*)(locinput - pref_len + pref0_len),
c728cb41 2568 pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2569 locinput - pref_len + pref0_len;
1df70142
AL
2570 const int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2571 const char * const s2 = do_utf8 && OP(scan) != CANY ?
95b24440 2572 pv_uni_display(PERL_DEBUG_PAD(2), (U8*)locinput,
c728cb41 2573 PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2574 locinput;
1df70142 2575 const int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2576 PerlIO_printf(Perl_debug_log,
2577 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2578 (IV)(locinput - PL_bostr),
2579 PL_colors[4],
2580 len0, s0,
2581 PL_colors[5],
2582 PL_colors[2],
2583 len1, s1,
2584 PL_colors[3],
2585 (docolor ? "" : "> <"),
2586 PL_colors[0],
2587 len2, s2,
2588 PL_colors[1],
2589 15 - l - pref_len + 1,
2590 "",
4f639d21 2591 (IV)(scan - rex->program), PL_regindent*2, "",
3f7c398e 2592 SvPVX_const(prop));
2a782b5b
JH
2593 }
2594 });
d6a28714
JH
2595
2596 next = scan + NEXT_OFF(scan);
2597 if (next == scan)
2598 next = NULL;
2599
2600 switch (OP(scan)) {
2601 case BOL:
7fba1cd6 2602 if (locinput == PL_bostr)
d6a28714 2603 {
3b0527fe 2604 /* reginfo->till = reginfo->bol; */
b8c5462f
JH
2605 break;
2606 }
d6a28714
JH
2607 sayNO;
2608 case MBOL:
12d33761
HS
2609 if (locinput == PL_bostr ||
2610 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2611 {
b8c5462f
JH
2612 break;
2613 }
d6a28714
JH
2614 sayNO;
2615 case SBOL:
c2a73568 2616 if (locinput == PL_bostr)
b8c5462f 2617 break;
d6a28714
JH
2618 sayNO;
2619 case GPOS:
3b0527fe 2620 if (locinput == reginfo->ganch)
d6a28714
JH
2621 break;
2622 sayNO;
2623 case EOL:
d6a28714
JH
2624 goto seol;
2625 case MEOL:
d6a28714 2626 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2627 sayNO;
b8c5462f 2628 break;
d6a28714
JH
2629 case SEOL:
2630 seol:
2631 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2632 sayNO;
d6a28714 2633 if (PL_regeol - locinput > 1)
b8c5462f 2634 sayNO;
b8c5462f 2635 break;
d6a28714
JH
2636 case EOS:
2637 if (PL_regeol != locinput)
b8c5462f 2638 sayNO;
d6a28714 2639 break;
ffc61ed2 2640 case SANY:
d6a28714 2641 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2642 sayNO;
f33976b4
DB
2643 if (do_utf8) {
2644 locinput += PL_utf8skip[nextchr];
2645 if (locinput > PL_regeol)
2646 sayNO;
2647 nextchr = UCHARAT(locinput);
2648 }
2649 else
2650 nextchr = UCHARAT(++locinput);
2651 break;
2652 case CANY:
2653 if (!nextchr && locinput >= PL_regeol)
2654 sayNO;
b8c5462f 2655 nextchr = UCHARAT(++locinput);
a0d0e21e 2656 break;
ffc61ed2 2657 case REG_ANY:
1aa99e6b
IH
2658 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2659 sayNO;
2660 if (do_utf8) {
b8c5462f 2661 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2662 if (locinput > PL_regeol)
2663 sayNO;
a0ed51b3 2664 nextchr = UCHARAT(locinput);
a0ed51b3 2665 }
1aa99e6b
IH
2666 else
2667 nextchr = UCHARAT(++locinput);
a0ed51b3 2668 break;
a3621e74
YO
2669
2670
2671
2672 /*
2673 traverse the TRIE keeping track of all accepting states
2674 we transition through until we get to a failing node.
2675
a3621e74
YO
2676
2677 */
5b47454d 2678 case TRIE:
a3621e74
YO
2679 case TRIEF:
2680 case TRIEFL:
2681 {
a3621e74
YO
2682 U8 *uc = ( U8* )locinput;
2683 U32 state = 1;
2684 U16 charid = 0;
2685 U32 base = 0;
2686 UV uvc = 0;
2687 STRLEN len = 0;
2688 STRLEN foldlen = 0;
a3621e74
YO
2689 U8 *uscan = (U8*)NULL;
2690 STRLEN bufflen=0;
95b24440 2691 SV *sv_accept_buff = NULL;
5b47454d
DM
2692 const enum { trie_plain, trie_utf8, trie_uft8_fold }
2693 trie_type = do_utf8 ?
2694 (OP(scan) == TRIE ? trie_utf8 : trie_uft8_fold)
2695 : trie_plain;
2696
7087a21c
NC
2697 /* what trie are we using right now */
2698 reg_trie_data *trie
32fc9b6a 2699 = (reg_trie_data*)rex->data->data[ ARG( scan ) ];
d8319b27 2700 st->u.trie.accepted = 0; /* how many accepting states we have seen */
95b24440 2701 result = 0;
a3621e74
YO
2702
2703 while ( state && uc <= (U8*)PL_regeol ) {
2704
5b47454d 2705 if (trie->states[ state ].wordnum) {
d8319b27 2706 if (!st->u.trie.accepted ) {
5b47454d
DM
2707 ENTER;
2708 SAVETMPS;
2709 bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
2710 sv_accept_buff=newSV(bufflen *
2711 sizeof(reg_trie_accepted) - 1);
2712 SvCUR_set(sv_accept_buff,
2713 sizeof(reg_trie_accepted));
2714 SvPOK_on(sv_accept_buff);
2715 sv_2mortal(sv_accept_buff);
d8319b27 2716 st->u.trie.accept_buff =
5b47454d
DM
2717 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
2718 }
2719 else {
d8319b27 2720 if (st->u.trie.accepted >= bufflen) {
5b47454d 2721 bufflen *= 2;
d8319b27 2722 st->u.trie.accept_buff =(reg_trie_accepted*)
5b47454d
DM
2723 SvGROW(sv_accept_buff,
2724 bufflen * sizeof(reg_trie_accepted));
2725 }
2726 SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
2727 + sizeof(reg_trie_accepted));
2728 }
d8319b27
DM
2729 st->u.trie.accept_buff[st->u.trie.accepted].wordnum = trie->states[state].wordnum;
2730 st->u.trie.accept_buff[st->u.trie.accepted].endpos = uc;
2731 ++st->u.trie.accepted;
5b47454d 2732 }
a3621e74
YO
2733
2734 base = trie->states[ state ].trans.base;
2735
2736 DEBUG_TRIE_EXECUTE_r(
2737 PerlIO_printf( Perl_debug_log,
e4584336 2738 "%*s %sState: %4"UVxf", Base: %4"UVxf", Accepted: %4"UVxf" ",
a3621e74 2739 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
d8319b27 2740 (UV)state, (UV)base, (UV)st->u.trie.accepted );
a3621e74
YO
2741 );
2742
2743 if ( base ) {
5b47454d
DM
2744 switch (trie_type) {
2745 case trie_uft8_fold:
a3621e74
YO
2746 if ( foldlen>0 ) {
2747 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
2748 foldlen -= len;
2749 uscan += len;
2750 len=0;
2751 } else {
1df70142 2752 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
a3621e74
YO
2753 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2754 uvc = to_uni_fold( uvc, foldbuf, &foldlen );
2755 foldlen -= UNISKIP( uvc );
2756 uscan = foldbuf + UNISKIP( uvc );
2757 }
5b47454d
DM
2758 break;
2759 case trie_utf8:
2760 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN,
2761 &len, uniflags );
2762 break;
2763 case trie_plain:
e4584336 2764 uvc = (UV)*uc;
a3621e74
YO
2765 len = 1;
2766 }
2767
5b47454d
DM
2768 if (uvc < 256) {
2769 charid = trie->charmap[ uvc ];
2770 }
2771 else {
2772 charid = 0;
2773 if (trie->widecharmap) {
2774 SV** svpp = (SV**)NULL;
2775 svpp = hv_fetch(trie->widecharmap,
2776 (char*)&uvc, sizeof(UV), 0);
2777 if (svpp)
2778 charid = (U16)SvIV(*svpp);
2779 }
2780 }
a3621e74 2781
5b47454d
DM
2782 if (charid &&
2783 (base + charid > trie->uniquecharcount )
2784 && (base + charid - 1 - trie->uniquecharcount
2785 < trie->lasttrans)
2786 && trie->trans[base + charid - 1 -
2787 trie->uniquecharcount].check == state)
2788 {
2789 state = trie->trans[base + charid - 1 -
2790 trie->uniquecharcount ].next;
2791 }
2792 else {
2793 state = 0;
2794 }
2795 uc += len;
2796
2797 }
2798 else {
a3621e74
YO
2799 state = 0;
2800 }
2801 DEBUG_TRIE_EXECUTE_r(
e4584336
RB
2802 PerlIO_printf( Perl_debug_log,
2803 "Charid:%3x CV:%4"UVxf" After State: %4"UVxf"%s\n",
2804 charid, uvc, (UV)state, PL_colors[5] );
a3621e74
YO
2805 );
2806 }
d8319b27 2807 if (!st->u.trie.accepted )
a3621e74 2808 sayNO;
a3621e74
YO
2809
2810 /*
2811 There was at least one accepting state that we
2812 transitioned through. Presumably the number of accepting
2813 states is going to be low, typically one or two. So we
2814 simply scan through to find the one with lowest wordnum.
2815 Once we find it, we swap the last state into its place
2816 and decrement the size. We then try to match the rest of
2817 the pattern at the point where the word ends, if we
2818 succeed then we end the loop, otherwise the loop
2819 eventually terminates once all of the accepting states
2820 have been tried.
2821 */
a3621e74 2822
d8319b27 2823 if ( st->u.trie.accepted == 1 ) {
a3621e74 2824 DEBUG_EXECUTE_r({
097eb12c 2825 SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ 0 ].wordnum-1, 0 );
a3621e74
YO
2826 PerlIO_printf( Perl_debug_log,
2827 "%*s %sonly one match : #%d <%s>%s\n",
e4584336 2828 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
d8319b27 2829 st->u.trie.accept_buff[ 0 ].wordnum,
cfd0369c 2830 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",
a3621e74
YO
2831 PL_colors[5] );
2832 });
d8319b27 2833 PL_reginput = (char *)st->u.trie.accept_buff[ 0 ].endpos;
a3621e74
YO
2834 /* in this case we free tmps/leave before we call regmatch
2835 as we wont be using accept_buff again. */
2836 FREETMPS;
2837 LEAVE;
95b24440
DM
2838 REGMATCH(scan + NEXT_OFF(scan), TRIE1);
2839 /*** all unsaved local vars undefined at this point */
a3621e74
YO
2840 } else {
2841 DEBUG_EXECUTE_r(
e4584336 2842 PerlIO_printf( Perl_debug_log,"%*s %sgot %"IVdf" possible matches%s\n",
d8319b27 2843 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4], (IV)st->u.trie.accepted,
a3621e74
YO
2844 PL_colors[5] );
2845 );
d8319b27 2846 while ( !result && st->u.trie.accepted-- ) {
a3621e74
YO
2847 U32 best = 0;
2848 U32 cur;
d8319b27 2849 for( cur = 1 ; cur <= st->u.trie.accepted ; cur++ ) {
e4584336
RB
2850 DEBUG_TRIE_EXECUTE_r(
2851 PerlIO_printf( Perl_debug_log,
2852 "%*s %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
2853 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
d8319b27
DM
2854 (IV)best, st->u.trie.accept_buff[ best ].wordnum, (IV)cur,
2855 st->u.trie.accept_buff[ cur ].wordnum, PL_colors[5] );
e4584336 2856 );
a3621e74 2857
d8319b27
DM
2858 if (st->u.trie.accept_buff[cur].wordnum <
2859 st->u.trie.accept_buff[best].wordnum)
e822a8b4 2860 best = cur;
a3621e74
YO
2861 }
2862 DEBUG_EXECUTE_r({
87830502 2863 reg_trie_data * const trie = (reg_trie_data*)
32fc9b6a 2864 rex->data->data[ARG(scan)];
d8319b27 2865 SV ** const tmp = av_fetch( trie->words, st->u.trie.accept_buff[ best ].wordnum - 1, 0 );
a3621e74 2866 PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at 0x%p%s\n",
e4584336 2867 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],
d8319b27 2868 st->u.trie.accept_buff[best].wordnum,
cfd0369c 2869 tmp ? SvPV_nolen_const( *tmp ) : "not compiled under -Dr",scan,
a3621e74
YO
2870 PL_colors[5] );
2871 });
d8319b27
DM
2872 if ( best<st->u.trie.accepted ) {
2873 reg_trie_accepted tmp = st->u.trie.accept_buff[ best ];
2874 st->u.trie.accept_buff[ best ] = st->u.trie.accept_buff[ st->u.trie.accepted ];
2875 st->u.trie.accept_buff[ st->u.trie.accepted ] = tmp;
2876 best = st->u.trie.accepted;
a3621e74 2877 }
d8319b27 2878 PL_reginput = (char *)st->u.trie.accept_buff[ best ].endpos;
a3621e74
YO
2879
2880 /*
2881 as far as I can tell we only need the SAVETMPS/FREETMPS
2882 for re's with EVAL in them but I'm leaving them in for
2883 all until I can be sure.
2884 */
2885 SAVETMPS;
95b24440
DM
2886 REGMATCH(scan + NEXT_OFF(scan), TRIE2);
2887 /*** all unsaved local vars undefined at this point */
a3621e74
YO
2888 FREETMPS;
2889 }
2890 FREETMPS;
2891 LEAVE;
2892 }
2893
95b24440 2894 if (result) {
a3621e74
YO
2895 sayYES;
2896 } else {
2897 sayNO;
2898 }
2899 }
2900 /* unreached codepoint */
95b24440
DM
2901 case EXACT: {
2902 char *s = STRING(scan);
5d9a96ca 2903 st->ln = STR_LEN(scan);
eb160463 2904 if (do_utf8 != UTF) {
bc517b45 2905 /* The target and the pattern have differing utf8ness. */
1aa99e6b 2906 char *l = locinput;
5d9a96ca 2907 const char *e = s + st->ln;
a72c7584 2908
5ff6fc6d
JH
2909 if (do_utf8) {
2910 /* The target is utf8, the pattern is not utf8. */
1aa99e6b 2911 while (s < e) {
a3b680e6 2912 STRLEN ulen;
1aa99e6b 2913 if (l >= PL_regeol)
5ff6fc6d
JH
2914 sayNO;
2915 if (NATIVE_TO_UNI(*(U8*)s) !=
89ebb4a3 2916 utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
041457d9 2917 uniflags))
5ff6fc6d 2918 sayNO;
bc517b45 2919 l += ulen;
5ff6fc6d 2920 s ++;
1aa99e6b 2921 }
5ff6fc6d
JH
2922 }
2923 else {
2924 /* The target is not utf8, the pattern is utf8. */
1aa99e6b 2925 while (s < e) {
a3b680e6 2926 STRLEN ulen;
1aa99e6b
IH
2927 if (l >= PL_regeol)
2928 sayNO;
5ff6fc6d 2929 if (NATIVE_TO_UNI(*((U8*)l)) !=
89ebb4a3 2930 utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
041457d9 2931 uniflags))
1aa99e6b 2932 sayNO;
bc517b45 2933 s += ulen;
a72c7584 2934 l ++;
1aa99e6b 2935 }
5ff6fc6d 2936 }
1aa99e6b
IH
2937 locinput = l;
2938 nextchr = UCHARAT(locinput);
2939 break;
2940 }
bc517b45 2941 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2942 /* Inline the first character, for speed. */
2943 if (UCHARAT(s) != nextchr)
2944 sayNO;
5d9a96ca 2945 if (PL_regeol - locinput < st->ln)
d6a28714 2946 sayNO;
5d9a96ca 2947 if (st->ln > 1 && memNE(s, locinput, st->ln))
d6a28714 2948 sayNO;
5d9a96ca 2949 locinput += st->ln;
d6a28714
JH
2950 nextchr = UCHARAT(locinput);
2951 break;
95b24440 2952 }
d6a28714 2953 case EXACTFL:
b8c5462f
JH
2954 PL_reg_flags |= RF_tainted;
2955 /* FALL THROUGH */
95b24440
DM
2956 case EXACTF: {
2957 char *s = STRING(scan);
5d9a96ca 2958 st->ln = STR_LEN(scan);
d6a28714 2959
d07ddd77
JH
2960 if (do_utf8 || UTF) {
2961 /* Either target or the pattern are utf8. */
d6a28714 2962 char *l = locinput;
d07ddd77 2963 char *e = PL_regeol;
bc517b45 2964
5d9a96ca 2965 if (ibcmp_utf8(s, 0, st->ln, (bool)UTF,
1feea2c7 2966 l, &e, 0, do_utf8)) {
5486206c
JH
2967 /* One more case for the sharp s:
2968 * pack("U0U*", 0xDF) =~ /ss/i,
2969 * the 0xC3 0x9F are the UTF-8
2970 * byte sequence for the U+00DF. */
2971 if (!(do_utf8 &&
2972 toLOWER(s[0]) == 's' &&
5d9a96ca 2973 st->ln >= 2 &&
5486206c
JH
2974 toLOWER(s[1]) == 's' &&
2975 (U8)l[0] == 0xC3 &&
2976 e - l >= 2 &&
2977 (U8)l[1] == 0x9F))
2978 sayNO;
2979 }
d07ddd77
JH
2980 locinput = e;
2981 nextchr = UCHARAT(locinput);
2982 break;
a0ed51b3 2983 }
d6a28714 2984
bc517b45
JH
2985 /* Neither the target and the pattern are utf8. */
2986
d6a28714
JH
2987 /* Inline the first character, for speed. */
2988 if (UCHARAT(s) != nextchr &&
2989 UCHARAT(s) != ((OP(scan) == EXACTF)
2990 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2991 sayNO;
5d9a96ca 2992 if (PL_regeol - locinput < st->ln)
b8c5462f 2993 sayNO;
5d9a96ca
DM
2994 if (st->ln > 1 && (OP(scan) == EXACTF
2995 ? ibcmp(s, locinput, st->ln)
2996 : ibcmp_locale(s, locinput, st->ln)))
4633a7c4 2997 sayNO;
5d9a96ca 2998 locinput += st->ln;
d6a28714 2999 nextchr = UCHARAT(locinput);
a0d0e21e 3000 break;
95b24440 3001 }
d6a28714 3002 case ANYOF:
ffc61ed2 3003 if (do_utf8) {
9e55ce06
JH
3004 STRLEN inclasslen = PL_regeol - locinput;
3005
32fc9b6a 3006 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
e0f9d4a8 3007 sayNO_ANYOF;
ffc61ed2
JH
3008 if (locinput >= PL_regeol)
3009 sayNO;
0f0076b4 3010 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
b8c5462f 3011 nextchr = UCHARAT(locinput);
e0f9d4a8 3012 break;
ffc61ed2
JH
3013 }
3014 else {
3015 if (nextchr < 0)
3016 nextchr = UCHARAT(locinput);
32fc9b6a 3017 if (!REGINCLASS(rex, scan, (U8*)locinput))
e0f9d4a8 3018 sayNO_ANYOF;
ffc61ed2
JH
3019 if (!nextchr && locinput >= PL_regeol)
3020 sayNO;
3021 nextchr = UCHARAT(++locinput);
e0f9d4a8
JH
3022 break;
3023 }
3024 no_anyof:
3025 /* If we might have the case of the German sharp s
3026 * in a casefolding Unicode character class. */
3027
ebc501f0
JH
3028 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3029 locinput += SHARP_S_SKIP;
e0f9d4a8 3030 nextchr = UCHARAT(locinput);
ffc61ed2 3031 }
e0f9d4a8
JH
3032 else
3033 sayNO;
b8c5462f 3034 break;
d6a28714 3035 case ALNUML:
b8c5462f
JH
3036 PL_reg_flags |= RF_tainted;
3037 /* FALL THROUGH */
d6a28714 3038 case ALNUM:
b8c5462f 3039 if (!nextchr)
4633a7c4 3040 sayNO;
ffc61ed2 3041 if (do_utf8) {
1a4fad37 3042 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3043 if (!(OP(scan) == ALNUM
3568d838 3044 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 3045 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
3046 {
3047 sayNO;
a0ed51b3 3048 }
b8c5462f 3049 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3050 nextchr = UCHARAT(locinput);
3051 break;
3052 }
ffc61ed2 3053 if (!(OP(scan) == ALNUM
d6a28714 3054 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 3055 sayNO;
b8c5462f 3056 nextchr = UCHARAT(++locinput);
a0d0e21e 3057 break;
d6a28714 3058 case NALNUML:
b8c5462f
JH
3059 PL_reg_flags |= RF_tainted;
3060 /* FALL THROUGH */
d6a28714
JH
3061 case NALNUM:
3062 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 3063 sayNO;
ffc61ed2 3064 if (do_utf8) {
1a4fad37 3065 LOAD_UTF8_CHARCLASS_ALNUM();
ffc61ed2 3066 if (OP(scan) == NALNUM
3568d838 3067 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
3068 : isALNUM_LC_utf8((U8*)locinput))
3069 {
b8c5462f 3070 sayNO;
d6a28714 3071 }
b8c5462f
JH
3072 locinput += PL_utf8skip[nextchr];
3073 nextchr = UCHARAT(locinput);
3074 break;
3075 }
ffc61ed2 3076 if (OP(scan) == NALNUM
d6a28714 3077 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 3078 sayNO;
76e3520e 3079 nextchr = UCHARAT(++locinput);
a0d0e21e 3080 break;
d6a28714
JH
3081 case BOUNDL:
3082 case NBOUNDL:
3280af22 3083 PL_reg_flags |= RF_tainted;
bbce6d69 3084 /* FALL THROUGH */
d6a28714
JH
3085 case BOUND:
3086 case NBOUND:
3087 /* was last char in word? */
ffc61ed2 3088 if (do_utf8) {
12d33761 3089 if (locinput == PL_bostr)
5d9a96ca 3090 st->ln = '\n';
ffc61ed2 3091 else {
a3b680e6 3092 const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
9041c2e3 3093
4ad0818d 3094 st->ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
ffc61ed2
JH
3095 }
3096 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
5d9a96ca 3097 st->ln = isALNUM_uni(st->ln);
1a4fad37 3098 LOAD_UTF8_CHARCLASS_ALNUM();
3568d838 3099 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
3100 }
3101 else {
5d9a96ca 3102 st->ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(st->ln));
ffc61ed2
JH
3103 n = isALNUM_LC_utf8((U8*)locinput);
3104 }
a0ed51b3 3105 }
d6a28714 3106 else {
5d9a96ca 3107 st->ln = (locinput != PL_bostr) ?
12d33761 3108 UCHARAT(locinput - 1) : '\n';
ffc61ed2 3109 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
5d9a96ca 3110 st->ln = isALNUM(st->ln);
ffc61ed2
JH
3111 n = isALNUM(nextchr);
3112 }
3113 else {
5d9a96ca 3114 st->ln = isALNUM_LC(st->ln);
ffc61ed2
JH
3115 n = isALNUM_LC(nextchr);
3116 }
d6a28714 3117 }
5d9a96ca 3118 if (((!st->ln) == (!n)) == (OP(scan) == BOUND ||
ffc61ed2
JH
3119 OP(scan) == BOUNDL))
3120 sayNO;
a0ed51b3 3121 break;
d6a28714 3122 case SPACEL:
3280af22 3123 PL_reg_flags |= RF_tainted;
bbce6d69 3124 /* FALL THROUGH */
d6a28714 3125 case SPACE:
9442cb0e 3126 if (!nextchr)
4633a7c4 3127 sayNO;
1aa99e6b 3128 if (do_utf8) {
fd400ab9 3129 if (UTF8_IS_CONTINUED(nextchr)) {
1a4fad37 3130 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3131 if (!(OP(scan) == SPACE
3568d838 3132 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
3133 : isSPACE_LC_utf8((U8*)locinput)))
3134 {
3135 sayNO;
3136 }
3137 locinput += PL_utf8skip[nextchr];
3138 nextchr = UCHARAT(locinput);
3139 break;
d6a28714 3140 }
ffc61ed2
JH
3141 if (!(OP(scan) == SPACE
3142 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3143 sayNO;
3144 nextchr = UCHARAT(++locinput);
3145 }
3146 else {
3147 if (!(OP(scan) == SPACE
3148 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3149 sayNO;
3150 nextchr = UCHARAT(++locinput);
a0ed51b3 3151 }
a0ed51b3 3152 break;
d6a28714 3153 case NSPACEL:
3280af22 3154 PL_reg_flags |= RF_tainted;
bbce6d69 3155 /* FALL THROUGH */
d6a28714 3156 case NSPACE:
9442cb0e 3157 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3158 sayNO;
1aa99e6b 3159 if (do_utf8) {
1a4fad37 3160 LOAD_UTF8_CHARCLASS_SPACE();
ffc61ed2 3161 if (OP(scan) == NSPACE
3568d838 3162 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 3163 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
3164 {
3165 sayNO;
3166 }
3167 locinput += PL_utf8skip[nextchr];
3168 nextchr = UCHARAT(locinput);
3169 break;
a0ed51b3 3170 }
ffc61ed2 3171 if (OP(scan) == NSPACE
d6a28714 3172 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 3173 sayNO;
76e3520e 3174 nextchr = UCHARAT(++locinput);
a0d0e21e 3175 break;
d6a28714 3176 case DIGITL:
a0ed51b3
LW
3177 PL_reg_flags |= RF_tainted;
3178 /* FALL THROUGH */
d6a28714 3179 case DIGIT:
9442cb0e 3180 if (!nextchr)
a0ed51b3 3181 sayNO;
1aa99e6b 3182 if (do_utf8) {
1a4fad37 3183 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3184 if (!(OP(scan) == DIGIT
3568d838 3185 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 3186 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 3187 {
a0ed51b3 3188 sayNO;
dfe13c55 3189 }
6f06b55f 3190 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3191 nextchr = UCHARAT(locinput);
3192 break;
3193 }
ffc61ed2 3194 if (!(OP(scan) == DIGIT
9442cb0e 3195 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
3196 sayNO;
3197 nextchr = UCHARAT(++locinput);
3198 break;
d6a28714 3199 case NDIGITL:
b8c5462f
JH
3200 PL_reg_flags |= RF_tainted;
3201 /* FALL THROUGH */
d6a28714 3202 case NDIGIT:
9442cb0e 3203 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3204 sayNO;
1aa99e6b 3205 if (do_utf8) {
1a4fad37 3206 LOAD_UTF8_CHARCLASS_DIGIT();
ffc61ed2 3207 if (OP(scan) == NDIGIT
3568d838 3208 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
3209 : isDIGIT_LC_utf8((U8*)locinput))
3210 {
a0ed51b3 3211 sayNO;
9442cb0e 3212 }
6f06b55f 3213 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3214 nextchr = UCHARAT(locinput);
3215 break;
3216 }
ffc61ed2 3217 if (OP(scan) == NDIGIT
9442cb0e 3218 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
3219 sayNO;
3220 nextchr = UCHARAT(++locinput);
3221 break;
3222 case CLUMP:
b7c83a7e 3223 if (locinput >= PL_regeol)
a0ed51b3 3224 sayNO;
b7c83a7e 3225 if (do_utf8) {
1a4fad37 3226 LOAD_UTF8_CHARCLASS_MARK();
b7c83a7e
JH
3227 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3228 sayNO;
3229 locinput += PL_utf8skip[nextchr];
3230 while (locinput < PL_regeol &&
3231 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3232 locinput += UTF8SKIP(locinput);
3233 if (locinput > PL_regeol)
3234 sayNO;
eb08e2da
JH
3235 }
3236 else
3237 locinput++;
a0ed51b3
LW
3238 nextchr = UCHARAT(locinput);
3239 break;
c8756f30 3240 case REFFL:
3280af22 3241 PL_reg_flags |= RF_tainted;
c8756f30 3242 /* FALL THROUGH */
c277df42 3243 case REF:
95b24440
DM
3244 case REFF: {
3245 char *s;
c277df42 3246 n = ARG(scan); /* which paren pair */
5d9a96ca 3247 st->ln = PL_regstartp[n];
2c2d71f5 3248 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
5d9a96ca 3249 if ((I32)*PL_reglastparen < n || st->ln == -1)
af3f8c16 3250 sayNO; /* Do not match unless seen CLOSEn. */
5d9a96ca 3251 if (st->ln == PL_regendp[n])
a0d0e21e 3252 break;
a0ed51b3 3253
5d9a96ca 3254 s = PL_bostr + st->ln;
1aa99e6b 3255 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 3256 char *l = locinput;
a3b680e6 3257 const char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
3258 /*
3259 * Note that we can't do the "other character" lookup trick as
3260 * in the 8-bit case (no pun intended) because in Unicode we
3261 * have to map both upper and title case to lower case.
3262 */
3263 if (OP(scan) == REFF) {
3264 while (s < e) {
a3b680e6
AL
3265 STRLEN ulen1, ulen2;
3266 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3267 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3268
a0ed51b3
LW
3269 if (l >= PL_regeol)
3270 sayNO;
a2a2844f
JH
3271 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3272 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 3273 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 3274 sayNO;
a2a2844f
JH
3275 s += ulen1;
3276 l += ulen2;
a0ed51b3
LW
3277 }
3278 }
3279 locinput = l;
3280 nextchr = UCHARAT(locinput);
3281 break;
3282 }
3283
a0d0e21e 3284 /* Inline the first character, for speed. */
76e3520e 3285 if (UCHARAT(s) != nextchr &&
c8756f30
AK
3286 (OP(scan) == REF ||
3287 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 3288 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 3289 sayNO;
5d9a96ca
DM
3290 st->ln = PL_regendp[n] - st->ln;
3291 if (locinput + st->ln > PL_regeol)
4633a7c4 3292 sayNO;
5d9a96ca
DM
3293 if (st->ln > 1 && (OP(scan) == REF
3294 ? memNE(s, locinput, st->ln)
c8756f30 3295 : (OP(scan) == REFF
5d9a96ca
DM
3296 ? ibcmp(s, locinput, st->ln)
3297 : ibcmp_locale(s, locinput, st->ln))))
4633a7c4 3298 sayNO;
5d9a96ca 3299 locinput += st->ln;
76e3520e 3300 nextchr = UCHARAT(locinput);
a0d0e21e 3301 break;
95b24440 3302 }
a0d0e21e
LW
3303
3304 case NOTHING:
c277df42 3305 case TAIL:
a0d0e21e
LW
3306 break;
3307 case BACK:
3308 break;
c277df42
IZ
3309 case EVAL:
3310 {
c277df42 3311 SV *ret;
8e5e9ebe 3312 {
4aabdb9b
DM
3313 /* execute the code in the {...} */
3314 dSP;
6136c704 3315 SV ** const before = SP;
4aabdb9b
DM
3316 OP_4tree * const oop = PL_op;
3317 COP * const ocurcop = PL_curcop;
3318 PAD *old_comppad;
4aabdb9b
DM
3319
3320 n = ARG(scan);
32fc9b6a 3321 PL_op = (OP_4tree*)rex->data->data[n];
4aabdb9b 3322 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
32fc9b6a 3323 PAD_SAVE_LOCAL(old_comppad, (PAD*)rex->data->data[n + 2]);
4aabdb9b
DM
3324 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
3325
8e5e9ebe
RGS
3326 CALLRUNOPS(aTHX); /* Scalar context. */
3327 SPAGAIN;
3328 if (SP == before)
075aa684 3329 ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
8e5e9ebe
RGS
3330 else {
3331 ret = POPs;
3332 PUTBACK;
3333 }
4aabdb9b
DM
3334
3335 PL_op = oop;
3336 PAD_RESTORE_LOCAL(old_comppad);
3337 PL_curcop = ocurcop;
3338 if (!st->logical) {
3339 /* /(?{...})/ */
3340 sv_setsv(save_scalar(PL_replgv), ret);
4aabdb9b
DM
3341 break;
3342 }
8e5e9ebe 3343 }
4aabdb9b
DM
3344 if (st->logical == 2) { /* Postponed subexpression: /(??{...})/ */
3345 regexp *re;
4aabdb9b 3346 {
4f639d21
DM
3347 /* extract RE object from returned value; compiling if
3348 * necessary */
3349
6136c704 3350 MAGIC *mg = NULL;
4aabdb9b 3351 SV *sv;
faf82a0b
AE
3352 if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3353 mg = mg_find(sv, PERL_MAGIC_qr);
3354 else if (SvSMAGICAL(ret)) {
3355 if (SvGMAGICAL(ret))
3356 sv_unmagic(ret, PERL_MAGIC_qr);
3357 else
3358 mg = mg_find(ret, PERL_MAGIC_qr);
0f5d15d6 3359 }
faf82a0b 3360
0f5d15d6
IZ
3361 if (mg) {
3362 re = (regexp *)mg->mg_obj;
df0003d4 3363 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
3364 }
3365 else {
3366 STRLEN len;
6136c704 3367 const char * const t = SvPV_const(ret, len);
0f5d15d6 3368 PMOP pm;
a3b680e6 3369 const I32 osize = PL_regsize;
0f5d15d6 3370
5fcd1c1b 3371 Zero(&pm, 1, PMOP);
4aabdb9b 3372 if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
83003860 3373 re = CALLREGCOMP(aTHX_ (char*)t, (char*)t + len, &pm);
9041c2e3 3374 if (!(SvFLAGS(ret)
faf82a0b
AE
3375 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3376 | SVs_GMG)))
14befaf4
DM
3377 sv_magic(ret,(SV*)ReREFCNT_inc(re),
3378 PERL_MAGIC_qr,0,0);
0f5d15d6 3379 PL_regsize = osize;
0f5d15d6 3380 }
4aabdb9b 3381 }
aa283a38
DM
3382
3383 /* run the pattern returned from (??{...}) */
3384
4aabdb9b
DM
3385 DEBUG_EXECUTE_r(
3386 PerlIO_printf(Perl_debug_log,
3387 "Entering embedded \"%s%.60s%s%s\"\n",
3388 PL_colors[0],
3389 re->precomp,
3390 PL_colors[1],
3391 (strlen(re->precomp) > 60 ? "..." : ""))
3392 );
2c2d71f5 3393
4aabdb9b
DM
3394 st->u.eval.cp = regcppush(0); /* Save *all* the positions. */
3395 REGCP_SET(st->u.eval.lastcp);
4aabdb9b
DM
3396 *PL_reglastparen = 0;
3397 *PL_reglastcloseparen = 0;
4aabdb9b 3398 PL_reginput = locinput;
4aabdb9b
DM
3399
3400 /* XXXX This is too dramatic a measure... */
3401 PL_reg_maxiter = 0;
3402
5d9a96ca 3403 st->logical = 0;
aa283a38
DM
3404 st->u.eval.toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
3405 ((re->reganch & ROPT_UTF8) != 0);
3406 if (st->u.eval.toggleutf) PL_reg_flags ^= RF_utf8;
3407 st->u.eval.prev_rex = rex;
aa283a38 3408 rex = re;
aa283a38 3409
77cb431f
DM
3410 /* resume to current state on success */
3411 st->u.yes.prev_yes_state = yes_state;
3412 yes_state = st;
aa283a38
DM
3413 PUSH_STATE(newst, resume_EVAL);
3414 st = newst;
3415
3416 /* now continue from first node in postoned RE */
3417 next = re->program + 1;
3418 break;
4aabdb9b 3419 /* NOTREACHED */
a0ed51b3 3420 }
4aabdb9b
DM
3421 /* /(?(?{...})X|Y)/ */
3422 st->sw = SvTRUE(ret);
3423 st->logical = 0;
c277df42
IZ
3424 break;
3425 }
a0d0e21e 3426 case OPEN:
c277df42 3427 n = ARG(scan); /* which paren pair */
3280af22
NIS
3428 PL_reg_start_tmp[n] = locinput;
3429 if (n > PL_regsize)
3430 PL_regsize = n;
a0d0e21e
LW
3431 break;
3432 case CLOSE:
c277df42 3433 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
3434 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3435 PL_regendp[n] = locinput - PL_bostr;
eb160463 3436 if (n > (I32)*PL_reglastparen)
3280af22 3437 *PL_reglastparen = n;
a01268b5 3438 *PL_reglastcloseparen = n;
a0d0e21e 3439 break;
c277df42
IZ
3440 case GROUPP:
3441 n = ARG(scan); /* which paren pair */
5d9a96ca 3442 st->sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
3443 break;
3444 case IFTHEN:
2c2d71f5 3445 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
5d9a96ca 3446 if (st->sw)
c277df42
IZ
3447 next = NEXTOPER(NEXTOPER(scan));
3448 else {
3449 next = scan + ARG(scan);
3450 if (OP(next) == IFTHEN) /* Fake one. */
3451 next = NEXTOPER(NEXTOPER(next));
3452 }
3453 break;
3454 case LOGICAL:
5d9a96ca 3455 st->logical = scan->flags;
c277df42 3456 break;
2ab05381 3457/*******************************************************************
a0374537
DM
3458 cc points to the regmatch_state associated with the most recent CURLYX.
3459 This struct contains info about the innermost (...)* loop (an
3460 "infoblock"), and a pointer to the next outer cc.
2ab05381
IZ
3461
3462 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3463
95b24440 3464 1) After matching Y, regnode for CURLYX is processed;
2ab05381 3465
a0374537 3466 2) This regnode populates cc, and calls regmatch() recursively
95b24440 3467 with the starting point at WHILEM node;
2ab05381
IZ
3468
3469 3) Each hit of WHILEM node tries to match A and Z (in the order
3470 depending on the current iteration, min/max of {min,max} and
3471 greediness). The information about where are nodes for "A"
a0374537 3472 and "Z" is read from cc, as is info on how many times "A"
2ab05381
IZ
3473 was already matched, and greediness.
3474
3475 4) After A matches, the same WHILEM node is hit again.
3476
95b24440 3477 5) Each time WHILEM is hit, cc is the infoblock created by CURLYX
2ab05381 3478 of the same pair. Thus when WHILEM tries to match Z, it temporarily
95b24440 3479 resets cc, since this Y(A)*Z can be a part of some other loop:
2ab05381
IZ
3480 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
3481 of the external loop.
3482
a0374537 3483 Currently present infoblocks form a tree with a stem formed by st->cc
2ab05381
IZ
3484 and whatever it mentions via ->next, and additional attached trees
3485 corresponding to temporarily unset infoblocks as in "5" above.
3486
95b24440 3487 In the following picture, infoblocks for outer loop of
2ab05381
IZ
3488 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
3489 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
3490 infoblocks are drawn below the "reset" infoblock.
3491
3492 In fact in the picture below we do not show failed matches for Z and T
3493 by WHILEM blocks. [We illustrate minimal matches, since for them it is
3494 more obvious *why* one needs to *temporary* unset infoblocks.]
3495
3496 Matched REx position InfoBlocks Comment
3497 (Y(A)*?Z)*?T x
3498 Y(A)*?Z)*?T x <- O
3499 Y (A)*?Z)*?T x <- O
3500 Y A)*?Z)*?T x <- O <- I
3501 YA )*?Z)*?T x <- O <- I
3502 YA A)*?Z)*?T x <- O <- I
3503 YAA )*?Z)*?T x <- O <- I
3504 YAA Z)*?T x <- O # Temporary unset I
3505 I
3506
3507 YAAZ Y(A)*?Z)*?T x <- O
3508 I
3509
3510 YAAZY (A)*?Z)*?T x <- O
3511 I
3512
3513 YAAZY A)*?Z)*?T x <- O <- I
3514 I
3515
3516 YAAZYA )*?Z)*?T x <- O <- I
3517 I
3518
3519 YAAZYA Z)*?T x <- O # Temporary unset I
3520 I,I
3521
3522 YAAZYAZ )*?T x <- O
3523 I,I
3524
3525 YAAZYAZ T x # Temporary unset O
3526 O
3527 I,I
3528
3529 YAAZYAZT x
3530 O
3531 I,I
3532 *******************************************************************/
95b24440 3533
a0d0e21e 3534 case CURLYX: {
cb434fcc
IZ
3535 /* No need to save/restore up to this paren */
3536 I32 parenfloor = scan->flags;
c277df42 3537
c2b7afd3
NC
3538 /* Dave says:
3539
3540 CURLYX and WHILEM are always paired: they're the moral
3541 equivalent of pp_enteriter anbd pp_iter.
3542
3543 The only time next could be null is if the node tree is
3544 corrupt. This was mentioned on p5p a few days ago.
3545
3546 See http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2006-04/msg00556.html
3547 So we'll assert that this is true:
3548 */
3549 assert(next);
30b2893d 3550 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
c277df42 3551 next += ARG(next);
cb434fcc
IZ
3552 /* XXXX Probably it is better to teach regpush to support
3553 parenfloor > PL_regsize... */
eb160463 3554 if (parenfloor > (I32)*PL_reglastparen)
cb434fcc 3555 parenfloor = *PL_reglastparen; /* Pessimization... */
a0374537 3556
d8319b27
DM
3557 st->u.curlyx.cp = PL_savestack_ix;
3558 st->u.curlyx.outercc = st->cc;
a0374537
DM
3559 st->cc = st;
3560 /* these fields contain the state of the current curly.
3561 * they are accessed by subsequent WHILEMs;
3562 * cur and lastloc are also updated by WHILEM */
d8319b27
DM
3563 st->u.curlyx.parenfloor = parenfloor;
3564 st->u.curlyx.cur = -1; /* this will be updated by WHILEM */
3565 st->u.curlyx.min = ARG1(scan);
3566 st->u.curlyx.max = ARG2(scan);
3567 st->u.curlyx.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
3568 st->u.curlyx.lastloc = 0;
a0374537
DM
3569 /* st->next and st->minmod are also read by WHILEM */
3570
3280af22 3571 PL_reginput = locinput;
95b24440
DM
3572 REGMATCH(PREVOPER(next), CURLYX); /* start on the WHILEM */
3573 /*** all unsaved local vars undefined at this point */
d8319b27
DM
3574 regcpblow(st->u.curlyx.cp);
3575 st->cc = st->u.curlyx.outercc;
95b24440 3576 saySAME(result);
a0d0e21e 3577 }
5f66b61c 3578 /* NOTREACHED */
a0d0e21e
LW
3579 case WHILEM: {
3580 /*
3581 * This is really hard to understand, because after we match
3582 * what we're trying to match, we must make sure the rest of
2c2d71f5 3583 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
3584 * to go back UP the parse tree by recursing ever deeper. And
3585 * if it fails, we have to reset our parent's current state
3586 * that we can try again after backing off.
3587 */
3588
c2b7afd3
NC
3589 /* Dave says:
3590
3591 st->cc gets initialised by CURLYX ready for use by WHILEM.
3592 So again, unless somethings been corrupted, st->cc cannot
3593 be null at that point in WHILEM.
3594
3595 See http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2006-04/msg00556.html
3596 So we'll assert that this is true:
3597 */
3598 assert(st->cc);
d8319b27
DM
3599 st->u.whilem.lastloc = st->cc->u.curlyx.lastloc; /* Detection of 0-len. */
3600 st->u.whilem.cache_offset = 0;
3601 st->u.whilem.cache_bit = 0;
c277df42 3602
d8319b27 3603 n = st->cc->u.curlyx.cur + 1; /* how many we know we matched */
3280af22 3604 PL_reginput = locinput;
a0d0e21e 3605
a3621e74 3606 DEBUG_EXECUTE_r(
9041c2e3 3607 PerlIO_printf(Perl_debug_log,
91f3b821 3608 "%*s %ld out of %ld..%ld cc=%"UVxf"\n",
3280af22 3609 REPORT_CODE_OFF+PL_regindent*2, "",
d8319b27
DM
3610 (long)n, (long)st->cc->u.curlyx.min,
3611 (long)st->cc->u.curlyx.max, PTR2UV(st->cc))
c277df42 3612 );
4633a7c4 3613
a0d0e21e
LW
3614 /* If degenerate scan matches "", assume scan done. */
3615
d8319b27
DM
3616 if (locinput == st->cc->u.curlyx.lastloc && n >= st->cc->u.curlyx.min) {
3617 st->u.whilem.savecc = st->cc;
3618 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3619 if (st->cc)
d8319b27 3620 st->ln = st->cc->u.curlyx.cur;
a3621e74 3621 DEBUG_EXECUTE_r(
c3464db5
DD
3622 PerlIO_printf(Perl_debug_log,
3623 "%*s empty match detected, try continuation...\n",
3280af22 3624 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3625 );
d8319b27 3626 REGMATCH(st->u.whilem.savecc->next, WHILEM1);
95b24440 3627 /*** all unsaved local vars undefined at this point */
d8319b27 3628 st->cc = st->u.whilem.savecc;
95b24440 3629 if (result)
4633a7c4 3630 sayYES;
d8319b27
DM
3631 if (st->cc->u.curlyx.outercc)
3632 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
4633a7c4 3633 sayNO;
a0d0e21e
LW
3634 }
3635
3636 /* First just match a string of min scans. */
3637
d8319b27
DM
3638 if (n < st->cc->u.curlyx.min) {
3639 st->cc->u.curlyx.cur = n;
3640 st->cc->u.curlyx.lastloc = locinput;
3641 REGMATCH(st->cc->u.curlyx.scan, WHILEM2);
95b24440
DM
3642 /*** all unsaved local vars undefined at this point */
3643 if (result)
4633a7c4 3644 sayYES;
d8319b27
DM
3645 st->cc->u.curlyx.cur = n - 1;
3646 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
4633a7c4 3647 sayNO;
a0d0e21e
LW
3648 }
3649
2c2d71f5
JH
3650 if (scan->flags) {
3651 /* Check whether we already were at this position.
3652 Postpone detection until we know the match is not
3653 *that* much linear. */
3654 if (!PL_reg_maxiter) {
3655 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3656 PL_reg_leftiter = PL_reg_maxiter;
3657 }
3658 if (PL_reg_leftiter-- == 0) {
a3b680e6 3659 const I32 size = (PL_reg_maxiter + 7 + POSCACHE_START)/8;
2c2d71f5 3660 if (PL_reg_poscache) {
eb160463 3661 if ((I32)PL_reg_poscache_size < size) {
2c2d71f5
JH
3662 Renew(PL_reg_poscache, size, char);
3663 PL_reg_poscache_size = size;
3664 }
3665 Zero(PL_reg_poscache, size, char);
3666 }
3667 else {
3668 PL_reg_poscache_size = size;
a02a5408 3669 Newxz(PL_reg_poscache, size, char);
2c2d71f5 3670 }
a3621e74 3671 DEBUG_EXECUTE_r(
2c2d71f5
JH
3672 PerlIO_printf(Perl_debug_log,
3673 "%sDetected a super-linear match, switching on caching%s...\n",
3674 PL_colors[4], PL_colors[5])
3675 );
3676 }
3677 if (PL_reg_leftiter < 0) {
d8319b27 3678 st->u.whilem.cache_offset = locinput - PL_bostr;
2c2d71f5 3679
d8319b27
DM
3680 st->u.whilem.cache_offset = (scan->flags & 0xf) - 1 + POSCACHE_START
3681 + st->u.whilem.cache_offset * (scan->flags>>4);
3682 st->u.whilem.cache_bit = st->u.whilem.cache_offset % 8;
3683 st->u.whilem.cache_offset /= 8;
3684 if (PL_reg_poscache[st->u.whilem.cache_offset] & (1<<st->u.whilem.cache_bit)) {
a3621e74 3685 DEBUG_EXECUTE_r(
2c2d71f5
JH
3686 PerlIO_printf(Perl_debug_log,
3687 "%*s already tried at this position...\n",
3688 REPORT_CODE_OFF+PL_regindent*2, "")
3689 );
3ab3c9b4
HS
3690 if (PL_reg_poscache[0] & (1<<POSCACHE_SUCCESS))
3691 /* cache records success */
c2b0868c
HS
3692 sayYES;
3693 else
3ab3c9b4 3694 /* cache records failure */
c2b0868c 3695 sayNO_SILENT;
2c2d71f5 3696 }
2c2d71f5
JH
3697 }
3698 }
3699
a0d0e21e
LW
3700 /* Prefer next over scan for minimal matching. */
3701
5d9a96ca 3702 if (st->cc->minmod) {
d8319b27
DM
3703 st->u.whilem.savecc = st->cc;
3704 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3705 if (st->cc)
d8319b27
DM
3706 st->ln = st->cc->u.curlyx.cur;
3707 st->u.whilem.cp = regcppush(st->u.whilem.savecc->u.curlyx.parenfloor);
3708 REGCP_SET(st->u.whilem.lastcp);
3709 REGMATCH(st->u.whilem.savecc->next, WHILEM3);
95b24440 3710 /*** all unsaved local vars undefined at this point */
d8319b27 3711 st->cc = st->u.whilem.savecc;
95b24440 3712 if (result) {
d8319b27 3713 regcpblow(st->u.whilem.cp);
3ab3c9b4 3714 CACHEsayYES; /* All done. */
5f05dabc 3715 }
d8319b27 3716 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3717 regcppop(rex);
d8319b27
DM
3718 if (st->cc->u.curlyx.outercc)
3719 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
a0d0e21e 3720
d8319b27 3721 if (n >= st->cc->u.curlyx.max) { /* Maximum greed exceeded? */
9041c2e3 3722 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3723 && !(PL_reg_flags & RF_warned)) {
3724 PL_reg_flags |= RF_warned;
9014280d 3725 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
2f3ca594
GS
3726 "Complex regular subexpression recursion",
3727 REG_INFTY - 1);
c277df42 3728 }
3ab3c9b4 3729 CACHEsayNO;
c277df42 3730 }
a687059c 3731
a3621e74 3732 DEBUG_EXECUTE_r(
c3464db5
DD
3733 PerlIO_printf(Perl_debug_log,
3734 "%*s trying longer...\n",
3280af22 3735 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3736 );
a0d0e21e 3737 /* Try scanning more and see if it helps. */
3280af22 3738 PL_reginput = locinput;
d8319b27
DM
3739 st->cc->u.curlyx.cur = n;
3740 st->cc->u.curlyx.lastloc = locinput;
3741 st->u.whilem.cp = regcppush(st->cc->u.curlyx.parenfloor);
3742 REGCP_SET(st->u.whilem.lastcp);
3743 REGMATCH(st->cc->u.curlyx.scan, WHILEM4);
95b24440
DM
3744 /*** all unsaved local vars undefined at this point */
3745 if (result) {
d8319b27 3746 regcpblow(st->u.whilem.cp);
3ab3c9b4 3747 CACHEsayYES;
5f05dabc 3748 }
d8319b27 3749 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3750 regcppop(rex);
d8319b27
DM
3751 st->cc->u.curlyx.cur = n - 1;
3752 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
3ab3c9b4 3753 CACHEsayNO;
a0d0e21e
LW
3754 }
3755
3756 /* Prefer scan over next for maximal matching. */
3757
d8319b27
DM
3758 if (n < st->cc->u.curlyx.max) { /* More greed allowed? */
3759 st->u.whilem.cp = regcppush(st->cc->u.curlyx.parenfloor);
3760 st->cc->u.curlyx.cur = n;
3761 st->cc->u.curlyx.lastloc = locinput;
3762 REGCP_SET(st->u.whilem.lastcp);
3763 REGMATCH(st->cc->u.curlyx.scan, WHILEM5);
95b24440
DM
3764 /*** all unsaved local vars undefined at this point */
3765 if (result) {
d8319b27 3766 regcpblow(st->u.whilem.cp);
3ab3c9b4 3767 CACHEsayYES;
5f05dabc 3768 }
d8319b27 3769 REGCP_UNWIND(st->u.whilem.lastcp);
4f639d21 3770 regcppop(rex); /* Restore some previous $<digit>s? */
3280af22 3771 PL_reginput = locinput;
a3621e74 3772 DEBUG_EXECUTE_r(
c3464db5
DD
3773 PerlIO_printf(Perl_debug_log,
3774 "%*s failed, try continuation...\n",
3280af22 3775 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3776 );
3777 }
9041c2e3 3778 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3779 && !(PL_reg_flags & RF_warned)) {
3280af22 3780 PL_reg_flags |= RF_warned;
9014280d 3781 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
cb5d145d
GS
3782 "Complex regular subexpression recursion",
3783 REG_INFTY - 1);
a0d0e21e
LW
3784 }
3785
3786 /* Failed deeper matches of scan, so see if this one works. */
d8319b27
DM
3787 st->u.whilem.savecc = st->cc;
3788 st->cc = st->cc->u.curlyx.outercc;
5d9a96ca 3789 if (st->cc)
d8319b27
DM
3790 st->ln = st->cc->u.curlyx.cur;
3791 REGMATCH(st->u.whilem.savecc->next, WHILEM6);
95b24440 3792 /*** all unsaved local vars undefined at this point */
d8319b27 3793 st->cc = st->u.whilem.savecc;
95b24440 3794 if (result)
3ab3c9b4 3795 CACHEsayYES;
d8319b27
DM
3796 if (st->cc->u.curlyx.outercc)
3797 st->cc->u.curlyx.outercc->u.curlyx.cur = st->ln;
3798 st->cc->u.curlyx.cur = n - 1;
3799 st->cc->u.curlyx.lastloc = st->u.whilem.lastloc;
3ab3c9b4 3800 CACHEsayNO;
a0d0e21e 3801 }
5f66b61c 3802 /* NOTREACHED */
9041c2e3 3803 case BRANCHJ:
c277df42
IZ
3804 next = scan + ARG(scan);
3805 if (next == scan)
3806 next = NULL;
3807 inner = NEXTOPER(NEXTOPER(scan));
3808 goto do_branch;
9041c2e3 3809 case BRANCH:
c277df42
IZ
3810 inner = NEXTOPER(scan);
3811 do_branch:
3812 {
e822a8b4
DM
3813 I32 type;
3814 type = OP(scan);
ae5031b3 3815 if (!next || OP(next) != type) /* No choice. */
c277df42 3816 next = inner; /* Avoid recursion. */
a0d0e21e 3817 else {
a3b680e6 3818 const I32 lastparen = *PL_reglastparen;
02db2b7b 3819 /* Put unwinding data on stack */
6136c704
AL
3820 const I32 unwind1 = SSNEWt(1,re_unwind_branch_t);
3821 re_unwind_branch_t * const uw = SSPTRt(unwind1,re_unwind_branch_t);
3822
5d9a96ca
DM
3823 uw->prev = st->unwind;
3824 st->unwind = unwind1;
e822a8b4 3825 uw->type = ((type == BRANCH)
02db2b7b
IZ
3826 ? RE_UNWIND_BRANCH
3827 : RE_UNWIND_BRANCHJ);
3828 uw->lastparen = lastparen;
3829 uw->next = next;
3830 uw->locinput = locinput;
3831 uw->nextchr = nextchr;
3a2830be 3832 uw->minmod = st->minmod;
02db2b7b
IZ
3833#ifdef DEBUGGING
3834 uw->regindent = ++PL_regindent;
3835#endif
c277df42 3836
02db2b7b
IZ
3837 REGCP_SET(uw->lastcp);
3838
3839 /* Now go into the first branch */
3840 next = inner;
a687059c 3841 }
a0d0e21e
LW
3842 }
3843 break;
3844 case MINMOD:
5d9a96ca 3845 st->minmod = 1;
a0d0e21e 3846 break;
c277df42
IZ
3847 case CURLYM:
3848 {
d8319b27 3849 st->u.curlym.l = st->u.curlym.matches = 0;
9041c2e3 3850
c277df42 3851 /* We suppose that the next guy does not need
0e788c72 3852 backtracking: in particular, it is of constant non-zero length,
c277df42 3853 and has no parenths to influence future backrefs. */
5d9a96ca 3854 st->ln = ARG1(scan); /* min to match */
c277df42 3855 n = ARG2(scan); /* max to match */
d8319b27
DM
3856 st->u.curlym.paren = scan->flags;
3857 if (st->u.curlym.paren) {
3858 if (st->u.curlym.paren > PL_regsize)
3859 PL_regsize = st->u.curlym.paren;
3860 if (st->u.curlym.paren > (I32)*PL_reglastparen)
3861 *PL_reglastparen = st->u.curlym.paren;
c277df42 3862 }
dc45a647 3863 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
d8319b27 3864 if (st->u.curlym.paren)
c277df42 3865 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3866 PL_reginput = locinput;
d8319b27 3867 st->u.curlym.maxwanted = st->minmod ? st->ln : n;
0cadcf80
DM
3868 while (PL_reginput < PL_regeol && st->u.curlym.matches < st->u.curlym.maxwanted) {
3869 /* resume to current state on success */
3870 st->u.yes.prev_yes_state = yes_state;
3871 yes_state = st;
3872 REGMATCH(scan, CURLYM1);
3873 yes_state = st->u.yes.prev_yes_state;
3874 /*** all unsaved local vars undefined at this point */
3875 if (!result)
3876 break;
3877 /* on first match, determine length, u.curlym.l */
3878 if (!st->u.curlym.matches++) {
3879 if (PL_reg_match_utf8) {
3880 char *s = locinput;
3881 while (s < PL_reginput) {
3882 st->u.curlym.l++;
3883 s += UTF8SKIP(s);
6407bf3b
DM
3884 }
3885 }
0cadcf80
DM
3886 else {
3887 st->u.curlym.l = PL_reginput - locinput;
3888 }
3889 if (st->u.curlym.l == 0) {
3890 st->u.curlym.matches = st->u.curlym.maxwanted;
3891 break;
3892 }
6407bf3b 3893 }
0cadcf80 3894 locinput = PL_reginput;
6407bf3b
DM
3895 }
3896
3897 PL_reginput = locinput;
0cadcf80 3898 if (st->u.curlym.matches < st->ln) {
5d9a96ca 3899 st->minmod = 0;
0cadcf80
DM
3900 sayNO;
3901 }
5f80c4cf 3902
0cadcf80
DM
3903 DEBUG_EXECUTE_r(
3904 PerlIO_printf(Perl_debug_log,
3905 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
3906 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
3907 (IV) st->u.curlym.matches, (IV)st->u.curlym.l)
3908 );
3909
3910 /* calculate c1 and c1 for possible match of 1st char
3911 * following curly */
9e137952 3912 st->u.curlym.c1 = st->u.curlym.c2 = CHRTEST_VOID;
0cadcf80
DM
3913 if (HAS_TEXT(next) || JUMPABLE(next)) {
3914 regnode *text_node = next;
3915 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
3916 if (HAS_TEXT(text_node)
3917 && PL_regkind[(U8)OP(text_node)] != REF)
3918 {
3919 st->u.curlym.c1 = (U8)*STRING(text_node);
3920 st->u.curlym.c2 =
3921 (OP(text_node) == EXACTF || OP(text_node) == REFF)
3922 ? PL_fold[st->u.curlym.c1]
3923 : (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
3924 ? PL_fold_locale[st->u.curlym.c1]
3925 : st->u.curlym.c1;
3926 }
3927 }
5f80c4cf 3928
0cadcf80
DM
3929 REGCP_SET(st->u.curlym.lastcp);
3930
3931 st->u.curlym.minmod = st->minmod;
3932 st->minmod = 0;
3933 while (st->u.curlym.matches >= st->ln
3934 && (st->u.curlym.matches <= n
3935 /* for REG_INFTY, ln could overflow to negative */
3936 || (n == REG_INFTY && st->u.curlym.matches >= 0)))
3937 {
3938 /* If it could work, try it. */
9e137952 3939 if (st->u.curlym.c1 == CHRTEST_VOID ||
0cadcf80
DM
3940 UCHARAT(PL_reginput) == st->u.curlym.c1 ||
3941 UCHARAT(PL_reginput) == st->u.curlym.c2)
3942 {
3943 DEBUG_EXECUTE_r(
3944 PerlIO_printf(Perl_debug_log,
3945 "%*s trying tail with matches=%"IVdf"...\n",
3946 (int)(REPORT_CODE_OFF+PL_regindent*2),
3947 "", (IV)st->u.curlym.matches)
3948 );
3949 if (st->u.curlym.paren) {
3950 if (st->u.curlym.matches) {
3951 PL_regstartp[st->u.curlym.paren]
3952 = HOPc(PL_reginput, -st->u.curlym.l) - PL_bostr;
3953 PL_regendp[st->u.curlym.paren] = PL_reginput - PL_bostr;
cca55fe3 3954 }
5f80c4cf 3955 else
0cadcf80 3956 PL_regendp[st->u.curlym.paren] = -1;
5f80c4cf 3957 }
0cadcf80
DM
3958 /* resume to current state on success */
3959 st->u.yes.prev_yes_state = yes_state;
3960 yes_state = st;
3961 REGMATCH(next, CURLYM2);
3962 yes_state = st->u.yes.prev_yes_state;
3963 /*** all unsaved local vars undefined at this point */
3964 if (result)
3965 /* XXX tmp sayYES; */
3966 sayYES_FINAL;
3967 REGCP_UNWIND(st->u.curlym.lastcp);
a0ed51b3 3968 }
0cadcf80
DM
3969 /* Couldn't or didn't -- move forward/backward. */
3970 if (st->u.curlym.minmod) {
3280af22 3971 PL_reginput = locinput;
dad79028
DM
3972 /* resume to current state on success */
3973 st->u.yes.prev_yes_state = yes_state;
3974 yes_state = st;
95b24440 3975 REGMATCH(scan, CURLYM3);
dad79028 3976 yes_state = st->u.yes.prev_yes_state;
95b24440
DM
3977 /*** all unsaved local vars undefined at this point */
3978 if (result) {
0cadcf80 3979 st->u.curlym.matches++;
3280af22 3980 locinput = PL_reginput;
c277df42
IZ
3981 }
3982 else
3983 sayNO;
3984 }
0cadcf80 3985 else {
d8319b27
DM
3986 st->u.curlym.matches--;
3987 locinput = HOPc(locinput, -st->u.curlym.l);
3280af22 3988 PL_reginput = locinput;
c277df42
IZ
3989 }
3990 }
3991 sayNO;
5f66b61c 3992 /* NOTREACHED */
c277df42
IZ
3993 break;
3994 }
3995 case CURLYN:
d8319b27
DM
3996 st->u.plus.paren = scan->flags; /* Which paren to set */
3997 if (st->u.plus.paren > PL_regsize)
3998 PL_regsize = st->u.plus.paren;
3999 if (st->u.plus.paren > (I32)*PL_reglastparen)
4000 *PL_reglastparen = st->u.plus.paren;
5d9a96ca 4001 st->ln = ARG1(scan); /* min to match */
c277df42 4002 n = ARG2(scan); /* max to match */
dc45a647 4003 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 4004 goto repeat;
a0d0e21e 4005 case CURLY:
d8319b27 4006 st->u.plus.paren = 0;
5d9a96ca 4007 st->ln = ARG1(scan); /* min to match */
a0d0e21e 4008 n = ARG2(scan); /* max to match */
dc45a647 4009 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
4010 goto repeat;
4011 case STAR:
5d9a96ca 4012 st->ln = 0;
c277df42 4013 n = REG_INFTY;
a0d0e21e 4014 scan = NEXTOPER(scan);
d8319b27 4015 st->u.plus.paren = 0;
a0d0e21e
LW
4016 goto repeat;
4017 case PLUS:
5d9a96ca 4018 st->ln = 1;
c277df42
IZ
4019 n = REG_INFTY;
4020 scan = NEXTOPER(scan);
d8319b27 4021 st->u.plus.paren = 0;
c277df42 4022 repeat:
a0d0e21e
LW
4023 /*
4024 * Lookahead to avoid useless match attempts
4025 * when we know what character comes next.
4026 */
5f80c4cf
JP
4027
4028 /*
4029 * Used to only do .*x and .*?x, but now it allows
4030 * for )'s, ('s and (?{ ... })'s to be in the way
4031 * of the quantifier and the EXACT-like node. -- japhy
4032 */
4033
cca55fe3 4034 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
4035 U8 *s;
4036 regnode *text_node = next;
4037
cca55fe3 4038 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 4039
9e137952
DM
4040 if (! HAS_TEXT(text_node))
4041 st->u.plus.c1 = st->u.plus.c2 = CHRTEST_VOID;
5f80c4cf 4042 else {
cca55fe3 4043 if (PL_regkind[(U8)OP(text_node)] == REF) {
9e137952 4044 st->u.plus.c1 = st->u.plus.c2 = CHRTEST_VOID;
44a68960 4045 goto assume_ok_easy;
cca55fe3
JP
4046 }
4047 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
4048
4049 if (!UTF) {
d8319b27 4050 st->u.plus.c2 = st->u.plus.c1 = *s;
f65d3ee7 4051 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
d8319b27 4052 st->u.plus.c2 = PL_fold[st->u.plus.c1];
f65d3ee7 4053 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
d8319b27 4054 st->u.plus.c2 = PL_fold_locale[st->u.plus.c1];
1aa99e6b 4055 }
5f80c4cf 4056 else { /* UTF */
f65d3ee7 4057 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 4058 STRLEN ulen1, ulen2;
89ebb4a3
JH
4059 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4060 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
a2a2844f
JH
4061
4062 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4063 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4064
d8319b27 4065 st->u.plus.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
041457d9 4066 uniflags);
d8319b27 4067 st->u.plus.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
041457d9 4068 uniflags);
5f80c4cf
JP
4069 }
4070 else {
d8319b27 4071 st->u.plus.c2 = st->u.plus.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
041457d9 4072 uniflags);
5f80c4cf 4073 }
1aa99e6b
IH
4074 }
4075 }
bbce6d69 4076 }
a0d0e21e 4077 else
9e137952 4078 st->u.plus.c1 = st->u.plus.c2 = CHRTEST_VOID;
cca55fe3 4079 assume_ok_easy:
3280af22 4080 PL_reginput = locinput;
5d9a96ca
DM
4081 if (st->minmod) {
4082 st->minmod = 0;
32fc9b6a 4083 if (st->ln && regrepeat(rex, scan, st->ln) < st->ln)
4633a7c4 4084 sayNO;
a0ed51b3 4085 locinput = PL_reginput;
d8319b27 4086 REGCP_SET(st->u.plus.lastcp);
9e137952 4087 if (st->u.plus.c1 != CHRTEST_VOID) {
d8319b27
DM
4088 st->u.plus.old = locinput;
4089 st->u.plus.count = 0;
0fe9bf95 4090
1aa99e6b 4091 if (n == REG_INFTY) {
d8319b27 4092 st->u.plus.e = PL_regeol - 1;
1aa99e6b 4093 if (do_utf8)
d8319b27
DM
4094 while (UTF8_IS_CONTINUATION(*(U8*)st->u.plus.e))
4095 st->u.plus.e--;
1aa99e6b
IH
4096 }
4097 else if (do_utf8) {
5d9a96ca 4098 int m = n - st->ln;
d8319b27
DM
4099 for (st->u.plus.e = locinput;
4100 m >0 && st->u.plus.e + UTF8SKIP(st->u.plus.e) <= PL_regeol; m--)
4101 st->u.plus.e += UTF8SKIP(st->u.plus.e);
1aa99e6b
IH
4102 }
4103 else {
d8319b27
DM
4104 st->u.plus.e = locinput + n - st->ln;
4105 if (st->u.plus.e >= PL_regeol)
4106 st->u.plus.e = PL_regeol - 1;
1aa99e6b 4107 }
0fe9bf95
IZ
4108 while (1) {
4109 /* Find place 'next' could work */
1aa99e6b 4110 if (!do_utf8) {
d8319b27
DM
4111 if (st->u.plus.c1 == st->u.plus.c2) {
4112 while (locinput <= st->u.plus.e &&
4113 UCHARAT(locinput) != st->u.plus.c1)
1aa99e6b
IH
4114 locinput++;
4115 } else {
d8319b27
DM
4116 while (locinput <= st->u.plus.e
4117 && UCHARAT(locinput) != st->u.plus.c1
4118 && UCHARAT(locinput) != st->u.plus.c2)
1aa99e6b
IH
4119 locinput++;
4120 }
d8319b27 4121 st->u.plus.count = locinput - st->u.plus.old;
1aa99e6b
IH
4122 }
4123 else {
d8319b27 4124 if (st->u.plus.c1 == st->u.plus.c2) {
a3b680e6 4125 STRLEN len;
872c91ae
JH
4126 /* count initialised to
4127 * utf8_distance(old, locinput) */
d8319b27 4128 while (locinput <= st->u.plus.e &&
872c91ae 4129 utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4130 UTF8_MAXBYTES, &len,
d8319b27 4131 uniflags) != (UV)st->u.plus.c1) {
1aa99e6b 4132 locinput += len;
d8319b27 4133 st->u.plus.count++;
b2f2f093 4134 }
1aa99e6b 4135 } else {
872c91ae
JH
4136 /* count initialised to
4137 * utf8_distance(old, locinput) */
d8319b27 4138 while (locinput <= st->u.plus.e) {
c4fd8992
AL
4139 STRLEN len;
4140 const UV c = utf8n_to_uvchr((U8*)locinput,
89ebb4a3 4141 UTF8_MAXBYTES, &len,
041457d9 4142 uniflags);
d8319b27 4143 if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
1aa99e6b 4144 break;
b2f2f093 4145 locinput += len;
d8319b27 4146 st->u.plus.count++;
1aa99e6b
IH
4147 }
4148 }
0fe9bf95 4149 }
d8319b27 4150 if (locinput > st->u.plus.e)
0fe9bf95
IZ
4151 sayNO;
4152 /* PL_reginput == old now */
d8319b27 4153 if (locinput != st->u.plus.old) {
5d9a96ca 4154 st->ln = 1; /* Did some */
32fc9b6a 4155 if (regrepeat(rex, scan, st->u.plus.count) < st->u.plus.count)
0fe9bf95
IZ
4156 sayNO;
4157 }
4158 /* PL_reginput == locinput now */
d8319b27 4159 TRYPAREN(st->u.plus.paren, st->ln, locinput, PLUS1);
95b24440 4160 /*** all unsaved local vars undefined at this point */
0fe9bf95 4161 PL_reginput = locinput; /* Could be reset... */
d8319b27 4162 REGCP_UNWIND(st->u.plus.lastcp);
0fe9bf95 4163 /* Couldn't or didn't -- move forward. */
d8319b27 4164 st->u.plus.old = locinput;
1aa99e6b
IH
4165 if (do_utf8)
4166 locinput += UTF8SKIP(locinput);
4167 else
4168 locinput++;
d8319b27 4169 st->u.plus.count = 1;
0fe9bf95
IZ
4170 }
4171 }
4172 else
5d9a96ca 4173 while (n >= st->ln || (n == REG_INFTY && st->ln > 0)) { /* ln overflow ? */
1aa99e6b 4174 UV c;
9e137952 4175 if (st->u.plus.c1 != CHRTEST_VOID) {
1aa99e6b 4176 if (do_utf8)
872c91ae 4177 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4178 UTF8_MAXBYTES, 0,
041457d9 4179 uniflags);
1aa99e6b 4180 else
9041c2e3 4181 c = UCHARAT(PL_reginput);
2390ecbc 4182 /* If it could work, try it. */
d8319b27 4183 if (c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
2390ecbc 4184 {
d8319b27 4185 TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS2);
95b24440 4186 /*** all unsaved local vars undefined at this point */
d8319b27 4187 REGCP_UNWIND(st->u.plus.lastcp);
2390ecbc 4188 }
1aa99e6b 4189 }
a0d0e21e 4190 /* If it could work, try it. */
9e137952 4191 else if (st->u.plus.c1 == CHRTEST_VOID)
bbce6d69 4192 {
d8319b27 4193 TRYPAREN(st->u.plus.paren, st->ln, PL_reginput, PLUS3);
95b24440 4194 /*** all unsaved local vars undefined at this point */
d8319b27 4195 REGCP_UNWIND(st->u.plus.lastcp);
bbce6d69 4196 }
c277df42 4197 /* Couldn't or didn't -- move forward. */
a0ed51b3 4198 PL_reginput = locinput;
32fc9b6a 4199 if (regrepeat(rex, scan, 1)) {
5d9a96ca 4200 st->ln++;
a0ed51b3
LW
4201 locinput = PL_reginput;
4202 }
4203 else
4633a7c4 4204 sayNO;
a0d0e21e
LW
4205 }
4206 }
4207 else {
32fc9b6a 4208 n = regrepeat(rex, scan, n);
a0ed51b3 4209 locinput = PL_reginput;
5d9a96ca 4210 if (st->ln < n && PL_regkind[(U8)OP(next)] == EOL &&
7fba1cd6 4211 (OP(next) != MEOL ||
15272685
HS
4212 OP(next) == SEOL || OP(next) == EOS))
4213 {
5d9a96ca 4214 st->ln = n; /* why back off? */
1aeab75a
GS
4215 /* ...because $ and \Z can match before *and* after
4216 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
4217 We should back off by one in this case. */
4218 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
5d9a96ca 4219 st->ln--;
1aeab75a 4220 }
d8319b27 4221 REGCP_SET(st->u.plus.lastcp);
1d5c262f 4222 {
8fa7f367 4223 UV c = 0;
5d9a96ca 4224 while (n >= st->ln) {
9e137952 4225 if (st->u.plus.c1 != CHRTEST_VOID) {
1aa99e6b 4226 if (do_utf8)
872c91ae 4227 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4228 UTF8_MAXBYTES, 0,
041457d9 4229 uniflags);
1aa99e6b 4230 else
9041c2e3 4231 c = UCHARAT(PL_reginput);
1aa99e6b 4232 }
c277df42 4233 /* If it could work, try it. */
9e137952 4234 if (st->u.plus.c1 == CHRTEST_VOID || c == (UV)st->u.plus.c1 || c == (UV)st->u.plus.c2)
c277df42 4235 {
d8319b27 4236 TRYPAREN(st->u.plus.paren, n, PL_reginput, PLUS4);
95b24440 4237 /*** all unsaved local vars undefined at this point */
d8319b27 4238 REGCP_UNWIND(st->u.plus.lastcp);
c277df42
IZ
4239 }
4240 /* Couldn't or didn't -- back up. */
4241 n--;
dfe13c55 4242 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 4243 }
a0d0e21e
LW
4244 }
4245 }
4633a7c4 4246 sayNO;
c277df42 4247 break;
a0d0e21e 4248 case END:
3b0527fe 4249 if (locinput < reginfo->till) {
a3621e74 4250 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
7821416a
IZ
4251 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4252 PL_colors[4],
4253 (long)(locinput - PL_reg_starttry),
3b0527fe 4254 (long)(reginfo->till - PL_reg_starttry),
7821416a
IZ
4255 PL_colors[5]));
4256 sayNO_FINAL; /* Cannot match: too short. */
4257 }
4258 PL_reginput = locinput; /* put where regtry can find it */
4259 sayYES_FINAL; /* Success! */
dad79028
DM
4260
4261 case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4262 DEBUG_EXECUTE_r(
4263 PerlIO_printf(Perl_debug_log,
4264 "%*s %ssubpattern success...%s\n",
4265 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5]));
3280af22 4266 PL_reginput = locinput; /* put where regtry can find it */
dad79028
DM
4267 sayYES_FINAL; /* Success! */
4268
4269 case SUSPEND: /* (?>FOO) */
4270 st->u.ifmatch.wanted = 1;
9fe1d20c 4271 PL_reginput = locinput;
9041c2e3 4272 goto do_ifmatch;
dad79028
DM
4273
4274 case UNLESSM: /* -ve lookaround: (?!FOO), or with flags, (?<!foo) */
4275 st->u.ifmatch.wanted = 0;
4276 goto ifmatch_trivial_fail_test;
4277
4278 case IFMATCH: /* +ve lookaround: (?=FOO), or with flags, (?<=foo) */
4279 st->u.ifmatch.wanted = 1;
4280 ifmatch_trivial_fail_test:
a0ed51b3 4281 if (scan->flags) {
52657f30 4282 char * const s = HOPBACKc(locinput, scan->flags);
dad79028
DM
4283 if (!s) {
4284 /* trivial fail */
4285 if (st->logical) {
4286 st->logical = 0;
4287 st->sw = 1 - st->u.ifmatch.wanted;
4288 }
4289 else if (st->u.ifmatch.wanted)
4290 sayNO;
4291 next = scan + ARG(scan);
4292 if (next == scan)
4293 next = NULL;
4294 break;
4295 }
efb30f32 4296 PL_reginput = s;
a0ed51b3
LW
4297 }
4298 else
4299 PL_reginput = locinput;
4300
c277df42 4301 do_ifmatch:
dad79028
DM
4302 /* resume to current state on success */
4303 st->u.yes.prev_yes_state = yes_state;
4304 yes_state = st;
4305 PUSH_STATE(newst, resume_IFMATCH);
4306 st = newst;
4307 next = NEXTOPER(NEXTOPER(scan));
4308 break;
4309
c277df42 4310 case LONGJMP:
c277df42
IZ
4311 next = scan + ARG(scan);
4312 if (next == scan)
4313 next = NULL;
a0d0e21e
LW
4314 break;
4315 default:
b900a521 4316 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 4317 PTR2UV(scan), OP(scan));
cea2e8a9 4318 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 4319 }
95b24440 4320
02db2b7b 4321 reenter:
a0d0e21e 4322 scan = next;
95b24440
DM
4323 continue;
4324 /* NOTREACHED */
4325
4326 /* simulate recursively calling regmatch(), but without actually
4327 * recursing - ie save the current state on the heap rather than on
4328 * the stack, then re-enter the loop. This avoids complex regexes
4329 * blowing the processor stack */
4330
4331 start_recurse:
4332 {
5d9a96ca
DM
4333 /* push new state */
4334 regmatch_state *oldst = st;
4335
4336 depth++;
4337
4338 /* grab the next free state slot */
4339 st++;
86545054 4340 if (st > SLAB_LAST(PL_regmatch_slab))
5d9a96ca
DM
4341 st = S_push_slab(aTHX);
4342 PL_regmatch_state = st;
4343
4344 oldst->next = next;
4345 oldst->n = n;
4346 oldst->locinput = locinput;
5d9a96ca
DM
4347
4348 st->cc = oldst->cc;
95b24440
DM
4349 locinput = PL_reginput;
4350 nextchr = UCHARAT(locinput);
5d9a96ca
DM
4351 st->minmod = 0;
4352 st->sw = 0;
4353 st->logical = 0;
4354 st->unwind = 0;
95b24440
DM
4355#ifdef DEBUGGING
4356 PL_regindent++;
4357#endif
4358 }
a0d0e21e 4359 }
a687059c 4360
aa283a38
DM
4361
4362
a0d0e21e
LW
4363 /*
4364 * We get here only if there's trouble -- normally "case END" is
4365 * the terminating point.
4366 */
cea2e8a9 4367 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 4368 /*NOTREACHED*/
4633a7c4
LW
4369 sayNO;
4370
7821416a 4371yes_final:
77cb431f
DM
4372
4373 if (yes_state) {
4374 /* we have successfully completed a subexpression, but we must now
4375 * pop to the state marked by yes_state and continue from there */
4376
dad79028 4377 /*XXX tmp for CURLYM*/
c4fd8992
AL
4378 regmatch_slab * const oslab = PL_regmatch_slab;
4379 regmatch_state * const ost = st;
4380 regmatch_state * const oys = yes_state;
dad79028
DM
4381 int odepth = depth;
4382
77cb431f
DM
4383 assert(st != yes_state);
4384 while (yes_state < SLAB_FIRST(PL_regmatch_slab)
4385 || yes_state > SLAB_LAST(PL_regmatch_slab))
4386 {
4387 /* not in this slab, pop slab */
4388 depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
4389 PL_regmatch_slab = PL_regmatch_slab->prev;
4390 st = SLAB_LAST(PL_regmatch_slab);
4391 }
4392 depth -= (st - yes_state);
dad79028 4393 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "POP STATE TO (%d)\n", depth));
77cb431f
DM
4394 st = yes_state;
4395 yes_state = st->u.yes.prev_yes_state;
4396 PL_regmatch_state = st;
4397
4398 switch (st->resume_state) {
4399 case resume_EVAL:
4400 if (st->u.eval.toggleutf)
4401 PL_reg_flags ^= RF_utf8;
4402 ReREFCNT_dec(rex);
4403 rex = st->u.eval.prev_rex;
4404 /* XXXX This is too dramatic a measure... */
4405 PL_reg_maxiter = 0;
4406 /* Restore parens of the caller without popping the
4407 * savestack */
4408 {
c4fd8992 4409 const I32 tmp = PL_savestack_ix;
77cb431f
DM
4410 PL_savestack_ix = st->u.eval.lastcp;
4411 regcppop(rex);
4412 PL_savestack_ix = tmp;
4413 }
4414 PL_reginput = locinput;
4415 /* continue at the node following the (??{...}) */
4416 next = st->next;
4417 goto reenter;
4418
dad79028
DM
4419 case resume_IFMATCH:
4420 if (st->logical) {
4421 st->logical = 0;
4422 st->sw = st->u.ifmatch.wanted;
4423 }
4424 else if (!st->u.ifmatch.wanted)
4425 sayNO;
4426
4427 if (OP(st->scan) == SUSPEND)
4428 locinput = PL_reginput;
4429 else {
4430 locinput = PL_reginput = st->locinput;
4431 nextchr = UCHARAT(locinput);
4432 }
4433 next = st->scan + ARG(st->scan);
4434 if (next == st->scan)
4435 next = NULL;
4436 goto reenter;
4437
4438 /* XXX tmp don't handle yes_state yet */
4439 case resume_CURLYM1:
4440 case resume_CURLYM2:
4441 case resume_CURLYM3:
dad79028
DM
4442 PL_regmatch_slab =oslab;
4443 st = ost;
4444 PL_regmatch_state = st;
4445 depth = odepth;
4446 yes_state = oys;
4447 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "XXX revering a CURLYM\n"));
4448 goto yes;
4449
77cb431f
DM
4450 default:
4451 Perl_croak(aTHX_ "unexpected yes reume state");
4452 }
4453 }
4454
a3621e74 4455 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
e4584336 4456 PL_colors[4], PL_colors[5]));
4633a7c4
LW
4457yes:
4458#ifdef DEBUGGING
3280af22 4459 PL_regindent--;
4633a7c4 4460#endif
02db2b7b 4461
95b24440 4462 result = 1;
aa283a38 4463 /* XXX this is duplicate(ish) code to that in the do_no section.
77cb431f
DM
4464 * eventually a yes should just pop the stack back to the current
4465 * yes_state */
aa283a38
DM
4466 if (depth) {
4467 /* restore previous state and re-enter */
4468 POP_STATE;
4469
4470 switch (st->resume_state) {
4471 case resume_TRIE1:
4472 goto resume_point_TRIE1;
4473 case resume_TRIE2:
4474 goto resume_point_TRIE2;
aa283a38
DM
4475 case resume_CURLYX:
4476 goto resume_point_CURLYX;
4477 case resume_WHILEM1:
4478 goto resume_point_WHILEM1;
4479 case resume_WHILEM2:
4480 goto resume_point_WHILEM2;
4481 case resume_WHILEM3:
4482 goto resume_point_WHILEM3;
4483 case resume_WHILEM4:
4484 goto resume_point_WHILEM4;
4485 case resume_WHILEM5:
4486 goto resume_point_WHILEM5;
4487 case resume_WHILEM6:
4488 goto resume_point_WHILEM6;
4489 case resume_CURLYM1:
4490 goto resume_point_CURLYM1;
4491 case resume_CURLYM2:
4492 goto resume_point_CURLYM2;
4493 case resume_CURLYM3:
4494 goto resume_point_CURLYM3;
aa283a38
DM
4495 case resume_PLUS1:
4496 goto resume_point_PLUS1;
4497 case resume_PLUS2:
4498 goto resume_point_PLUS2;
4499 case resume_PLUS3:
4500 goto resume_point_PLUS3;
4501 case resume_PLUS4:
4502 goto resume_point_PLUS4;
77cb431f 4503
dad79028 4504 case resume_IFMATCH:
77cb431f 4505 case resume_EVAL:
aa283a38
DM
4506 default:
4507 Perl_croak(aTHX_ "regexp resume memory corruption");
4508 }
4509 }
4510 goto final_exit;
4633a7c4
LW
4511
4512no:
a3621e74 4513 DEBUG_EXECUTE_r(
7821416a
IZ
4514 PerlIO_printf(Perl_debug_log,
4515 "%*s %sfailed...%s\n",
e4584336 4516 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4], PL_colors[5])
7821416a
IZ
4517 );
4518 goto do_no;
4519no_final:
4520do_no:
5d9a96ca
DM
4521 if (st->unwind) {
4522 re_unwind_t * const uw = SSPTRt(st->unwind,re_unwind_t);
02db2b7b
IZ
4523
4524 switch (uw->type) {
4525 case RE_UNWIND_BRANCH:
4526 case RE_UNWIND_BRANCHJ:
4527 {
6136c704 4528 re_unwind_branch_t * const uwb = &(uw->branch);
a3b680e6 4529 const I32 lastparen = uwb->lastparen;
9041c2e3 4530
02db2b7b
IZ
4531 REGCP_UNWIND(uwb->lastcp);
4532 for (n = *PL_reglastparen; n > lastparen; n--)
4533 PL_regendp[n] = -1;
4534 *PL_reglastparen = n;
4535 scan = next = uwb->next;
3a2830be 4536 st->minmod = uwb->minmod;
9041c2e3
NIS
4537 if ( !scan ||
4538 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b 4539 ? BRANCH : BRANCHJ) ) { /* Failure */
5d9a96ca 4540 st->unwind = uwb->prev;
02db2b7b
IZ
4541#ifdef DEBUGGING
4542 PL_regindent--;
4543#endif
4544 goto do_no;
4545 }
4546 /* Have more choice yet. Reuse the same uwb. */
02db2b7b
IZ
4547 if ((n = (uwb->type == RE_UNWIND_BRANCH
4548 ? NEXT_OFF(next) : ARG(next))))
4549 next += n;
4550 else
4551 next = NULL; /* XXXX Needn't unwinding in this case... */
4552 uwb->next = next;
4553 next = NEXTOPER(scan);
4554 if (uwb->type == RE_UNWIND_BRANCHJ)
4555 next = NEXTOPER(next);
4556 locinput = uwb->locinput;
4557 nextchr = uwb->nextchr;
4558#ifdef DEBUGGING
4559 PL_regindent = uwb->regindent;
4560#endif
4561
4562 goto reenter;
4563 }
5f66b61c 4564 /* NOTREACHED */
02db2b7b
IZ
4565 default:
4566 Perl_croak(aTHX_ "regexp unwind memory corruption");
4567 }
5f66b61c 4568 /* NOTREACHED */
02db2b7b 4569 }
aa283a38 4570
4633a7c4 4571#ifdef DEBUGGING
3280af22 4572 PL_regindent--;
4633a7c4 4573#endif
95b24440 4574 result = 0;
5d9a96ca 4575
aa283a38
DM
4576 if (depth) {
4577 /* there's a previous state to backtrack to */
4578 POP_STATE;
5d9a96ca 4579 switch (st->resume_state) {
95b24440
DM
4580 case resume_TRIE1:
4581 goto resume_point_TRIE1;
4582 case resume_TRIE2:
4583 goto resume_point_TRIE2;
aa283a38
DM
4584 case resume_EVAL:
4585 /* we have failed an (??{...}). Restore state to the outer re
4586 * then re-throw the failure */
4587 if (st->u.eval.toggleutf)
4588 PL_reg_flags ^= RF_utf8;
4589 ReREFCNT_dec(rex);
4590 rex = st->u.eval.prev_rex;
77cb431f 4591 yes_state = st->u.yes.prev_yes_state;
aa283a38
DM
4592
4593 /* XXXX This is too dramatic a measure... */
4594 PL_reg_maxiter = 0;
4595
4596 PL_reginput = locinput;
4597 REGCP_UNWIND(st->u.eval.lastcp);
4598 regcppop(rex);
4599 goto do_no;
4600
95b24440
DM
4601 case resume_CURLYX:
4602 goto resume_point_CURLYX;
4603 case resume_WHILEM1:
4604 goto resume_point_WHILEM1;
4605 case resume_WHILEM2:
4606 goto resume_point_WHILEM2;
4607 case resume_WHILEM3:
4608 goto resume_point_WHILEM3;
4609 case resume_WHILEM4:
4610 goto resume_point_WHILEM4;
4611 case resume_WHILEM5:
4612 goto resume_point_WHILEM5;
4613 case resume_WHILEM6:
4614 goto resume_point_WHILEM6;
4615 case resume_CURLYM1:
4616 goto resume_point_CURLYM1;
4617 case resume_CURLYM2:
4618 goto resume_point_CURLYM2;
4619 case resume_CURLYM3:
4620 goto resume_point_CURLYM3;
95b24440 4621 case resume_IFMATCH:
dad79028
DM
4622 yes_state = st->u.yes.prev_yes_state;
4623 if (st->logical) {
4624 st->logical = 0;
4625 st->sw = !st->u.ifmatch.wanted;
4626 }
4627 else if (st->u.ifmatch.wanted)
4628 sayNO;
4629
4630 assert(OP(scan) != SUSPEND); /* XXX DAPM tmp */
4631 locinput = PL_reginput = st->locinput;
4632 nextchr = UCHARAT(locinput);
4633 next = scan + ARG(scan);
4634 if (next == scan)
4635 next = NULL;
4636 goto reenter;
4637
95b24440
DM
4638 case resume_PLUS1:
4639 goto resume_point_PLUS1;
4640 case resume_PLUS2:
4641 goto resume_point_PLUS2;
4642 case resume_PLUS3:
4643 goto resume_point_PLUS3;
4644 case resume_PLUS4:
4645 goto resume_point_PLUS4;
95b24440
DM
4646 default:
4647 Perl_croak(aTHX_ "regexp resume memory corruption");
4648 }
95b24440 4649 }
aa283a38
DM
4650
4651final_exit:
4652
5d9a96ca
DM
4653 /* restore original high-water mark */
4654 PL_regmatch_slab = orig_slab;
4655 PL_regmatch_state = orig_state;
4656
4657 /* free all slabs above current one */
4658 if (orig_slab->next) {
c4fd8992 4659 regmatch_slab *sl = orig_slab->next;
5d9a96ca
DM
4660 orig_slab->next = NULL;
4661 while (sl) {
c4fd8992 4662 regmatch_slab * const osl = sl;
5d9a96ca 4663 sl = sl->next;
ad65c075 4664 Safefree(osl);
5d9a96ca
DM
4665 }
4666 }
4667
95b24440
DM
4668 return result;
4669
a687059c
LW
4670}
4671
4672/*
4673 - regrepeat - repeatedly match something simple, report how many
4674 */
4675/*
4676 * [This routine now assumes that it will only match on things of length 1.
4677 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 4678 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 4679 */
76e3520e 4680STATIC I32
32fc9b6a 4681S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max)
a687059c 4682{
27da23d5 4683 dVAR;
a0d0e21e 4684 register char *scan;
a0d0e21e 4685 register I32 c;
3280af22 4686 register char *loceol = PL_regeol;
a0ed51b3 4687 register I32 hardcount = 0;
53c4c00c 4688 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 4689
3280af22 4690 scan = PL_reginput;
faf11cac
HS
4691 if (max == REG_INFTY)
4692 max = I32_MAX;
4693 else if (max < loceol - scan)
7f596f4c 4694 loceol = scan + max;
a0d0e21e 4695 switch (OP(p)) {
22c35a8c 4696 case REG_ANY:
1aa99e6b 4697 if (do_utf8) {
ffc61ed2 4698 loceol = PL_regeol;
1aa99e6b 4699 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
4700 scan += UTF8SKIP(scan);
4701 hardcount++;
4702 }
4703 } else {
4704 while (scan < loceol && *scan != '\n')
4705 scan++;
a0ed51b3
LW
4706 }
4707 break;
ffc61ed2 4708 case SANY:
def8e4ea
JH
4709 if (do_utf8) {
4710 loceol = PL_regeol;
a0804c9e 4711 while (scan < loceol && hardcount < max) {
def8e4ea
JH
4712 scan += UTF8SKIP(scan);
4713 hardcount++;
4714 }
4715 }
4716 else
4717 scan = loceol;
a0ed51b3 4718 break;
f33976b4
DB
4719 case CANY:
4720 scan = loceol;
4721 break;
090f7165
JH
4722 case EXACT: /* length of string is 1 */
4723 c = (U8)*STRING(p);
4724 while (scan < loceol && UCHARAT(scan) == c)
4725 scan++;
bbce6d69 4726 break;
4727 case EXACTF: /* length of string is 1 */
cd439c50 4728 c = (U8)*STRING(p);
bbce6d69 4729 while (scan < loceol &&
22c35a8c 4730 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 4731 scan++;
4732 break;
4733 case EXACTFL: /* length of string is 1 */
3280af22 4734 PL_reg_flags |= RF_tainted;
cd439c50 4735 c = (U8)*STRING(p);
bbce6d69 4736 while (scan < loceol &&
22c35a8c 4737 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
4738 scan++;
4739 break;
4740 case ANYOF:
ffc61ed2
JH
4741 if (do_utf8) {
4742 loceol = PL_regeol;
cfc92286 4743 while (hardcount < max && scan < loceol &&
32fc9b6a 4744 reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
ffc61ed2
JH
4745 scan += UTF8SKIP(scan);
4746 hardcount++;
4747 }
4748 } else {
32fc9b6a 4749 while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
ffc61ed2
JH
4750 scan++;
4751 }
a0d0e21e
LW
4752 break;
4753 case ALNUM:
1aa99e6b 4754 if (do_utf8) {
ffc61ed2 4755 loceol = PL_regeol;
1a4fad37 4756 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4757 while (hardcount < max && scan < loceol &&
3568d838 4758 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4759 scan += UTF8SKIP(scan);
4760 hardcount++;
4761 }
4762 } else {
4763 while (scan < loceol && isALNUM(*scan))
4764 scan++;
a0ed51b3
LW
4765 }
4766 break;
bbce6d69 4767 case ALNUML:
3280af22 4768 PL_reg_flags |= RF_tainted;
1aa99e6b 4769 if (do_utf8) {
ffc61ed2 4770 loceol = PL_regeol;
1aa99e6b
IH
4771 while (hardcount < max && scan < loceol &&
4772 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4773 scan += UTF8SKIP(scan);
4774 hardcount++;
4775 }
4776 } else {
4777 while (scan < loceol && isALNUM_LC(*scan))
4778 scan++;
a0ed51b3
LW
4779 }
4780 break;
a0d0e21e 4781 case NALNUM:
1aa99e6b 4782 if (do_utf8) {
ffc61ed2 4783 loceol = PL_regeol;
1a4fad37 4784 LOAD_UTF8_CHARCLASS_ALNUM();
1aa99e6b 4785 while (hardcount < max && scan < loceol &&
3568d838 4786 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4787 scan += UTF8SKIP(scan);
4788 hardcount++;
4789 }
4790 } else {
4791 while (scan < loceol && !isALNUM(*scan))
4792 scan++;
a0ed51b3
LW
4793 }
4794 break;
bbce6d69 4795 case NALNUML:
3280af22 4796 PL_reg_flags |= RF_tainted;
1aa99e6b 4797 if (do_utf8) {
ffc61ed2 4798 loceol = PL_regeol;
1aa99e6b
IH
4799 while (hardcount < max && scan < loceol &&
4800 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4801 scan += UTF8SKIP(scan);
4802 hardcount++;
4803 }
4804 } else {
4805 while (scan < loceol && !isALNUM_LC(*scan))
4806 scan++;
a0ed51b3
LW
4807 }
4808 break;
a0d0e21e 4809 case SPACE:
1aa99e6b 4810 if (do_utf8) {
ffc61ed2 4811 loceol = PL_regeol;
1a4fad37 4812 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4813 while (hardcount < max && scan < loceol &&
3568d838
JH
4814 (*scan == ' ' ||
4815 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4816 scan += UTF8SKIP(scan);
4817 hardcount++;
4818 }
4819 } else {
4820 while (scan < loceol && isSPACE(*scan))
4821 scan++;
a0ed51b3
LW
4822 }
4823 break;
bbce6d69 4824 case SPACEL:
3280af22 4825 PL_reg_flags |= RF_tainted;
1aa99e6b 4826 if (do_utf8) {
ffc61ed2 4827 loceol = PL_regeol;
1aa99e6b 4828 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4829 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4830 scan += UTF8SKIP(scan);
4831 hardcount++;
4832 }
4833 } else {
4834 while (scan < loceol && isSPACE_LC(*scan))
4835 scan++;
a0ed51b3
LW
4836 }
4837 break;
a0d0e21e 4838 case NSPACE:
1aa99e6b 4839 if (do_utf8) {
ffc61ed2 4840 loceol = PL_regeol;
1a4fad37 4841 LOAD_UTF8_CHARCLASS_SPACE();
1aa99e6b 4842 while (hardcount < max && scan < loceol &&
3568d838
JH
4843 !(*scan == ' ' ||
4844 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4845 scan += UTF8SKIP(scan);
4846 hardcount++;
4847 }
4848 } else {
4849 while (scan < loceol && !isSPACE(*scan))
4850 scan++;
4851 break;
a0ed51b3 4852 }
bbce6d69 4853 case NSPACEL:
3280af22 4854 PL_reg_flags |= RF_tainted;
1aa99e6b 4855 if (do_utf8) {
ffc61ed2 4856 loceol = PL_regeol;
1aa99e6b 4857 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4858 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4859 scan += UTF8SKIP(scan);
4860 hardcount++;
4861 }
4862 } else {
4863 while (scan < loceol && !isSPACE_LC(*scan))
4864 scan++;
a0ed51b3
LW
4865 }
4866 break;
a0d0e21e 4867 case DIGIT:
1aa99e6b 4868 if (do_utf8) {
ffc61ed2 4869 loceol = PL_regeol;
1a4fad37 4870 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4871 while (hardcount < max && scan < loceol &&
3568d838 4872 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4873 scan += UTF8SKIP(scan);
4874 hardcount++;
4875 }
4876 } else {
4877 while (scan < loceol && isDIGIT(*scan))
4878 scan++;
a0ed51b3
LW
4879 }
4880 break;
a0d0e21e 4881 case NDIGIT:
1aa99e6b 4882 if (do_utf8) {
ffc61ed2 4883 loceol = PL_regeol;
1a4fad37 4884 LOAD_UTF8_CHARCLASS_DIGIT();
1aa99e6b 4885 while (hardcount < max && scan < loceol &&
3568d838 4886 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4887 scan += UTF8SKIP(scan);
4888 hardcount++;
4889 }
4890 } else {
4891 while (scan < loceol && !isDIGIT(*scan))
4892 scan++;
a0ed51b3
LW
4893 }
4894 break;
a0d0e21e
LW
4895 default: /* Called on something of 0 width. */
4896 break; /* So match right here or not at all. */
4897 }
a687059c 4898
a0ed51b3
LW
4899 if (hardcount)
4900 c = hardcount;
4901 else
4902 c = scan - PL_reginput;
3280af22 4903 PL_reginput = scan;
a687059c 4904
a3621e74 4905 DEBUG_r({
ab74612d 4906 SV *re_debug_flags = NULL;
6136c704 4907 SV * const prop = sv_newmortal();
a3621e74
YO
4908 GET_RE_DEBUG_FLAGS;
4909 DEBUG_EXECUTE_r({
32fc9b6a 4910 regprop(prog, prop, p);
9041c2e3
NIS
4911 PerlIO_printf(Perl_debug_log,
4912 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
3f7c398e 4913 REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max);
c277df42 4914 });
a3621e74 4915 });
9041c2e3 4916
a0d0e21e 4917 return(c);
a687059c
LW
4918}
4919
c277df42 4920
76234dfb 4921#ifndef PERL_IN_XSUB_RE
c277df42 4922/*
ffc61ed2
JH
4923- regclass_swash - prepare the utf8 swash
4924*/
4925
4926SV *
32fc9b6a 4927Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
ffc61ed2 4928{
97aff369 4929 dVAR;
9e55ce06
JH
4930 SV *sw = NULL;
4931 SV *si = NULL;
4932 SV *alt = NULL;
32fc9b6a 4933 const struct reg_data *data = prog ? prog->data : NULL;
ffc61ed2 4934
4f639d21 4935 if (data && data->count) {
a3b680e6 4936 const U32 n = ARG(node);
ffc61ed2 4937
4f639d21
DM
4938 if (data->what[n] == 's') {
4939 SV * const rv = (SV*)data->data[n];
890ce7af 4940 AV * const av = (AV*)SvRV((SV*)rv);
2d03de9c 4941 SV **const ary = AvARRAY(av);
9e55ce06 4942 SV **a, **b;
9041c2e3 4943
711a919c 4944 /* See the end of regcomp.c:S_regclass() for
9e55ce06
JH
4945 * documentation of these array elements. */
4946
b11f357e 4947 si = *ary;
8f7f7219 4948 a = SvROK(ary[1]) ? &ary[1] : 0;
b11f357e
JH
4949 b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4950
ffc61ed2
JH
4951 if (a)
4952 sw = *a;
4953 else if (si && doinit) {
4954 sw = swash_init("utf8", "", si, 1, 0);
4955 (void)av_store(av, 1, sw);
4956 }
9e55ce06
JH
4957 if (b)
4958 alt = *b;
ffc61ed2
JH
4959 }
4960 }
4961
9e55ce06
JH
4962 if (listsvp)
4963 *listsvp = si;
4964 if (altsvp)
4965 *altsvp = alt;
ffc61ed2
JH
4966
4967 return sw;
4968}
76234dfb 4969#endif
ffc61ed2
JH
4970
4971/*
ba7b4546 4972 - reginclass - determine if a character falls into a character class
832705d4
JH
4973
4974 The n is the ANYOF regnode, the p is the target string, lenp
4975 is pointer to the maximum length of how far to go in the p
4976 (if the lenp is zero, UTF8SKIP(p) is used),
4977 do_utf8 tells whether the target string is in UTF-8.
4978
bbce6d69 4979 */
4980
76e3520e 4981STATIC bool
32fc9b6a 4982S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
bbce6d69 4983{
27da23d5 4984 dVAR;
a3b680e6 4985 const char flags = ANYOF_FLAGS(n);
bbce6d69 4986 bool match = FALSE;
cc07378b 4987 UV c = *p;
ae9ddab8 4988 STRLEN len = 0;
9e55ce06 4989 STRLEN plen;
1aa99e6b 4990
19f67299
TS
4991 if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
4992 c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
4ad0818d
DM
4993 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
4994 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
19f67299
TS
4995 if (len == (STRLEN)-1)
4996 Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
4997 }
bbce6d69 4998
0f0076b4 4999 plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
ffc61ed2 5000 if (do_utf8 || (flags & ANYOF_UNICODE)) {
9e55ce06
JH
5001 if (lenp)
5002 *lenp = 0;
ffc61ed2 5003 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
5004 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5005 match = TRUE;
bbce6d69 5006 }
3568d838 5007 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 5008 match = TRUE;
ffc61ed2 5009 if (!match) {
9e55ce06 5010 AV *av;
32fc9b6a 5011 SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
ffc61ed2
JH
5012
5013 if (sw) {
3568d838 5014 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
5015 match = TRUE;
5016 else if (flags & ANYOF_FOLD) {
9e55ce06
JH
5017 if (!match && lenp && av) {
5018 I32 i;
9e55ce06 5019 for (i = 0; i <= av_len(av); i++) {
890ce7af 5020 SV* const sv = *av_fetch(av, i, FALSE);
9e55ce06 5021 STRLEN len;
890ce7af 5022 const char * const s = SvPV_const(sv, len);
9e55ce06 5023
061b10df 5024 if (len <= plen && memEQ(s, (char*)p, len)) {
9e55ce06
JH
5025 *lenp = len;
5026 match = TRUE;
5027 break;
5028 }
5029 }
5030 }
5031 if (!match) {
89ebb4a3 5032 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4a623e43
JH
5033 STRLEN tmplen;
5034
9e55ce06
JH
5035 to_utf8_fold(p, tmpbuf, &tmplen);
5036 if (swash_fetch(sw, tmpbuf, do_utf8))
5037 match = TRUE;
5038 }
ffc61ed2
JH
5039 }
5040 }
bbce6d69 5041 }
9e55ce06 5042 if (match && lenp && *lenp == 0)
0f0076b4 5043 *lenp = UNISKIP(NATIVE_TO_UNI(c));
bbce6d69 5044 }
1aa99e6b 5045 if (!match && c < 256) {
ffc61ed2
JH
5046 if (ANYOF_BITMAP_TEST(n, c))
5047 match = TRUE;
5048 else if (flags & ANYOF_FOLD) {
eb160463 5049 U8 f;
a0ed51b3 5050
ffc61ed2
JH
5051 if (flags & ANYOF_LOCALE) {
5052 PL_reg_flags |= RF_tainted;
5053 f = PL_fold_locale[c];
5054 }
5055 else
5056 f = PL_fold[c];
5057 if (f != c && ANYOF_BITMAP_TEST(n, f))
5058 match = TRUE;
5059 }
5060
5061 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 5062 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
5063 if (
5064 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
5065 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
5066 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
5067 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
5068 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
5069 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
5070 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
5071 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5072 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
5073 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
5074 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
5075 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
5076 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
5077 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
5078 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
5079 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
5080 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
5081 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
5082 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
5083 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
5084 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
5085 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
5086 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
5087 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
5088 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
5089 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
5090 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
5091 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
5092 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
5093 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
5094 ) /* How's that for a conditional? */
5095 {
5096 match = TRUE;
5097 }
a0ed51b3 5098 }
a0ed51b3
LW
5099 }
5100
a0ed51b3
LW
5101 return (flags & ANYOF_INVERT) ? !match : match;
5102}
161b471a 5103
dfe13c55 5104STATIC U8 *
5f66b61c 5105S_reghop3(U8 *s, I32 off, U8* lim)
9041c2e3 5106{
97aff369 5107 dVAR;
a0ed51b3 5108 if (off >= 0) {
1aa99e6b 5109 while (off-- && s < lim) {
ffc61ed2 5110 /* XXX could check well-formedness here */
a0ed51b3 5111 s += UTF8SKIP(s);
ffc61ed2 5112 }
a0ed51b3
LW
5113 }
5114 else {
5115 while (off++) {
1aa99e6b 5116 if (s > lim) {
a0ed51b3 5117 s--;
ffc61ed2 5118 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 5119 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 5120 s--;
ffc61ed2
JH
5121 }
5122 /* XXX could check well-formedness here */
a0ed51b3
LW
5123 }
5124 }
5125 }
5126 return s;
5127}
161b471a 5128
dfe13c55 5129STATIC U8 *
5f66b61c 5130S_reghopmaybe3(U8* s, I32 off, U8* lim)
a0ed51b3 5131{
97aff369 5132 dVAR;
a0ed51b3 5133 if (off >= 0) {
1aa99e6b 5134 while (off-- && s < lim) {
ffc61ed2 5135 /* XXX could check well-formedness here */
a0ed51b3 5136 s += UTF8SKIP(s);
ffc61ed2 5137 }
a0ed51b3
LW
5138 if (off >= 0)
5139 return 0;
5140 }
5141 else {
5142 while (off++) {
1aa99e6b 5143 if (s > lim) {
a0ed51b3 5144 s--;
ffc61ed2 5145 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 5146 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 5147 s--;
ffc61ed2
JH
5148 }
5149 /* XXX could check well-formedness here */
a0ed51b3
LW
5150 }
5151 else
5152 break;
5153 }
5154 if (off <= 0)
5155 return 0;
5156 }
5157 return s;
5158}
51371543 5159
51371543 5160static void
acfe0abc 5161restore_pos(pTHX_ void *arg)
51371543 5162{
97aff369 5163 dVAR;
097eb12c 5164 regexp * const rex = (regexp *)arg;
51371543
GS
5165 if (PL_reg_eval_set) {
5166 if (PL_reg_oldsaved) {
4f639d21
DM
5167 rex->subbeg = PL_reg_oldsaved;
5168 rex->sublen = PL_reg_oldsavedlen;
f8c7b90f 5169#ifdef PERL_OLD_COPY_ON_WRITE
4f639d21 5170 rex->saved_copy = PL_nrs;
ed252734 5171#endif
4f639d21 5172 RX_MATCH_COPIED_on(rex);
51371543
GS
5173 }
5174 PL_reg_magic->mg_len = PL_reg_oldpos;
5175 PL_reg_eval_set = 0;
5176 PL_curpm = PL_reg_oldcurpm;
5177 }
5178}
33b8afdf
JH
5179
5180STATIC void
5181S_to_utf8_substr(pTHX_ register regexp *prog)
5182{
33b8afdf 5183 if (prog->float_substr && !prog->float_utf8) {
097eb12c
AL
5184 SV* const sv = newSVsv(prog->float_substr);
5185 prog->float_utf8 = sv;
33b8afdf
JH
5186 sv_utf8_upgrade(sv);
5187 if (SvTAIL(prog->float_substr))
5188 SvTAIL_on(sv);
5189 if (prog->float_substr == prog->check_substr)
5190 prog->check_utf8 = sv;
5191 }
5192 if (prog->anchored_substr && !prog->anchored_utf8) {
097eb12c
AL
5193 SV* const sv = newSVsv(prog->anchored_substr);
5194 prog->anchored_utf8 = sv;
33b8afdf
JH
5195 sv_utf8_upgrade(sv);
5196 if (SvTAIL(prog->anchored_substr))
5197 SvTAIL_on(sv);
5198 if (prog->anchored_substr == prog->check_substr)
5199 prog->check_utf8 = sv;
5200 }
5201}
5202
5203STATIC void
5204S_to_byte_substr(pTHX_ register regexp *prog)
5205{
97aff369 5206 dVAR;
33b8afdf 5207 if (prog->float_utf8 && !prog->float_substr) {
097eb12c
AL
5208 SV* sv = newSVsv(prog->float_utf8);
5209 prog->float_substr = sv;
33b8afdf
JH
5210 if (sv_utf8_downgrade(sv, TRUE)) {
5211 if (SvTAIL(prog->float_utf8))
5212 SvTAIL_on(sv);
5213 } else {
5214 SvREFCNT_dec(sv);
5215 prog->float_substr = sv = &PL_sv_undef;
5216 }
5217 if (prog->float_utf8 == prog->check_utf8)
5218 prog->check_substr = sv;
5219 }
5220 if (prog->anchored_utf8 && !prog->anchored_substr) {
097eb12c
AL
5221 SV* sv = newSVsv(prog->anchored_utf8);
5222 prog->anchored_substr = sv;
33b8afdf
JH
5223 if (sv_utf8_downgrade(sv, TRUE)) {
5224 if (SvTAIL(prog->anchored_utf8))
5225 SvTAIL_on(sv);
5226 } else {
5227 SvREFCNT_dec(sv);
5228 prog->anchored_substr = sv = &PL_sv_undef;
5229 }
5230 if (prog->anchored_utf8 == prog->check_utf8)
5231 prog->check_substr = sv;
5232 }
5233}
66610fdd
RGS
5234
5235/*
5236 * Local variables:
5237 * c-indentation-style: bsd
5238 * c-basic-offset: 4
5239 * indent-tabs-mode: t
5240 * End:
5241 *
37442d52
RGS
5242 * ex: set ts=8 sts=4 sw=4 noet:
5243 */