This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Remove IPC::Run for 5.9.2
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
61296642
DM
8/* This file contains functions for executing a regular expression. See
9 * also regcomp.c which funnily enough, contains functions for compiling
166f8a29 10 * a regular expression.
e4a054ea
DM
11 *
12 * This file is also copied at build time to ext/re/re_exec.c, where
13 * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
14 * This causes the main functions to be compiled under new names and with
15 * debugging support added, which makes "use re 'debug'" work.
16
166f8a29
DM
17 */
18
a687059c
LW
19/* NOTE: this is derived from Henry Spencer's regexp code, and should not
20 * confused with the original package (see point 3 below). Thanks, Henry!
21 */
22
23/* Additional note: this code is very heavily munged from Henry's version
24 * in places. In some spots I've traded clarity for efficiency, so don't
25 * blame Henry for some of the lack of readability.
26 */
27
e50aee73
AD
28/* The names of the functions have been changed from regcomp and
29 * regexec to pregcomp and pregexec in order to avoid conflicts
30 * with the POSIX routines of the same names.
31*/
32
b9d5759e
AD
33#ifdef PERL_EXT_RE_BUILD
34/* need to replace pregcomp et al, so enable that */
35# ifndef PERL_IN_XSUB_RE
36# define PERL_IN_XSUB_RE
37# endif
38/* need access to debugger hooks */
cad2e5aa 39# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
40# define DEBUGGING
41# endif
42#endif
43
44#ifdef PERL_IN_XSUB_RE
d06ea78c 45/* We *really* need to overwrite these symbols: */
56953603
IZ
46# define Perl_regexec_flags my_regexec
47# define Perl_regdump my_regdump
48# define Perl_regprop my_regprop
cad2e5aa 49# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
50/* *These* symbols are masked to allow static link. */
51# define Perl_pregexec my_pregexec
9041c2e3 52# define Perl_reginitcolors my_reginitcolors
490a3f88 53# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
54
55# define PERL_NO_GET_CONTEXT
9041c2e3 56#endif
56953603 57
f0fcb552 58/*SUPPRESS 112*/
a687059c 59/*
e50aee73 60 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
61 *
62 * Copyright (c) 1986 by University of Toronto.
63 * Written by Henry Spencer. Not derived from licensed software.
64 *
65 * Permission is granted to anyone to use this software for any
66 * purpose on any computer system, and to redistribute it freely,
67 * subject to the following restrictions:
68 *
69 * 1. The author is not responsible for the consequences of use of
70 * this software, no matter how awful, even if they arise
71 * from defects in it.
72 *
73 * 2. The origin of this software must not be misrepresented, either
74 * by explicit claim or by omission.
75 *
76 * 3. Altered versions must be plainly marked as such, and must not
77 * be misrepresented as being the original software.
78 *
79 **** Alterations to Henry's code are...
80 ****
4bb101f2 81 **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
f2b990bf 82 **** 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a687059c 83 ****
9ef589d8
LW
84 **** You may distribute under the terms of either the GNU General Public
85 **** License or the Artistic License, as specified in the README file.
a687059c
LW
86 *
87 * Beware that some of this code is subtly aware of the way operator
88 * precedence is structured in regular expressions. Serious changes in
89 * regular-expression syntax might require a total rethink.
90 */
91#include "EXTERN.h"
864dbfa3 92#define PERL_IN_REGEXEC_C
a687059c 93#include "perl.h"
0f5d15d6 94
a687059c
LW
95#include "regcomp.h"
96
c277df42
IZ
97#define RF_tainted 1 /* tainted information used? */
98#define RF_warned 2 /* warned about big count? */
ce862d02 99#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3 100#define RF_utf8 8 /* String contains multibyte chars? */
69a95ace 101#define RF_false 16 /* odd number of nested negatives */
a0ed51b3 102
eb160463 103#define UTF ((PL_reg_flags & RF_utf8) != 0)
ce862d02
IZ
104
105#define RS_init 1 /* eval environment created */
106#define RS_set 2 /* replsv value is set */
c277df42 107
a687059c
LW
108#ifndef STATIC
109#define STATIC static
110#endif
111
ba7b4546 112#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
7d3e948e 113
c277df42
IZ
114/*
115 * Forwards.
116 */
117
33b8afdf 118#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 119#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 120
dfe13c55
GS
121#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
122#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
123#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
124#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
125#define HOPc(pos,off) ((char*)HOP(pos,off))
126#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 127
efb30f32 128#define HOPBACK(pos, off) ( \
e54858b0 129 (PL_reg_match_utf8) \
efb30f32
HS
130 ? reghopmaybe((U8*)pos, -off) \
131 : (pos - off >= PL_bostr) \
132 ? (U8*)(pos - off) \
133 : (U8*)NULL \
134)
135#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
136
1aa99e6b
IH
137#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
138#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
139#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
140#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
141#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
142#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
143
bfed75c6 144#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) { ENTER; save_re_context(); (void)CAT2(is_utf8_, a)((const U8*)b); LEAVE; } } STMT_END
51371543 145
5f80c4cf 146/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
147#define JUMPABLE(rn) ( \
148 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
149 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
150 OP(rn) == PLUS || OP(rn) == MINMOD || \
151 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
152)
153
cca55fe3
JP
154#define HAS_TEXT(rn) ( \
155 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
156)
e2d8ce26 157
a84d97b6
HS
158/*
159 Search for mandatory following text node; for lookahead, the text must
160 follow but for lookbehind (rn->flags != 0) we skip to the next step.
161*/
cca55fe3 162#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 163 while (JUMPABLE(rn)) \
a84d97b6 164 if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 165 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
166 else if (OP(rn) == PLUS) \
167 rn = NEXTOPER(rn); \
a84d97b6
HS
168 else if (OP(rn) == IFMATCH) \
169 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
e2d8ce26 170 else rn += NEXT_OFF(rn); \
5f80c4cf 171} STMT_END
74750237 172
acfe0abc 173static void restore_pos(pTHX_ void *arg);
51371543 174
76e3520e 175STATIC CHECKPOINT
cea2e8a9 176S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 177{
3280af22 178 int retval = PL_savestack_ix;
b1ce53c5
JH
179#define REGCP_PAREN_ELEMS 4
180 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
181 int p;
182
e49a9654
IH
183 if (paren_elems_to_push < 0)
184 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
185
a01268b5 186#define REGCP_OTHER_ELEMS 6
4b3c1a47 187 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 188 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 189/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
190 SSPUSHINT(PL_regendp[p]);
191 SSPUSHINT(PL_regstartp[p]);
3280af22 192 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
193 SSPUSHINT(p);
194 }
b1ce53c5 195/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
196 SSPUSHINT(PL_regsize);
197 SSPUSHINT(*PL_reglastparen);
a01268b5 198 SSPUSHINT(*PL_reglastcloseparen);
3280af22 199 SSPUSHPTR(PL_reginput);
41123dfd
JH
200#define REGCP_FRAME_ELEMS 2
201/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
202 * are needed for the regexp context stack bookkeeping. */
203 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 204 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 205
a0d0e21e
LW
206 return retval;
207}
208
c277df42 209/* These are needed since we do not localize EVAL nodes: */
a3621e74 210# define REGCP_SET(cp) DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, \
faccc32b 211 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 212 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 213
a3621e74 214# define REGCP_UNWIND(cp) DEBUG_EXECUTE_r(cp != PL_savestack_ix ? \
c3464db5 215 PerlIO_printf(Perl_debug_log, \
faccc32b 216 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 217 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 218
76e3520e 219STATIC char *
cea2e8a9 220S_regcppop(pTHX)
a0d0e21e 221{
b1ce53c5 222 I32 i;
a0d0e21e
LW
223 U32 paren = 0;
224 char *input;
cf93c79d 225 I32 tmps;
b1ce53c5 226
a3621e74
YO
227 GET_RE_DEBUG_FLAGS_DECL;
228
b1ce53c5 229 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 230 i = SSPOPINT;
b1ce53c5
JH
231 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
232 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 233 input = (char *) SSPOPPTR;
a01268b5 234 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
235 *PL_reglastparen = SSPOPINT;
236 PL_regsize = SSPOPINT;
b1ce53c5
JH
237
238 /* Now restore the parentheses context. */
41123dfd
JH
239 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
240 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 241 paren = (U32)SSPOPINT;
3280af22 242 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
243 PL_regstartp[paren] = SSPOPINT;
244 tmps = SSPOPINT;
3280af22
NIS
245 if (paren <= *PL_reglastparen)
246 PL_regendp[paren] = tmps;
a3621e74 247 DEBUG_EXECUTE_r(
c3464db5 248 PerlIO_printf(Perl_debug_log,
b900a521 249 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 250 (UV)paren, (IV)PL_regstartp[paren],
b900a521 251 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 252 (IV)PL_regendp[paren],
3280af22 253 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 254 );
a0d0e21e 255 }
a3621e74 256 DEBUG_EXECUTE_r(
eb160463 257 if ((I32)(*PL_reglastparen + 1) <= PL_regnpar) {
c3464db5 258 PerlIO_printf(Perl_debug_log,
faccc32b
JH
259 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
260 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
261 }
262 );
daf18116 263#if 1
dafc8851
JH
264 /* It would seem that the similar code in regtry()
265 * already takes care of this, and in fact it is in
266 * a better location to since this code can #if 0-ed out
267 * but the code in regtry() is needed or otherwise tests
268 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
269 * (as of patchlevel 7877) will fail. Then again,
270 * this code seems to be necessary or otherwise
271 * building DynaLoader will fail:
272 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
273 * --jhi */
eb160463
GS
274 for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) {
275 if ((I32)paren > PL_regsize)
cf93c79d
IZ
276 PL_regstartp[paren] = -1;
277 PL_regendp[paren] = -1;
a0d0e21e 278 }
dafc8851 279#endif
a0d0e21e
LW
280 return input;
281}
282
0f5d15d6 283STATIC char *
cea2e8a9 284S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
285{
286 I32 tmp = PL_savestack_ix;
287
288 PL_savestack_ix = ss;
289 regcppop();
290 PL_savestack_ix = tmp;
942e002e 291 return Nullch;
0f5d15d6
IZ
292}
293
294typedef struct re_cc_state
295{
296 I32 ss;
297 regnode *node;
298 struct re_cc_state *prev;
299 CURCUR *cc;
300 regexp *re;
301} re_cc_state;
302
02db2b7b 303#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 304
29d1e993
HS
305#define TRYPAREN(paren, n, input) { \
306 if (paren) { \
307 if (n) { \
308 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
309 PL_regendp[paren] = input - PL_bostr; \
310 } \
311 else \
312 PL_regendp[paren] = -1; \
313 } \
314 if (regmatch(next)) \
315 sayYES; \
316 if (paren && n) \
317 PL_regendp[paren] = -1; \
318}
319
320
a687059c 321/*
e50aee73 322 * pregexec and friends
a687059c
LW
323 */
324
325/*
c277df42 326 - pregexec - match a regexp against a string
a687059c 327 */
c277df42 328I32
864dbfa3 329Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 330 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
331/* strend: pointer to null at end of string */
332/* strbeg: real beginning of string */
333/* minend: end of match must be >=minend after stringarg. */
334/* nosave: For optimizations. */
335{
336 return
9041c2e3 337 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
338 nosave ? 0 : REXEC_COPY_STR);
339}
0f5d15d6
IZ
340
341STATIC void
cea2e8a9 342S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
343{
344 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
345#ifdef DEBUGGING
346 PL_regprogram = prog->program;
347#endif
348 PL_regnpar = prog->nparens;
9041c2e3
NIS
349 PL_regdata = prog->data;
350 PL_reg_re = prog;
0f5d15d6 351}
22e551b9 352
9041c2e3 353/*
cad2e5aa
JH
354 * Need to implement the following flags for reg_anch:
355 *
356 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
357 * USE_INTUIT_ML
358 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
359 * INTUIT_AUTORITATIVE_ML
360 * INTUIT_ONCE_NOML - Intuit can match in one location only.
361 * INTUIT_ONCE_ML
362 *
363 * Another flag for this function: SECOND_TIME (so that float substrs
364 * with giant delta may be not rechecked).
365 */
366
367/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
368
2c2d71f5 369/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
370 Otherwise, only SvCUR(sv) is used to get strbeg. */
371
372/* XXXX We assume that strpos is strbeg unless sv. */
373
6eb5f6b9
JH
374/* XXXX Some places assume that there is a fixed substring.
375 An update may be needed if optimizer marks as "INTUITable"
376 RExen without fixed substrings. Similarly, it is assumed that
377 lengths of all the strings are no more than minlen, thus they
378 cannot come from lookahead.
379 (Or minlen should take into account lookahead.) */
380
2c2d71f5
JH
381/* A failure to find a constant substring means that there is no need to make
382 an expensive call to REx engine, thus we celebrate a failure. Similarly,
383 finding a substring too deep into the string means that less calls to
30944b6d
IZ
384 regtry() should be needed.
385
386 REx compiler's optimizer found 4 possible hints:
387 a) Anchored substring;
388 b) Fixed substring;
389 c) Whether we are anchored (beginning-of-line or \G);
390 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 391 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
392 string which does not contradict any of them.
393 */
2c2d71f5 394
6eb5f6b9
JH
395/* Most of decisions we do here should have been done at compile time.
396 The nodes of the REx which we used for the search should have been
397 deleted from the finite automaton. */
398
cad2e5aa
JH
399char *
400Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
401 char *strend, U32 flags, re_scream_pos_data *data)
402{
b7953727 403 register I32 start_shift = 0;
cad2e5aa 404 /* Should be nonnegative! */
b7953727 405 register I32 end_shift = 0;
2c2d71f5
JH
406 register char *s;
407 register SV *check;
a1933d95 408 char *strbeg;
cad2e5aa 409 char *t;
33b8afdf 410 int do_utf8 = sv ? SvUTF8(sv) : 0; /* if no sv we have to assume bytes */
cad2e5aa 411 I32 ml_anch;
6eb5f6b9 412 register char *other_last = Nullch; /* other substr checked before this */
b7953727 413 char *check_at = Nullch; /* check substr found at this pos */
7fba1cd6 414 I32 multiline = prog->reganch & PMf_MULTILINE;
30944b6d
IZ
415#ifdef DEBUGGING
416 char *i_strpos = strpos;
ce333219 417 SV *dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 418#endif
a3621e74
YO
419
420 GET_RE_DEBUG_FLAGS_DECL;
421
a30b2f1f 422 RX_MATCH_UTF8_set(prog,do_utf8);
cad2e5aa 423
b8d68ded 424 if (prog->reganch & ROPT_UTF8) {
a3621e74 425 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded
JH
426 "UTF-8 regex...\n"));
427 PL_reg_flags |= RF_utf8;
428 }
429
a3621e74 430 DEBUG_EXECUTE_r({
b8d68ded 431 char *s = PL_reg_match_utf8 ?
c728cb41
JH
432 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
433 strpos;
b8d68ded
JH
434 int len = PL_reg_match_utf8 ?
435 strlen(s) : strend - strpos;
2a782b5b
JH
436 if (!PL_colorset)
437 reginitcolors();
b8d68ded 438 if (PL_reg_match_utf8)
a3621e74 439 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b8d68ded 440 "UTF-8 target...\n"));
2a782b5b
JH
441 PerlIO_printf(Perl_debug_log,
442 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
443 PL_colors[4],PL_colors[5],PL_colors[0],
444 prog->precomp,
445 PL_colors[1],
446 (strlen(prog->precomp) > 60 ? "..." : ""),
447 PL_colors[0],
448 (int)(len > 60 ? 60 : len),
449 s, PL_colors[1],
450 (len > 60 ? "..." : "")
451 );
452 });
cad2e5aa 453
c344f387
JH
454 /* CHR_DIST() would be more correct here but it makes things slow. */
455 if (prog->minlen > strend - strpos) {
a3621e74 456 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584 457 "String too short... [re_intuit_start]\n"));
cad2e5aa 458 goto fail;
2c2d71f5 459 }
a1933d95 460 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 461 PL_regeol = strend;
33b8afdf
JH
462 if (do_utf8) {
463 if (!prog->check_utf8 && prog->check_substr)
464 to_utf8_substr(prog);
465 check = prog->check_utf8;
466 } else {
467 if (!prog->check_substr && prog->check_utf8)
468 to_byte_substr(prog);
469 check = prog->check_substr;
470 }
471 if (check == &PL_sv_undef) {
a3621e74 472 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
473 "Non-utf string cannot match utf check string\n"));
474 goto fail;
475 }
2c2d71f5 476 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
477 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
478 || ( (prog->reganch & ROPT_ANCH_BOL)
7fba1cd6 479 && !multiline ) ); /* Check after \n? */
cad2e5aa 480
7e25d62c
JH
481 if (!ml_anch) {
482 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
483 | ROPT_IMPLICIT)) /* not a real BOL */
484 /* SvCUR is not set on references: SvRV and SvPVX overlap */
485 && sv && !SvROK(sv)
486 && (strpos != strbeg)) {
a3621e74 487 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
7e25d62c
JH
488 goto fail;
489 }
490 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 491 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 492 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
493 I32 slen;
494
1aa99e6b 495 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
496 if (SvTAIL(check)) {
497 slen = SvCUR(check); /* >= 1 */
cad2e5aa 498
9041c2e3 499 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5 500 || (strend - s == slen && strend[-1] != '\n')) {
a3621e74 501 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
2c2d71f5 502 goto fail_finish;
cad2e5aa
JH
503 }
504 /* Now should match s[0..slen-2] */
505 slen--;
653099ff 506 if (slen && (*SvPVX(check) != *s
cad2e5aa 507 || (slen > 1
653099ff 508 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5 509 report_neq:
a3621e74 510 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
2c2d71f5
JH
511 goto fail_finish;
512 }
cad2e5aa 513 }
653099ff
GS
514 else if (*SvPVX(check) != *s
515 || ((slen = SvCUR(check)) > 1
516 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
517 goto report_neq;
518 goto success_at_start;
7e25d62c 519 }
cad2e5aa 520 }
2c2d71f5 521 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 522 s = strpos;
2c2d71f5 523 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 524 end_shift = prog->minlen - start_shift -
653099ff 525 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 526 if (!ml_anch) {
653099ff
GS
527 I32 end = prog->check_offset_max + CHR_SVLEN(check)
528 - (SvTAIL(check) != 0);
1aa99e6b 529 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
530
531 if (end_shift < eshift)
532 end_shift = eshift;
533 }
cad2e5aa 534 }
2c2d71f5 535 else { /* Can match at random position */
cad2e5aa
JH
536 ml_anch = 0;
537 s = strpos;
2c2d71f5
JH
538 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
539 /* Should be nonnegative! */
540 end_shift = prog->minlen - start_shift -
653099ff 541 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
542 }
543
2c2d71f5 544#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 545 if (end_shift < 0)
6bbae5e6 546 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
547#endif
548
2c2d71f5
JH
549 restart:
550 /* Find a possible match in the region s..strend by looking for
551 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 552 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
553 I32 p = -1; /* Internal iterator of scream. */
554 I32 *pp = data ? data->scream_pos : &p;
555
2c2d71f5
JH
556 if (PL_screamfirst[BmRARE(check)] >= 0
557 || ( BmRARE(check) == '\n'
558 && (BmPREVIOUS(check) == SvCUR(check) - 1)
559 && SvTAIL(check) ))
9041c2e3 560 s = screaminstr(sv, check,
2c2d71f5 561 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 562 else
2c2d71f5 563 goto fail_finish;
4addbd3b
HS
564 /* we may be pointing at the wrong string */
565 if (s && RX_MATCH_COPIED(prog))
7ef91622 566 s = strbeg + (s - SvPVX(sv));
cad2e5aa
JH
567 if (data)
568 *data->scream_olds = s;
569 }
f33976b4 570 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
571 s = fbm_instr((U8*)(s + start_shift),
572 (U8*)(strend - end_shift),
7fba1cd6 573 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 574 else
1aa99e6b
IH
575 s = fbm_instr(HOP3(s, start_shift, strend),
576 HOP3(strend, -end_shift, strbeg),
7fba1cd6 577 check, multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
578
579 /* Update the count-of-usability, remove useless subpatterns,
580 unshift s. */
2c2d71f5 581
a3621e74 582 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
2c2d71f5 583 (s ? "Found" : "Did not find"),
33b8afdf 584 (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
2c2d71f5 585 PL_colors[0],
7b0972df
JH
586 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
587 SvPVX(check),
2c2d71f5
JH
588 PL_colors[1], (SvTAIL(check) ? "$" : ""),
589 (s ? " at offset " : "...\n") ) );
590
591 if (!s)
592 goto fail_finish;
593
6eb5f6b9
JH
594 check_at = s;
595
2c2d71f5 596 /* Finish the diagnostic message */
a3621e74 597 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
598
599 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
600 Start with the other substr.
601 XXXX no SCREAM optimization yet - and a very coarse implementation
602 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
603 *always* match. Probably should be marked during compile...
604 Probably it is right to do no SCREAM here...
605 */
606
33b8afdf 607 if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
30944b6d 608 /* Take into account the "other" substring. */
2c2d71f5
JH
609 /* XXXX May be hopelessly wrong for UTF... */
610 if (!other_last)
6eb5f6b9 611 other_last = strpos;
33b8afdf 612 if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
30944b6d
IZ
613 do_other_anchored:
614 {
1aa99e6b 615 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5 616 char *s1 = s;
33b8afdf 617 SV* must;
2c2d71f5 618
2c2d71f5
JH
619 t = s - prog->check_offset_max;
620 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 621 && (!do_utf8
1aa99e6b 622 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 623 && t > strpos)))
30944b6d 624 /* EMPTY */;
2c2d71f5
JH
625 else
626 t = strpos;
1aa99e6b 627 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
628 if (t < other_last) /* These positions already checked */
629 t = other_last;
1aa99e6b 630 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
631 if (last < last1)
632 last1 = last;
633 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
634 /* On end-of-str: see comment below. */
33b8afdf
JH
635 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
636 if (must == &PL_sv_undef) {
637 s = (char*)NULL;
a3621e74 638 DEBUG_EXECUTE_r(must = prog->anchored_utf8); /* for debug */
33b8afdf
JH
639 }
640 else
641 s = fbm_instr(
642 (unsigned char*)t,
643 HOP3(HOP3(last1, prog->anchored_offset, strend)
644 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
645 must,
7fba1cd6 646 multiline ? FBMrf_MULTILINE : 0
33b8afdf 647 );
a3621e74 648 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1aa99e6b 649 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
650 (s ? "Found" : "Contradicts"),
651 PL_colors[0],
33b8afdf
JH
652 (int)(SvCUR(must)
653 - (SvTAIL(must)!=0)),
654 SvPVX(must),
655 PL_colors[1], (SvTAIL(must) ? "$" : "")));
2c2d71f5
JH
656 if (!s) {
657 if (last1 >= last2) {
a3621e74 658 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5
JH
659 ", giving up...\n"));
660 goto fail_finish;
661 }
a3621e74 662 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 663 ", trying floating at offset %ld...\n",
1aa99e6b
IH
664 (long)(HOP3c(s1, 1, strend) - i_strpos)));
665 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
666 s = HOP3c(last, 1, strend);
2c2d71f5
JH
667 goto restart;
668 }
669 else {
a3621e74 670 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 671 (long)(s - i_strpos)));
1aa99e6b
IH
672 t = HOP3c(s, -prog->anchored_offset, strbeg);
673 other_last = HOP3c(s, 1, strend);
30944b6d 674 s = s1;
2c2d71f5
JH
675 if (t == strpos)
676 goto try_at_start;
2c2d71f5
JH
677 goto try_at_offset;
678 }
30944b6d 679 }
2c2d71f5
JH
680 }
681 else { /* Take into account the floating substring. */
33b8afdf
JH
682 char *last, *last1;
683 char *s1 = s;
684 SV* must;
685
686 t = HOP3c(s, -start_shift, strbeg);
687 last1 = last =
688 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
689 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
690 last = HOP3c(t, prog->float_max_offset, strend);
691 s = HOP3c(t, prog->float_min_offset, strend);
692 if (s < other_last)
693 s = other_last;
2c2d71f5 694 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
33b8afdf
JH
695 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
696 /* fbm_instr() takes into account exact value of end-of-str
697 if the check is SvTAIL(ed). Since false positives are OK,
698 and end-of-str is not later than strend we are OK. */
699 if (must == &PL_sv_undef) {
700 s = (char*)NULL;
a3621e74 701 DEBUG_EXECUTE_r(must = prog->float_utf8); /* for debug message */
33b8afdf
JH
702 }
703 else
2c2d71f5 704 s = fbm_instr((unsigned char*)s,
33b8afdf
JH
705 (unsigned char*)last + SvCUR(must)
706 - (SvTAIL(must)!=0),
7fba1cd6 707 must, multiline ? FBMrf_MULTILINE : 0);
a3621e74 708 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
33b8afdf
JH
709 (s ? "Found" : "Contradicts"),
710 PL_colors[0],
711 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
712 SvPVX(must),
713 PL_colors[1], (SvTAIL(must) ? "$" : "")));
714 if (!s) {
715 if (last1 == last) {
a3621e74 716 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
717 ", giving up...\n"));
718 goto fail_finish;
2c2d71f5 719 }
a3621e74 720 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
721 ", trying anchored starting at offset %ld...\n",
722 (long)(s1 + 1 - i_strpos)));
723 other_last = last;
724 s = HOP3c(t, 1, strend);
725 goto restart;
726 }
727 else {
a3621e74 728 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
33b8afdf
JH
729 (long)(s - i_strpos)));
730 other_last = s; /* Fix this later. --Hugo */
731 s = s1;
732 if (t == strpos)
733 goto try_at_start;
734 goto try_at_offset;
735 }
2c2d71f5 736 }
cad2e5aa 737 }
2c2d71f5
JH
738
739 t = s - prog->check_offset_max;
2c2d71f5 740 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 741 && (!do_utf8
1aa99e6b
IH
742 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
743 && t > strpos))) {
2c2d71f5
JH
744 /* Fixed substring is found far enough so that the match
745 cannot start at strpos. */
746 try_at_offset:
cad2e5aa 747 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
748 /* Eventually fbm_*() should handle this, but often
749 anchored_offset is not 0, so this check will not be wasted. */
750 /* XXXX In the code below we prefer to look for "^" even in
751 presence of anchored substrings. And we search even
752 beyond the found float position. These pessimizations
753 are historical artefacts only. */
754 find_anchor:
2c2d71f5 755 while (t < strend - prog->minlen) {
cad2e5aa 756 if (*t == '\n') {
4ee3650e 757 if (t < check_at - prog->check_offset_min) {
33b8afdf 758 if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
4ee3650e
GS
759 /* Since we moved from the found position,
760 we definitely contradict the found anchored
30944b6d
IZ
761 substr. Due to the above check we do not
762 contradict "check" substr.
763 Thus we can arrive here only if check substr
764 is float. Redo checking for "other"=="fixed".
765 */
9041c2e3 766 strpos = t + 1;
a3621e74 767 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
30944b6d
IZ
768 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
769 goto do_other_anchored;
770 }
4ee3650e
GS
771 /* We don't contradict the found floating substring. */
772 /* XXXX Why not check for STCLASS? */
cad2e5aa 773 s = t + 1;
a3621e74 774 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 775 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
776 goto set_useful;
777 }
4ee3650e
GS
778 /* Position contradicts check-string */
779 /* XXXX probably better to look for check-string
780 than for "\n", so one should lower the limit for t? */
a3621e74 781 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 782 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 783 other_last = strpos = s = t + 1;
cad2e5aa
JH
784 goto restart;
785 }
786 t++;
787 }
a3621e74 788 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
2c2d71f5
JH
789 PL_colors[0],PL_colors[1]));
790 goto fail_finish;
cad2e5aa 791 }
f5952150 792 else {
a3621e74 793 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
f5952150
GS
794 PL_colors[0],PL_colors[1]));
795 }
cad2e5aa
JH
796 s = t;
797 set_useful:
33b8afdf 798 ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
cad2e5aa
JH
799 }
800 else {
f5952150 801 /* The found string does not prohibit matching at strpos,
2c2d71f5 802 - no optimization of calling REx engine can be performed,
f5952150
GS
803 unless it was an MBOL and we are not after MBOL,
804 or a future STCLASS check will fail this. */
2c2d71f5
JH
805 try_at_start:
806 /* Even in this situation we may use MBOL flag if strpos is offset
807 wrt the start of the string. */
05b4157f 808 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 809 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
810 /* May be due to an implicit anchor of m{.*foo} */
811 && !(prog->reganch & ROPT_IMPLICIT))
812 {
cad2e5aa
JH
813 t = strpos;
814 goto find_anchor;
815 }
a3621e74 816 DEBUG_EXECUTE_r( if (ml_anch)
f5952150
GS
817 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
818 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 819 );
2c2d71f5 820 success_at_start:
30944b6d 821 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
33b8afdf
JH
822 && (do_utf8 ? (
823 prog->check_utf8 /* Could be deleted already */
824 && --BmUSEFUL(prog->check_utf8) < 0
825 && (prog->check_utf8 == prog->float_utf8)
826 ) : (
827 prog->check_substr /* Could be deleted already */
828 && --BmUSEFUL(prog->check_substr) < 0
829 && (prog->check_substr == prog->float_substr)
830 )))
66e933ab 831 {
cad2e5aa 832 /* If flags & SOMETHING - do not do it many times on the same match */
a3621e74 833 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
33b8afdf
JH
834 SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
835 if (do_utf8 ? prog->check_substr : prog->check_utf8)
836 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
837 prog->check_substr = prog->check_utf8 = Nullsv; /* disable */
838 prog->float_substr = prog->float_utf8 = Nullsv; /* clear */
5e39e1e5 839 check = Nullsv; /* abort */
cad2e5aa 840 s = strpos;
3cf5c195
IZ
841 /* XXXX This is a remnant of the old implementation. It
842 looks wasteful, since now INTUIT can use many
6eb5f6b9 843 other heuristics. */
cad2e5aa
JH
844 prog->reganch &= ~RE_USE_INTUIT;
845 }
846 else
847 s = strpos;
848 }
849
6eb5f6b9
JH
850 /* Last resort... */
851 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
852 if (prog->regstclass) {
853 /* minlen == 0 is possible if regstclass is \b or \B,
854 and the fixed substr is ''$.
855 Since minlen is already taken into account, s+1 is before strend;
856 accidentally, minlen >= 1 guaranties no false positives at s + 1
857 even for \b or \B. But (minlen? 1 : 0) below assumes that
858 regstclass does not come from lookahead... */
859 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
860 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 861 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 862 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 863 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 864 : 1);
33b8afdf 865 char *endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
1aa99e6b 866 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
33b8afdf 867 : (prog->float_substr || prog->float_utf8
1aa99e6b
IH
868 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
869 cl_l, strend)
870 : strend);
a1933d95 871 char *startpos = strbeg;
6eb5f6b9
JH
872
873 t = s;
9926ca43 874 cache_re(prog);
f33976b4 875 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
6eb5f6b9
JH
876 if (!s) {
877#ifdef DEBUGGING
b7953727 878 char *what = 0;
6eb5f6b9
JH
879#endif
880 if (endpos == strend) {
a3621e74 881 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
882 "Could not match STCLASS...\n") );
883 goto fail;
884 }
a3621e74 885 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab 886 "This position contradicts STCLASS...\n") );
653099ff
GS
887 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
888 goto fail;
6eb5f6b9 889 /* Contradict one of substrings */
33b8afdf
JH
890 if (prog->anchored_substr || prog->anchored_utf8) {
891 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
a3621e74 892 DEBUG_EXECUTE_r( what = "anchored" );
6eb5f6b9 893 hop_and_restart:
1aa99e6b 894 s = HOP3c(t, 1, strend);
66e933ab
GS
895 if (s + start_shift + end_shift > strend) {
896 /* XXXX Should be taken into account earlier? */
a3621e74 897 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
66e933ab
GS
898 "Could not match STCLASS...\n") );
899 goto fail;
900 }
5e39e1e5
HS
901 if (!check)
902 goto giveup;
a3621e74 903 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 904 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
905 what, (long)(s + start_shift - i_strpos)) );
906 goto restart;
907 }
66e933ab 908 /* Have both, check_string is floating */
6eb5f6b9
JH
909 if (t + start_shift >= check_at) /* Contradicts floating=check */
910 goto retry_floating_check;
911 /* Recheck anchored substring, but not floating... */
9041c2e3 912 s = check_at;
5e39e1e5
HS
913 if (!check)
914 goto giveup;
a3621e74 915 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150 916 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
917 (long)(other_last - i_strpos)) );
918 goto do_other_anchored;
919 }
60e71179
GS
920 /* Another way we could have checked stclass at the
921 current position only: */
922 if (ml_anch) {
923 s = t = t + 1;
5e39e1e5
HS
924 if (!check)
925 goto giveup;
a3621e74 926 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
927 "Looking for /%s^%s/m starting at offset %ld...\n",
928 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 929 goto try_at_offset;
66e933ab 930 }
33b8afdf 931 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
60e71179 932 goto fail;
6eb5f6b9
JH
933 /* Check is floating subtring. */
934 retry_floating_check:
935 t = check_at - start_shift;
a3621e74 936 DEBUG_EXECUTE_r( what = "floating" );
6eb5f6b9
JH
937 goto hop_and_restart;
938 }
b7953727 939 if (t != s) {
a3621e74 940 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 941 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
942 (long)(t - i_strpos), (long)(s - i_strpos))
943 );
944 }
945 else {
a3621e74 946 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
b7953727
JH
947 "Does not contradict STCLASS...\n");
948 );
949 }
6eb5f6b9 950 }
5e39e1e5 951 giveup:
a3621e74 952 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
5e39e1e5
HS
953 PL_colors[4], (check ? "Guessed" : "Giving up"),
954 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 955 return s;
2c2d71f5
JH
956
957 fail_finish: /* Substring not found */
33b8afdf
JH
958 if (prog->check_substr || prog->check_utf8) /* could be removed already */
959 BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
cad2e5aa 960 fail:
a3621e74 961 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
962 PL_colors[4],PL_colors[5]));
963 return Nullch;
964}
9661b544 965
6eb5f6b9 966/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
967STATIC char *
968S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 969{
6eb5f6b9
JH
970 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
971 char *m;
d8093b23 972 STRLEN ln;
5dab1207 973 STRLEN lnc;
078c425b 974 register STRLEN uskip;
d8093b23
G
975 unsigned int c1;
976 unsigned int c2;
6eb5f6b9
JH
977 char *e;
978 register I32 tmp = 1; /* Scratch variable? */
53c4c00c 979 register bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 980
6eb5f6b9
JH
981 /* We know what class it must start with. */
982 switch (OP(c)) {
6eb5f6b9 983 case ANYOF:
388cc4de 984 if (do_utf8) {
078c425b 985 while (s + (uskip = UTF8SKIP(s)) <= strend) {
388cc4de
HS
986 if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
987 !UTF8_IS_INVARIANT((U8)s[0]) ?
988 reginclass(c, (U8*)s, 0, do_utf8) :
989 REGINCLASS(c, (U8*)s)) {
990 if (tmp && (norun || regtry(prog, s)))
991 goto got_it;
992 else
993 tmp = doevery;
994 }
995 else
996 tmp = 1;
078c425b 997 s += uskip;
388cc4de
HS
998 }
999 }
1000 else {
1001 while (s < strend) {
1002 STRLEN skip = 1;
1003
1004 if (REGINCLASS(c, (U8*)s) ||
1005 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1006 /* The assignment of 2 is intentional:
1007 * for the folded sharp s, the skip is 2. */
1008 (skip = SHARP_S_SKIP))) {
1009 if (tmp && (norun || regtry(prog, s)))
1010 goto got_it;
1011 else
1012 tmp = doevery;
1013 }
1014 else
1015 tmp = 1;
1016 s += skip;
1017 }
a0d0e21e 1018 }
6eb5f6b9 1019 break;
f33976b4
DB
1020 case CANY:
1021 while (s < strend) {
1022 if (tmp && (norun || regtry(prog, s)))
1023 goto got_it;
1024 else
1025 tmp = doevery;
1026 s++;
1027 }
1028 break;
6eb5f6b9 1029 case EXACTF:
5dab1207
NIS
1030 m = STRING(c);
1031 ln = STR_LEN(c); /* length to match in octets/bytes */
1032 lnc = (I32) ln; /* length to match in characters */
1aa99e6b 1033 if (UTF) {
a2a2844f 1034 STRLEN ulen1, ulen2;
5dab1207 1035 U8 *sm = (U8 *) m;
89ebb4a3
JH
1036 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1037 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
a2a2844f
JH
1038
1039 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1040 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1041
89ebb4a3 1042 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
872c91ae 1043 0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
89ebb4a3 1044 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
872c91ae 1045 0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
5dab1207
NIS
1046 lnc = 0;
1047 while (sm < ((U8 *) m + ln)) {
1048 lnc++;
1049 sm += UTF8SKIP(sm);
1050 }
1aa99e6b
IH
1051 }
1052 else {
1053 c1 = *(U8*)m;
1054 c2 = PL_fold[c1];
1055 }
6eb5f6b9
JH
1056 goto do_exactf;
1057 case EXACTFL:
5dab1207
NIS
1058 m = STRING(c);
1059 ln = STR_LEN(c);
1060 lnc = (I32) ln;
d8093b23 1061 c1 = *(U8*)m;
6eb5f6b9
JH
1062 c2 = PL_fold_locale[c1];
1063 do_exactf:
db12adc6 1064 e = HOP3c(strend, -((I32)lnc), s);
b3c9acc1 1065
6eb5f6b9
JH
1066 if (norun && e < s)
1067 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 1068
60a8b682
JH
1069 /* The idea in the EXACTF* cases is to first find the
1070 * first character of the EXACTF* node and then, if
1071 * necessary, case-insensitively compare the full
1072 * text of the node. The c1 and c2 are the first
1073 * characters (though in Unicode it gets a bit
1074 * more complicated because there are more cases
7f16dd3d
JH
1075 * than just upper and lower: one needs to use
1076 * the so-called folding case for case-insensitive
1077 * matching (called "loose matching" in Unicode).
1078 * ibcmp_utf8() will do just that. */
60a8b682 1079
1aa99e6b 1080 if (do_utf8) {
575cac57 1081 UV c, f;
89ebb4a3
JH
1082 U8 tmpbuf [UTF8_MAXBYTES+1];
1083 U8 foldbuf[UTF8_MAXBYTES_CASE+1];
575cac57 1084 STRLEN len, foldlen;
d7f013c8 1085
09091399 1086 if (c1 == c2) {
5dab1207
NIS
1087 /* Upper and lower of 1st char are equal -
1088 * probably not a "letter". */
1aa99e6b 1089 while (s <= e) {
89ebb4a3 1090 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
872c91ae
JH
1091 ckWARN(WARN_UTF8) ?
1092 0 : UTF8_ALLOW_ANY);
80aecb99
JH
1093 if ( c == c1
1094 && (ln == len ||
66423254 1095 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1096 m, (char **)0, ln, (bool)UTF))
55da9344 1097 && (norun || regtry(prog, s)) )
1aa99e6b 1098 goto got_it;
80aecb99
JH
1099 else {
1100 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1101 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1102 if ( f != c
1103 && (f == c1 || f == c2)
1104 && (ln == foldlen ||
66423254
JH
1105 !ibcmp_utf8((char *) foldbuf,
1106 (char **)0, foldlen, do_utf8,
d07ddd77 1107 m,
eb160463 1108 (char **)0, ln, (bool)UTF))
80aecb99
JH
1109 && (norun || regtry(prog, s)) )
1110 goto got_it;
1111 }
1aa99e6b
IH
1112 s += len;
1113 }
09091399
JH
1114 }
1115 else {
1aa99e6b 1116 while (s <= e) {
89ebb4a3 1117 c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
872c91ae
JH
1118 ckWARN(WARN_UTF8) ?
1119 0 : UTF8_ALLOW_ANY);
80aecb99 1120
60a8b682 1121 /* Handle some of the three Greek sigmas cases.
8c01da3c
JH
1122 * Note that not all the possible combinations
1123 * are handled here: some of them are handled
1124 * by the standard folding rules, and some of
1125 * them (the character class or ANYOF cases)
1126 * are handled during compiletime in
1127 * regexec.c:S_regclass(). */
880bd946
JH
1128 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1129 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1130 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99
JH
1131
1132 if ( (c == c1 || c == c2)
1133 && (ln == len ||
66423254 1134 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1135 m, (char **)0, ln, (bool)UTF))
55da9344 1136 && (norun || regtry(prog, s)) )
1aa99e6b 1137 goto got_it;
80aecb99
JH
1138 else {
1139 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1140 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1141 if ( f != c
1142 && (f == c1 || f == c2)
1143 && (ln == foldlen ||
a6872d42 1144 !ibcmp_utf8((char *) foldbuf,
66423254 1145 (char **)0, foldlen, do_utf8,
d07ddd77 1146 m,
eb160463 1147 (char **)0, ln, (bool)UTF))
80aecb99
JH
1148 && (norun || regtry(prog, s)) )
1149 goto got_it;
1150 }
1aa99e6b
IH
1151 s += len;
1152 }
09091399 1153 }
1aa99e6b
IH
1154 }
1155 else {
1156 if (c1 == c2)
1157 while (s <= e) {
1158 if ( *(U8*)s == c1
1159 && (ln == 1 || !(OP(c) == EXACTF
1160 ? ibcmp(s, m, ln)
1161 : ibcmp_locale(s, m, ln)))
1162 && (norun || regtry(prog, s)) )
1163 goto got_it;
1164 s++;
1165 }
1166 else
1167 while (s <= e) {
1168 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1169 && (ln == 1 || !(OP(c) == EXACTF
1170 ? ibcmp(s, m, ln)
1171 : ibcmp_locale(s, m, ln)))
1172 && (norun || regtry(prog, s)) )
1173 goto got_it;
1174 s++;
1175 }
b3c9acc1
IZ
1176 }
1177 break;
bbce6d69 1178 case BOUNDL:
3280af22 1179 PL_reg_flags |= RF_tainted;
bbce6d69 1180 /* FALL THROUGH */
a0d0e21e 1181 case BOUND:
ffc61ed2 1182 if (do_utf8) {
12d33761 1183 if (s == PL_bostr)
ffc61ed2
JH
1184 tmp = '\n';
1185 else {
b4f7163a 1186 U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
9041c2e3 1187
b4f7163a 1188 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
ffc61ed2
JH
1189 }
1190 tmp = ((OP(c) == BOUND ?
9041c2e3 1191 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1192 LOAD_UTF8_CHARCLASS(alnum,"a");
078c425b 1193 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1194 if (tmp == !(OP(c) == BOUND ?
3568d838 1195 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1196 isALNUM_LC_utf8((U8*)s)))
1197 {
1198 tmp = !tmp;
1199 if ((norun || regtry(prog, s)))
1200 goto got_it;
1201 }
078c425b 1202 s += uskip;
a687059c 1203 }
a0d0e21e 1204 }
667bb95a 1205 else {
12d33761 1206 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1207 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1208 while (s < strend) {
1209 if (tmp ==
1210 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1211 tmp = !tmp;
1212 if ((norun || regtry(prog, s)))
1213 goto got_it;
1214 }
1215 s++;
a0ed51b3 1216 }
a0ed51b3 1217 }
6eb5f6b9 1218 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1219 goto got_it;
1220 break;
bbce6d69 1221 case NBOUNDL:
3280af22 1222 PL_reg_flags |= RF_tainted;
bbce6d69 1223 /* FALL THROUGH */
a0d0e21e 1224 case NBOUND:
ffc61ed2 1225 if (do_utf8) {
12d33761 1226 if (s == PL_bostr)
ffc61ed2
JH
1227 tmp = '\n';
1228 else {
b4f7163a 1229 U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
9041c2e3 1230
b4f7163a 1231 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
ffc61ed2
JH
1232 }
1233 tmp = ((OP(c) == NBOUND ?
9041c2e3 1234 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1235 LOAD_UTF8_CHARCLASS(alnum,"a");
078c425b 1236 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2 1237 if (tmp == !(OP(c) == NBOUND ?
3568d838 1238 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1239 isALNUM_LC_utf8((U8*)s)))
1240 tmp = !tmp;
1241 else if ((norun || regtry(prog, s)))
1242 goto got_it;
078c425b 1243 s += uskip;
ffc61ed2 1244 }
a0d0e21e 1245 }
667bb95a 1246 else {
12d33761 1247 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1248 tmp = ((OP(c) == NBOUND ?
1249 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1250 while (s < strend) {
1251 if (tmp ==
1252 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1253 tmp = !tmp;
1254 else if ((norun || regtry(prog, s)))
1255 goto got_it;
1256 s++;
1257 }
a0ed51b3 1258 }
6eb5f6b9 1259 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1260 goto got_it;
1261 break;
a0d0e21e 1262 case ALNUM:
ffc61ed2 1263 if (do_utf8) {
8269fa76 1264 LOAD_UTF8_CHARCLASS(alnum,"a");
078c425b 1265 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1266 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1267 if (tmp && (norun || regtry(prog, s)))
1268 goto got_it;
1269 else
1270 tmp = doevery;
1271 }
bbce6d69 1272 else
ffc61ed2 1273 tmp = 1;
078c425b 1274 s += uskip;
bbce6d69 1275 }
bbce6d69 1276 }
ffc61ed2
JH
1277 else {
1278 while (s < strend) {
1279 if (isALNUM(*s)) {
1280 if (tmp && (norun || regtry(prog, s)))
1281 goto got_it;
1282 else
1283 tmp = doevery;
1284 }
a0ed51b3 1285 else
ffc61ed2
JH
1286 tmp = 1;
1287 s++;
a0ed51b3 1288 }
a0ed51b3
LW
1289 }
1290 break;
bbce6d69 1291 case ALNUML:
3280af22 1292 PL_reg_flags |= RF_tainted;
ffc61ed2 1293 if (do_utf8) {
078c425b 1294 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1295 if (isALNUM_LC_utf8((U8*)s)) {
1296 if (tmp && (norun || regtry(prog, s)))
1297 goto got_it;
1298 else
1299 tmp = doevery;
1300 }
a687059c 1301 else
ffc61ed2 1302 tmp = 1;
078c425b 1303 s += uskip;
a0d0e21e 1304 }
a0d0e21e 1305 }
ffc61ed2
JH
1306 else {
1307 while (s < strend) {
1308 if (isALNUM_LC(*s)) {
1309 if (tmp && (norun || regtry(prog, s)))
1310 goto got_it;
1311 else
1312 tmp = doevery;
1313 }
a0ed51b3 1314 else
ffc61ed2
JH
1315 tmp = 1;
1316 s++;
a0ed51b3 1317 }
a0ed51b3
LW
1318 }
1319 break;
a0d0e21e 1320 case NALNUM:
ffc61ed2 1321 if (do_utf8) {
8269fa76 1322 LOAD_UTF8_CHARCLASS(alnum,"a");
078c425b 1323 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1324 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1325 if (tmp && (norun || regtry(prog, s)))
1326 goto got_it;
1327 else
1328 tmp = doevery;
1329 }
bbce6d69 1330 else
ffc61ed2 1331 tmp = 1;
078c425b 1332 s += uskip;
bbce6d69 1333 }
bbce6d69 1334 }
ffc61ed2
JH
1335 else {
1336 while (s < strend) {
1337 if (!isALNUM(*s)) {
1338 if (tmp && (norun || regtry(prog, s)))
1339 goto got_it;
1340 else
1341 tmp = doevery;
1342 }
a0ed51b3 1343 else
ffc61ed2
JH
1344 tmp = 1;
1345 s++;
a0ed51b3 1346 }
a0ed51b3
LW
1347 }
1348 break;
bbce6d69 1349 case NALNUML:
3280af22 1350 PL_reg_flags |= RF_tainted;
ffc61ed2 1351 if (do_utf8) {
078c425b 1352 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1353 if (!isALNUM_LC_utf8((U8*)s)) {
1354 if (tmp && (norun || regtry(prog, s)))
1355 goto got_it;
1356 else
1357 tmp = doevery;
1358 }
a687059c 1359 else
ffc61ed2 1360 tmp = 1;
078c425b 1361 s += uskip;
a687059c 1362 }
a0d0e21e 1363 }
ffc61ed2
JH
1364 else {
1365 while (s < strend) {
1366 if (!isALNUM_LC(*s)) {
1367 if (tmp && (norun || regtry(prog, s)))
1368 goto got_it;
1369 else
1370 tmp = doevery;
1371 }
a0ed51b3 1372 else
ffc61ed2
JH
1373 tmp = 1;
1374 s++;
a0ed51b3 1375 }
a0ed51b3
LW
1376 }
1377 break;
a0d0e21e 1378 case SPACE:
ffc61ed2 1379 if (do_utf8) {
8269fa76 1380 LOAD_UTF8_CHARCLASS(space," ");
078c425b 1381 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1382 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1383 if (tmp && (norun || regtry(prog, s)))
1384 goto got_it;
1385 else
1386 tmp = doevery;
1387 }
a0d0e21e 1388 else
ffc61ed2 1389 tmp = 1;
078c425b 1390 s += uskip;
2304df62 1391 }
a0d0e21e 1392 }
ffc61ed2
JH
1393 else {
1394 while (s < strend) {
1395 if (isSPACE(*s)) {
1396 if (tmp && (norun || regtry(prog, s)))
1397 goto got_it;
1398 else
1399 tmp = doevery;
1400 }
a0ed51b3 1401 else
ffc61ed2
JH
1402 tmp = 1;
1403 s++;
a0ed51b3 1404 }
a0ed51b3
LW
1405 }
1406 break;
bbce6d69 1407 case SPACEL:
3280af22 1408 PL_reg_flags |= RF_tainted;
ffc61ed2 1409 if (do_utf8) {
078c425b 1410 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1411 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1412 if (tmp && (norun || regtry(prog, s)))
1413 goto got_it;
1414 else
1415 tmp = doevery;
1416 }
bbce6d69 1417 else
ffc61ed2 1418 tmp = 1;
078c425b 1419 s += uskip;
bbce6d69 1420 }
bbce6d69 1421 }
ffc61ed2
JH
1422 else {
1423 while (s < strend) {
1424 if (isSPACE_LC(*s)) {
1425 if (tmp && (norun || regtry(prog, s)))
1426 goto got_it;
1427 else
1428 tmp = doevery;
1429 }
a0ed51b3 1430 else
ffc61ed2
JH
1431 tmp = 1;
1432 s++;
a0ed51b3 1433 }
a0ed51b3
LW
1434 }
1435 break;
a0d0e21e 1436 case NSPACE:
ffc61ed2 1437 if (do_utf8) {
8269fa76 1438 LOAD_UTF8_CHARCLASS(space," ");
078c425b 1439 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1440 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1441 if (tmp && (norun || regtry(prog, s)))
1442 goto got_it;
1443 else
1444 tmp = doevery;
1445 }
a0d0e21e 1446 else
ffc61ed2 1447 tmp = 1;
078c425b 1448 s += uskip;
a687059c 1449 }
a0d0e21e 1450 }
ffc61ed2
JH
1451 else {
1452 while (s < strend) {
1453 if (!isSPACE(*s)) {
1454 if (tmp && (norun || regtry(prog, s)))
1455 goto got_it;
1456 else
1457 tmp = doevery;
1458 }
a0ed51b3 1459 else
ffc61ed2
JH
1460 tmp = 1;
1461 s++;
a0ed51b3 1462 }
a0ed51b3
LW
1463 }
1464 break;
bbce6d69 1465 case NSPACEL:
3280af22 1466 PL_reg_flags |= RF_tainted;
ffc61ed2 1467 if (do_utf8) {
078c425b 1468 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1469 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1470 if (tmp && (norun || regtry(prog, s)))
1471 goto got_it;
1472 else
1473 tmp = doevery;
1474 }
bbce6d69 1475 else
ffc61ed2 1476 tmp = 1;
078c425b 1477 s += uskip;
bbce6d69 1478 }
bbce6d69 1479 }
ffc61ed2
JH
1480 else {
1481 while (s < strend) {
1482 if (!isSPACE_LC(*s)) {
1483 if (tmp && (norun || regtry(prog, s)))
1484 goto got_it;
1485 else
1486 tmp = doevery;
1487 }
a0ed51b3 1488 else
ffc61ed2
JH
1489 tmp = 1;
1490 s++;
a0ed51b3 1491 }
a0ed51b3
LW
1492 }
1493 break;
a0d0e21e 1494 case DIGIT:
ffc61ed2 1495 if (do_utf8) {
8269fa76 1496 LOAD_UTF8_CHARCLASS(digit,"0");
078c425b 1497 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1498 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1499 if (tmp && (norun || regtry(prog, s)))
1500 goto got_it;
1501 else
1502 tmp = doevery;
1503 }
a0d0e21e 1504 else
ffc61ed2 1505 tmp = 1;
078c425b 1506 s += uskip;
2b69d0c2 1507 }
a0d0e21e 1508 }
ffc61ed2
JH
1509 else {
1510 while (s < strend) {
1511 if (isDIGIT(*s)) {
1512 if (tmp && (norun || regtry(prog, s)))
1513 goto got_it;
1514 else
1515 tmp = doevery;
1516 }
a0ed51b3 1517 else
ffc61ed2
JH
1518 tmp = 1;
1519 s++;
a0ed51b3 1520 }
a0ed51b3
LW
1521 }
1522 break;
b8c5462f
JH
1523 case DIGITL:
1524 PL_reg_flags |= RF_tainted;
ffc61ed2 1525 if (do_utf8) {
078c425b 1526 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1527 if (isDIGIT_LC_utf8((U8*)s)) {
1528 if (tmp && (norun || regtry(prog, s)))
1529 goto got_it;
1530 else
1531 tmp = doevery;
1532 }
b8c5462f 1533 else
ffc61ed2 1534 tmp = 1;
078c425b 1535 s += uskip;
b8c5462f 1536 }
b8c5462f 1537 }
ffc61ed2
JH
1538 else {
1539 while (s < strend) {
1540 if (isDIGIT_LC(*s)) {
1541 if (tmp && (norun || regtry(prog, s)))
1542 goto got_it;
1543 else
1544 tmp = doevery;
1545 }
b8c5462f 1546 else
ffc61ed2
JH
1547 tmp = 1;
1548 s++;
b8c5462f 1549 }
b8c5462f
JH
1550 }
1551 break;
a0d0e21e 1552 case NDIGIT:
ffc61ed2 1553 if (do_utf8) {
8269fa76 1554 LOAD_UTF8_CHARCLASS(digit,"0");
078c425b 1555 while (s + (uskip = UTF8SKIP(s)) <= strend) {
3568d838 1556 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1557 if (tmp && (norun || regtry(prog, s)))
1558 goto got_it;
1559 else
1560 tmp = doevery;
1561 }
a0d0e21e 1562 else
ffc61ed2 1563 tmp = 1;
078c425b 1564 s += uskip;
a687059c 1565 }
a0d0e21e 1566 }
ffc61ed2
JH
1567 else {
1568 while (s < strend) {
1569 if (!isDIGIT(*s)) {
1570 if (tmp && (norun || regtry(prog, s)))
1571 goto got_it;
1572 else
1573 tmp = doevery;
1574 }
a0ed51b3 1575 else
ffc61ed2
JH
1576 tmp = 1;
1577 s++;
a0ed51b3 1578 }
a0ed51b3
LW
1579 }
1580 break;
b8c5462f
JH
1581 case NDIGITL:
1582 PL_reg_flags |= RF_tainted;
ffc61ed2 1583 if (do_utf8) {
078c425b 1584 while (s + (uskip = UTF8SKIP(s)) <= strend) {
ffc61ed2
JH
1585 if (!isDIGIT_LC_utf8((U8*)s)) {
1586 if (tmp && (norun || regtry(prog, s)))
1587 goto got_it;
1588 else
1589 tmp = doevery;
1590 }
b8c5462f 1591 else
ffc61ed2 1592 tmp = 1;
078c425b 1593 s += uskip;
b8c5462f 1594 }
a0ed51b3 1595 }
ffc61ed2
JH
1596 else {
1597 while (s < strend) {
1598 if (!isDIGIT_LC(*s)) {
1599 if (tmp && (norun || regtry(prog, s)))
1600 goto got_it;
1601 else
1602 tmp = doevery;
1603 }
cf93c79d 1604 else
ffc61ed2
JH
1605 tmp = 1;
1606 s++;
b8c5462f 1607 }
b8c5462f
JH
1608 }
1609 break;
b3c9acc1 1610 default:
3c3eec57
GS
1611 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1612 break;
d6a28714 1613 }
6eb5f6b9
JH
1614 return 0;
1615 got_it:
1616 return s;
1617}
1618
1619/*
1620 - regexec_flags - match a regexp against a string
1621 */
1622I32
1623Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1624 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1625/* strend: pointer to null at end of string */
1626/* strbeg: real beginning of string */
1627/* minend: end of match must be >=minend after stringarg. */
1628/* data: May be used for some additional optimizations. */
1629/* nosave: For optimizations. */
1630{
6eb5f6b9
JH
1631 register char *s;
1632 register regnode *c;
1633 register char *startpos = stringarg;
6eb5f6b9
JH
1634 I32 minlen; /* must match at least this many chars */
1635 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1636 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1637 constant substr. */ /* CC */
1638 I32 end_shift = 0; /* Same for the end. */ /* CC */
1639 I32 scream_pos = -1; /* Internal iterator of scream. */
1640 char *scream_olds;
1641 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1642 bool do_utf8 = DO_UTF8(sv);
7fba1cd6 1643 I32 multiline = prog->reganch & PMf_MULTILINE;
2a782b5b 1644#ifdef DEBUGGING
9e55ce06
JH
1645 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
1646 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2a782b5b 1647#endif
a3621e74
YO
1648
1649 GET_RE_DEBUG_FLAGS_DECL;
1650
a30b2f1f 1651 RX_MATCH_UTF8_set(prog,do_utf8);
6eb5f6b9
JH
1652
1653 PL_regcc = 0;
1654
1655 cache_re(prog);
1656#ifdef DEBUGGING
aea4f609 1657 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1658#endif
1659
1660 /* Be paranoid... */
1661 if (prog == NULL || startpos == NULL) {
1662 Perl_croak(aTHX_ "NULL regexp parameter");
1663 return 0;
1664 }
1665
1666 minlen = prog->minlen;
61a36c01 1667 if (strend - startpos < minlen) {
a3621e74 1668 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
a72c7584
JH
1669 "String too short [regexec_flags]...\n"));
1670 goto phooey;
1aa99e6b 1671 }
6eb5f6b9 1672
6eb5f6b9
JH
1673 /* Check validity of program. */
1674 if (UCHARAT(prog->program) != REG_MAGIC) {
1675 Perl_croak(aTHX_ "corrupted regexp program");
1676 }
1677
1678 PL_reg_flags = 0;
1679 PL_reg_eval_set = 0;
1680 PL_reg_maxiter = 0;
1681
1682 if (prog->reganch & ROPT_UTF8)
1683 PL_reg_flags |= RF_utf8;
1684
1685 /* Mark beginning of line for ^ and lookbehind. */
1686 PL_regbol = startpos;
1687 PL_bostr = strbeg;
1688 PL_reg_sv = sv;
1689
1690 /* Mark end of line for $ (and such) */
1691 PL_regeol = strend;
1692
1693 /* see how far we have to get to not match where we matched before */
1694 PL_regtill = startpos+minend;
1695
1696 /* We start without call_cc context. */
1697 PL_reg_call_cc = 0;
1698
1699 /* If there is a "must appear" string, look for it. */
1700 s = startpos;
1701
1702 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1703 MAGIC *mg;
1704
1705 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1706 PL_reg_ganch = startpos;
1707 else if (sv && SvTYPE(sv) >= SVt_PVMG
1708 && SvMAGIC(sv)
14befaf4
DM
1709 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1710 && mg->mg_len >= 0) {
6eb5f6b9
JH
1711 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1712 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1713 if (s > PL_reg_ganch)
6eb5f6b9
JH
1714 goto phooey;
1715 s = PL_reg_ganch;
1716 }
1717 }
1718 else /* pos() not defined */
1719 PL_reg_ganch = strbeg;
1720 }
1721
33b8afdf 1722 if (!(flags & REXEC_CHECKED) && (prog->check_substr != Nullsv || prog->check_utf8 != Nullsv)) {
6eb5f6b9
JH
1723 re_scream_pos_data d;
1724
1725 d.scream_olds = &scream_olds;
1726 d.scream_pos = &scream_pos;
1727 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7 1728 if (!s) {
a3621e74 1729 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1730 goto phooey; /* not present */
3fa9c3d7 1731 }
6eb5f6b9
JH
1732 }
1733
a3621e74 1734 DEBUG_EXECUTE_r({
9e55ce06
JH
1735 char *s0 = UTF ?
1736 pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
c728cb41 1737 UNI_DISPLAY_REGEX) :
9e55ce06
JH
1738 prog->precomp;
1739 int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1740 char *s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
c728cb41 1741 UNI_DISPLAY_REGEX) : startpos;
9e55ce06 1742 int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
2a782b5b
JH
1743 if (!PL_colorset)
1744 reginitcolors();
1745 PerlIO_printf(Perl_debug_log,
9e55ce06 1746 "%sMatching REx%s `%s%*.*s%s%s' against `%s%.*s%s%s'\n",
2a782b5b 1747 PL_colors[4],PL_colors[5],PL_colors[0],
9e55ce06 1748 len0, len0, s0,
2a782b5b 1749 PL_colors[1],
9e55ce06 1750 len0 > 60 ? "..." : "",
2a782b5b 1751 PL_colors[0],
9e55ce06
JH
1752 (int)(len1 > 60 ? 60 : len1),
1753 s1, PL_colors[1],
1754 (len1 > 60 ? "..." : "")
2a782b5b
JH
1755 );
1756 });
6eb5f6b9
JH
1757
1758 /* Simplest case: anchored match need be tried only once. */
1759 /* [unless only anchor is BOL and multiline is set] */
1760 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1761 if (s == startpos && regtry(prog, startpos))
1762 goto got_it;
7fba1cd6 1763 else if (multiline || (prog->reganch & ROPT_IMPLICIT)
6eb5f6b9
JH
1764 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1765 {
1766 char *end;
1767
1768 if (minlen)
1769 dontbother = minlen - 1;
1aa99e6b 1770 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9 1771 /* for multiline we only have to try after newlines */
33b8afdf 1772 if (prog->check_substr || prog->check_utf8) {
6eb5f6b9
JH
1773 if (s == startpos)
1774 goto after_try;
1775 while (1) {
1776 if (regtry(prog, s))
1777 goto got_it;
1778 after_try:
1779 if (s >= end)
1780 goto phooey;
1781 if (prog->reganch & RE_USE_INTUIT) {
1782 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1783 if (!s)
1784 goto phooey;
1785 }
1786 else
1787 s++;
1788 }
1789 } else {
1790 if (s > startpos)
1791 s--;
1792 while (s < end) {
1793 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1794 if (regtry(prog, s))
1795 goto got_it;
1796 }
1797 }
1798 }
1799 }
1800 goto phooey;
1801 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1802 if (regtry(prog, PL_reg_ganch))
1803 goto got_it;
1804 goto phooey;
1805 }
1806
1807 /* Messy cases: unanchored match. */
33b8afdf 1808 if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1809 /* we have /x+whatever/ */
1810 /* it must be a one character string (XXXX Except UTF?) */
33b8afdf 1811 char ch;
bf93d4cc
GS
1812#ifdef DEBUGGING
1813 int did_match = 0;
1814#endif
33b8afdf
JH
1815 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1816 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1817 ch = SvPVX(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
bf93d4cc 1818
1aa99e6b 1819 if (do_utf8) {
6eb5f6b9
JH
1820 while (s < strend) {
1821 if (*s == ch) {
a3621e74 1822 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1823 if (regtry(prog, s)) goto got_it;
1824 s += UTF8SKIP(s);
1825 while (s < strend && *s == ch)
1826 s += UTF8SKIP(s);
1827 }
1828 s += UTF8SKIP(s);
1829 }
1830 }
1831 else {
1832 while (s < strend) {
1833 if (*s == ch) {
a3621e74 1834 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1835 if (regtry(prog, s)) goto got_it;
1836 s++;
1837 while (s < strend && *s == ch)
1838 s++;
1839 }
1840 s++;
1841 }
1842 }
a3621e74 1843 DEBUG_EXECUTE_r(if (!did_match)
bf93d4cc 1844 PerlIO_printf(Perl_debug_log,
b7953727
JH
1845 "Did not find anchored character...\n")
1846 );
6eb5f6b9
JH
1847 }
1848 /*SUPPRESS 560*/
33b8afdf
JH
1849 else if (prog->anchored_substr != Nullsv
1850 || prog->anchored_utf8 != Nullsv
1851 || ((prog->float_substr != Nullsv || prog->float_utf8 != Nullsv)
1852 && prog->float_max_offset < strend - s)) {
1853 SV *must;
1854 I32 back_max;
1855 I32 back_min;
1856 char *last;
6eb5f6b9 1857 char *last1; /* Last position checked before */
bf93d4cc
GS
1858#ifdef DEBUGGING
1859 int did_match = 0;
1860#endif
33b8afdf
JH
1861 if (prog->anchored_substr || prog->anchored_utf8) {
1862 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1863 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1864 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1865 back_max = back_min = prog->anchored_offset;
1866 } else {
1867 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1868 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1869 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1870 back_max = prog->float_max_offset;
1871 back_min = prog->float_min_offset;
1872 }
1873 if (must == &PL_sv_undef)
1874 /* could not downgrade utf8 check substring, so must fail */
1875 goto phooey;
1876
1877 last = HOP3c(strend, /* Cannot start after this */
1878 -(I32)(CHR_SVLEN(must)
1879 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9
JH
1880
1881 if (s > PL_bostr)
1882 last1 = HOPc(s, -1);
1883 else
1884 last1 = s - 1; /* bogus */
1885
1886 /* XXXX check_substr already used to find `s', can optimize if
1887 check_substr==must. */
1888 scream_pos = -1;
1889 dontbother = end_shift;
1890 strend = HOPc(strend, -dontbother);
1891 while ( (s <= last) &&
9041c2e3 1892 ((flags & REXEC_SCREAM)
1aa99e6b 1893 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1894 end_shift, &scream_pos, 0))
1aa99e6b 1895 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1896 (unsigned char*)strend, must,
7fba1cd6 1897 multiline ? FBMrf_MULTILINE : 0))) ) {
4addbd3b
HS
1898 /* we may be pointing at the wrong string */
1899 if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
7ef91622 1900 s = strbeg + (s - SvPVX(sv));
a3621e74 1901 DEBUG_EXECUTE_r( did_match = 1 );
6eb5f6b9
JH
1902 if (HOPc(s, -back_max) > last1) {
1903 last1 = HOPc(s, -back_min);
1904 s = HOPc(s, -back_max);
1905 }
1906 else {
1907 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1908
1909 last1 = HOPc(s, -back_min);
1910 s = t;
1911 }
1aa99e6b 1912 if (do_utf8) {
6eb5f6b9
JH
1913 while (s <= last1) {
1914 if (regtry(prog, s))
1915 goto got_it;
1916 s += UTF8SKIP(s);
1917 }
1918 }
1919 else {
1920 while (s <= last1) {
1921 if (regtry(prog, s))
1922 goto got_it;
1923 s++;
1924 }
1925 }
1926 }
a3621e74 1927 DEBUG_EXECUTE_r(if (!did_match)
b7953727
JH
1928 PerlIO_printf(Perl_debug_log,
1929 "Did not find %s substr `%s%.*s%s'%s...\n",
33b8afdf 1930 ((must == prog->anchored_substr || must == prog->anchored_utf8)
bf93d4cc
GS
1931 ? "anchored" : "floating"),
1932 PL_colors[0],
1933 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1934 SvPVX(must),
b7953727
JH
1935 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1936 );
6eb5f6b9
JH
1937 goto phooey;
1938 }
155aba94 1939 else if ((c = prog->regstclass)) {
f14c76ed
RGS
1940 if (minlen) {
1941 I32 op = (U8)OP(prog->regstclass);
66e933ab 1942 /* don't bother with what can't match */
f14c76ed
RGS
1943 if (PL_regkind[op] != EXACT && op != CANY)
1944 strend = HOPc(strend, -(minlen - 1));
1945 }
a3621e74 1946 DEBUG_EXECUTE_r({
ffc61ed2 1947 SV *prop = sv_newmortal();
9e55ce06
JH
1948 char *s0;
1949 char *s1;
1950 int len0;
1951 int len1;
1952
ffc61ed2 1953 regprop(prop, c);
9e55ce06
JH
1954 s0 = UTF ?
1955 pv_uni_display(dsv0, (U8*)SvPVX(prop), SvCUR(prop), 60,
c728cb41 1956 UNI_DISPLAY_REGEX) :
9e55ce06
JH
1957 SvPVX(prop);
1958 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1959 s1 = UTF ?
c728cb41 1960 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
9e55ce06
JH
1961 len1 = UTF ? SvCUR(dsv1) : strend - s;
1962 PerlIO_printf(Perl_debug_log,
1963 "Matching stclass `%*.*s' against `%*.*s'\n",
1964 len0, len0, s0,
1965 len1, len1, s1);
ffc61ed2 1966 });
6eb5f6b9
JH
1967 if (find_byclass(prog, c, s, strend, startpos, 0))
1968 goto got_it;
a3621e74 1969 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1970 }
1971 else {
1972 dontbother = 0;
33b8afdf
JH
1973 if (prog->float_substr != Nullsv || prog->float_utf8 != Nullsv) {
1974 /* Trim the end. */
d6a28714 1975 char *last;
33b8afdf
JH
1976 SV* float_real;
1977
1978 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1979 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1980 float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
d6a28714
JH
1981
1982 if (flags & REXEC_SCREAM) {
33b8afdf 1983 last = screaminstr(sv, float_real, s - strbeg,
d6a28714
JH
1984 end_shift, &scream_pos, 1); /* last one */
1985 if (!last)
ffc61ed2 1986 last = scream_olds; /* Only one occurrence. */
4addbd3b
HS
1987 /* we may be pointing at the wrong string */
1988 else if (RX_MATCH_COPIED(prog))
7ef91622 1989 s = strbeg + (s - SvPVX(sv));
b8c5462f 1990 }
d6a28714
JH
1991 else {
1992 STRLEN len;
33b8afdf 1993 char *little = SvPV(float_real, len);
d6a28714 1994
33b8afdf 1995 if (SvTAIL(float_real)) {
d6a28714
JH
1996 if (memEQ(strend - len + 1, little, len - 1))
1997 last = strend - len + 1;
7fba1cd6 1998 else if (!multiline)
9041c2e3 1999 last = memEQ(strend - len, little, len)
d6a28714 2000 ? strend - len : Nullch;
b8c5462f 2001 else
d6a28714
JH
2002 goto find_last;
2003 } else {
2004 find_last:
9041c2e3 2005 if (len)
d6a28714 2006 last = rninstr(s, strend, little, little + len);
b8c5462f 2007 else
d6a28714 2008 last = strend; /* matching `$' */
b8c5462f 2009 }
b8c5462f 2010 }
bf93d4cc 2011 if (last == NULL) {
a3621e74 2012 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
bf93d4cc
GS
2013 "%sCan't trim the tail, match fails (should not happen)%s\n",
2014 PL_colors[4],PL_colors[5]));
2015 goto phooey; /* Should not happen! */
2016 }
d6a28714
JH
2017 dontbother = strend - last + prog->float_min_offset;
2018 }
2019 if (minlen && (dontbother < minlen))
2020 dontbother = minlen - 1;
2021 strend -= dontbother; /* this one's always in bytes! */
2022 /* We don't know much -- general case. */
1aa99e6b 2023 if (do_utf8) {
d6a28714
JH
2024 for (;;) {
2025 if (regtry(prog, s))
2026 goto got_it;
2027 if (s >= strend)
2028 break;
b8c5462f 2029 s += UTF8SKIP(s);
d6a28714
JH
2030 };
2031 }
2032 else {
2033 do {
2034 if (regtry(prog, s))
2035 goto got_it;
2036 } while (s++ < strend);
2037 }
2038 }
2039
2040 /* Failure. */
2041 goto phooey;
2042
2043got_it:
2044 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2045
2046 if (PL_reg_eval_set) {
2047 /* Preserve the current value of $^R */
2048 if (oreplsv != GvSV(PL_replgv))
2049 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2050 restored, the value remains
2051 the same. */
acfe0abc 2052 restore_pos(aTHX_ 0);
d6a28714
JH
2053 }
2054
2055 /* make sure $`, $&, $', and $digit will work later */
2056 if ( !(flags & REXEC_NOT_FIRST) ) {
ed252734 2057 RX_MATCH_COPY_FREE(prog);
d6a28714
JH
2058 if (flags & REXEC_COPY_STR) {
2059 I32 i = PL_regeol - startpos + (stringarg - strbeg);
ed252734
NC
2060#ifdef PERL_COPY_ON_WRITE
2061 if ((SvIsCOW(sv)
2062 || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2063 if (DEBUG_C_TEST) {
2064 PerlIO_printf(Perl_debug_log,
2065 "Copy on write: regexp capture, type %d\n",
2066 (int) SvTYPE(sv));
2067 }
2068 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2069 prog->subbeg = SvPVX(prog->saved_copy);
2070 assert (SvPOKp(prog->saved_copy));
2071 } else
2072#endif
2073 {
2074 RX_MATCH_COPIED_on(prog);
2075 s = savepvn(strbeg, i);
2076 prog->subbeg = s;
2077 }
d6a28714 2078 prog->sublen = i;
d6a28714
JH
2079 }
2080 else {
2081 prog->subbeg = strbeg;
2082 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2083 }
2084 }
9041c2e3 2085
d6a28714
JH
2086 return 1;
2087
2088phooey:
a3621e74 2089 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
bf93d4cc 2090 PL_colors[4],PL_colors[5]));
d6a28714 2091 if (PL_reg_eval_set)
acfe0abc 2092 restore_pos(aTHX_ 0);
d6a28714
JH
2093 return 0;
2094}
2095
2096/*
2097 - regtry - try match at specific point
2098 */
2099STATIC I32 /* 0 failure, 1 success */
2100S_regtry(pTHX_ regexp *prog, char *startpos)
2101{
d6a28714
JH
2102 register I32 i;
2103 register I32 *sp;
2104 register I32 *ep;
2105 CHECKPOINT lastcp;
a3621e74 2106 GET_RE_DEBUG_FLAGS_DECL;
d6a28714 2107
02db2b7b
IZ
2108#ifdef DEBUGGING
2109 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
2110#endif
d6a28714
JH
2111 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2112 MAGIC *mg;
2113
2114 PL_reg_eval_set = RS_init;
a3621e74 2115 DEBUG_EXECUTE_r(DEBUG_s(
b900a521
JH
2116 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2117 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 2118 ));
e8347627 2119 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
2120 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2121 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2122 SAVETMPS;
2123 /* Apparently this is not needed, judging by wantarray. */
e8347627 2124 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
2125 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2126
2127 if (PL_reg_sv) {
2128 /* Make $_ available to executed code. */
2129 if (PL_reg_sv != DEFSV) {
59f00321 2130 SAVE_DEFSV;
d6a28714 2131 DEFSV = PL_reg_sv;
b8c5462f 2132 }
d6a28714 2133
9041c2e3 2134 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 2135 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 2136 /* prepare for quick setting of pos */
14befaf4
DM
2137 sv_magic(PL_reg_sv, (SV*)0,
2138 PERL_MAGIC_regex_global, Nullch, 0);
2139 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 2140 mg->mg_len = -1;
b8c5462f 2141 }
d6a28714
JH
2142 PL_reg_magic = mg;
2143 PL_reg_oldpos = mg->mg_len;
c76ac1ee 2144 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 2145 }
09687e5a 2146 if (!PL_reg_curpm) {
0f79a09d 2147 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
2148#ifdef USE_ITHREADS
2149 {
2150 SV* repointer = newSViv(0);
577e12cc 2151 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2152 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2153 av_push(PL_regex_padav,repointer);
2154 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2155 PL_regex_pad = AvARRAY(PL_regex_padav);
2156 }
2157#endif
2158 }
aaa362c4 2159 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2160 PL_reg_oldcurpm = PL_curpm;
2161 PL_curpm = PL_reg_curpm;
2162 if (RX_MATCH_COPIED(prog)) {
2163 /* Here is a serious problem: we cannot rewrite subbeg,
2164 since it may be needed if this match fails. Thus
2165 $` inside (?{}) could fail... */
2166 PL_reg_oldsaved = prog->subbeg;
2167 PL_reg_oldsavedlen = prog->sublen;
ed252734
NC
2168#ifdef PERL_COPY_ON_WRITE
2169 PL_nrs = prog->saved_copy;
2170#endif
d6a28714
JH
2171 RX_MATCH_COPIED_off(prog);
2172 }
2173 else
2174 PL_reg_oldsaved = Nullch;
2175 prog->subbeg = PL_bostr;
2176 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2177 }
2178 prog->startp[0] = startpos - PL_bostr;
2179 PL_reginput = startpos;
2180 PL_regstartp = prog->startp;
2181 PL_regendp = prog->endp;
2182 PL_reglastparen = &prog->lastparen;
a01268b5 2183 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714 2184 prog->lastparen = 0;
03994de8 2185 prog->lastcloseparen = 0;
d6a28714 2186 PL_regsize = 0;
a3621e74 2187 DEBUG_EXECUTE_r(PL_reg_starttry = startpos);
d6a28714
JH
2188 if (PL_reg_start_tmpl <= prog->nparens) {
2189 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2190 if(PL_reg_start_tmp)
2191 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2192 else
2193 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2194 }
2195
2196 /* XXXX What this code is doing here?!!! There should be no need
2197 to do this again and again, PL_reglastparen should take care of
3dd2943c 2198 this! --ilya*/
dafc8851
JH
2199
2200 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2201 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2202 * PL_reglastparen), is not needed at all by the test suite
2203 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2204 * enough, for building DynaLoader, or otherwise this
2205 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2206 * will happen. Meanwhile, this code *is* needed for the
2207 * above-mentioned test suite tests to succeed. The common theme
2208 * on those tests seems to be returning null fields from matches.
2209 * --jhi */
dafc8851 2210#if 1
d6a28714
JH
2211 sp = prog->startp;
2212 ep = prog->endp;
2213 if (prog->nparens) {
eb160463 2214 for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
d6a28714
JH
2215 *++sp = -1;
2216 *++ep = -1;
2217 }
2218 }
dafc8851 2219#endif
02db2b7b 2220 REGCP_SET(lastcp);
d6a28714
JH
2221 if (regmatch(prog->program + 1)) {
2222 prog->endp[0] = PL_reginput - PL_bostr;
2223 return 1;
2224 }
02db2b7b 2225 REGCP_UNWIND(lastcp);
d6a28714
JH
2226 return 0;
2227}
2228
02db2b7b
IZ
2229#define RE_UNWIND_BRANCH 1
2230#define RE_UNWIND_BRANCHJ 2
2231
2232union re_unwind_t;
2233
2234typedef struct { /* XX: makes sense to enlarge it... */
2235 I32 type;
2236 I32 prev;
2237 CHECKPOINT lastcp;
2238} re_unwind_generic_t;
2239
2240typedef struct {
2241 I32 type;
2242 I32 prev;
2243 CHECKPOINT lastcp;
2244 I32 lastparen;
2245 regnode *next;
2246 char *locinput;
2247 I32 nextchr;
2248#ifdef DEBUGGING
2249 int regindent;
2250#endif
2251} re_unwind_branch_t;
2252
2253typedef union re_unwind_t {
2254 I32 type;
2255 re_unwind_generic_t generic;
2256 re_unwind_branch_t branch;
2257} re_unwind_t;
2258
8ba1375e
MJD
2259#define sayYES goto yes
2260#define sayNO goto no
e0f9d4a8 2261#define sayNO_ANYOF goto no_anyof
8ba1375e
MJD
2262#define sayYES_FINAL goto yes_final
2263#define sayYES_LOUD goto yes_loud
2264#define sayNO_FINAL goto no_final
2265#define sayNO_SILENT goto do_no
2266#define saySAME(x) if (x) goto yes; else goto no
2267
a3621e74
YO
2268/* this is used to determine how far from the left messages like
2269 'failed...' are printed. Currently 29 makes these messages line
2270 up with the opcode they refer to. Earlier perls used 25 which
2271 left these messages outdented making reviewing a debug output
2272 quite difficult.
2273*/
2274#define REPORT_CODE_OFF 29
2275
2276
2277/* Make sure there is a test for this +1 options in re_tests */
2278#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2279
2280#define TRIE_CHECK_STATE_IS_ACCEPTING STMT_START { \
2281 if ( trie->states[ state ].wordnum ) { \
2282 if ( !accepted ) { \
2283 ENTER; \
2284 SAVETMPS; \
2285 bufflen = TRIE_INITAL_ACCEPT_BUFFLEN ; \
2286 sv_accept_buff=NEWSV( 1234, \
2287 bufflen * sizeof(reg_trie_accepted) - 1 ); \
2288 SvCUR_set( sv_accept_buff, sizeof(reg_trie_accepted) ); \
2289 SvPOK_on( sv_accept_buff ); \
2290 sv_2mortal( sv_accept_buff ); \
2291 accept_buff = (reg_trie_accepted*)SvPV_nolen( sv_accept_buff );\
2292 } else { \
2293 if ( accepted >= bufflen ) { \
2294 bufflen *= 2; \
2295 accept_buff =(reg_trie_accepted*)SvGROW( sv_accept_buff, \
2296 bufflen * sizeof(reg_trie_accepted) ); \
2297 } \
2298 SvCUR_set( sv_accept_buff,SvCUR( sv_accept_buff ) \
2299 + sizeof( reg_trie_accepted ) ); \
2300 } \
2301 accept_buff[ accepted ].wordnum = trie->states[ state ].wordnum; \
2302 accept_buff[ accepted ].endpos = uc; \
2303 ++accepted; \
2304 } } STMT_END
2305
2306#define TRIE_HANDLE_CHAR STMT_START { \
2307 if ( uvc < 256 ) { \
2308 charid = trie->charmap[ uvc ]; \
2309 } else { \
2310 charid = 0; \
2311 if( trie->widecharmap ) { \
2312 SV** svpp = (SV**)NULL; \
2313 svpp = hv_fetch( trie->widecharmap, (char*)&uvc, \
2314 sizeof( UV ), 0 ); \
2315 if ( svpp ) { \
2316 charid = (U16)SvIV( *svpp ); \
2317 } \
2318 } \
2319 } \
2320 if ( charid && \
cc601c31
YO
2321 ( base + charid > trie->uniquecharcount ) && \
2322 ( base + charid - 1 - trie->uniquecharcount < trie->lasttrans) && \
a3621e74
YO
2323 trie->trans[ base + charid - 1 - trie->uniquecharcount ].check == state ) \
2324 { \
2325 state = trie->trans[ base + charid - 1 - trie->uniquecharcount ].next; \
2326 } else { \
2327 state = 0; \
2328 } \
2329 uc += len; \
2330 } STMT_END
8ba1375e 2331
d6a28714
JH
2332/*
2333 - regmatch - main matching routine
2334 *
2335 * Conceptually the strategy is simple: check to see whether the current
2336 * node matches, call self recursively to see whether the rest matches,
2337 * and then act accordingly. In practice we make some effort to avoid
2338 * recursion, in particular by going through "ordinary" nodes (that don't
2339 * need to know whether the rest of the match failed) by a loop instead of
2340 * by recursion.
2341 */
2342/* [lwall] I've hoisted the register declarations to the outer block in order to
2343 * maybe save a little bit of pushing and popping on the stack. It also takes
2344 * advantage of machines that use a register save mask on subroutine entry.
2345 */
2346STATIC I32 /* 0 failure, 1 success */
2347S_regmatch(pTHX_ regnode *prog)
2348{
d6a28714
JH
2349 register regnode *scan; /* Current node. */
2350 regnode *next; /* Next node. */
2351 regnode *inner; /* Next node in internal branch. */
2352 register I32 nextchr; /* renamed nextchr - nextchar colides with
2353 function of same name */
2354 register I32 n; /* no or next */
b7953727
JH
2355 register I32 ln = 0; /* len or last */
2356 register char *s = Nullch; /* operand or save */
d6a28714 2357 register char *locinput = PL_reginput;
b7953727 2358 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2359 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2360 I32 unwind = 0;
a3621e74
YO
2361
2362 /* used by the trie code */
2363 SV *sv_accept_buff; /* accepting states we have traversed */
2364 reg_trie_accepted *accept_buff; /* "" */
2365 reg_trie_data *trie; /* what trie are we using right now */
2366 U32 accepted = 0; /* how many accepting states we have seen*/
2367
b7953727 2368#if 0
02db2b7b 2369 I32 firstcp = PL_savestack_ix;
b7953727 2370#endif
53c4c00c 2371 register bool do_utf8 = PL_reg_match_utf8;
2a782b5b 2372#ifdef DEBUGGING
ce333219
JH
2373 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2374 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2375 SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
a3621e74
YO
2376
2377 SV *re_debug_flags;
2a782b5b 2378#endif
02db2b7b 2379
a3621e74
YO
2380 GET_RE_DEBUG_FLAGS;
2381
d6a28714
JH
2382#ifdef DEBUGGING
2383 PL_regindent++;
2384#endif
2385
a3621e74 2386
d6a28714
JH
2387 /* Note that nextchr is a byte even in UTF */
2388 nextchr = UCHARAT(locinput);
2389 scan = prog;
2390 while (scan != NULL) {
8ba1375e 2391
a3621e74 2392 DEBUG_EXECUTE_r( {
d6a28714
JH
2393 SV *prop = sv_newmortal();
2394 int docolor = *PL_colors[0];
2395 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2396 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2397 /* The part of the string before starttry has one color
2398 (pref0_len chars), between starttry and current
2399 position another one (pref_len - pref0_len chars),
2400 after the current position the third one.
2401 We assume that pref0_len <= pref_len, otherwise we
2402 decrease pref0_len. */
9041c2e3 2403 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2404 ? (5 + taill) - l : locinput - PL_bostr;
2405 int pref0_len;
d6a28714 2406
df1ffd02 2407 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2408 pref_len++;
2409 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2410 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2411 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2412 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2413 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2414 l--;
d6a28714
JH
2415 if (pref0_len < 0)
2416 pref0_len = 0;
2417 if (pref0_len > pref_len)
2418 pref0_len = pref_len;
2419 regprop(prop, scan);
2a782b5b
JH
2420 {
2421 char *s0 =
f14c76ed 2422 do_utf8 && OP(scan) != CANY ?
2a782b5b 2423 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
c728cb41 2424 pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2425 locinput - pref_len;
df1ffd02 2426 int len0 = do_utf8 ? strlen(s0) : pref0_len;
f14c76ed 2427 char *s1 = do_utf8 && OP(scan) != CANY ?
2a782b5b 2428 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
c728cb41 2429 pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2430 locinput - pref_len + pref0_len;
df1ffd02 2431 int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
f14c76ed 2432 char *s2 = do_utf8 && OP(scan) != CANY ?
2a782b5b 2433 pv_uni_display(dsv2, (U8*)locinput,
c728cb41 2434 PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2435 locinput;
df1ffd02 2436 int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2437 PerlIO_printf(Perl_debug_log,
2438 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2439 (IV)(locinput - PL_bostr),
2440 PL_colors[4],
2441 len0, s0,
2442 PL_colors[5],
2443 PL_colors[2],
2444 len1, s1,
2445 PL_colors[3],
2446 (docolor ? "" : "> <"),
2447 PL_colors[0],
2448 len2, s2,
2449 PL_colors[1],
2450 15 - l - pref_len + 1,
2451 "",
2452 (IV)(scan - PL_regprogram), PL_regindent*2, "",
2453 SvPVX(prop));
2454 }
2455 });
d6a28714
JH
2456
2457 next = scan + NEXT_OFF(scan);
2458 if (next == scan)
2459 next = NULL;
2460
2461 switch (OP(scan)) {
2462 case BOL:
7fba1cd6 2463 if (locinput == PL_bostr)
d6a28714
JH
2464 {
2465 /* regtill = regbol; */
b8c5462f
JH
2466 break;
2467 }
d6a28714
JH
2468 sayNO;
2469 case MBOL:
12d33761
HS
2470 if (locinput == PL_bostr ||
2471 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2472 {
b8c5462f
JH
2473 break;
2474 }
d6a28714
JH
2475 sayNO;
2476 case SBOL:
c2a73568 2477 if (locinput == PL_bostr)
b8c5462f 2478 break;
d6a28714
JH
2479 sayNO;
2480 case GPOS:
2481 if (locinput == PL_reg_ganch)
2482 break;
2483 sayNO;
2484 case EOL:
d6a28714
JH
2485 goto seol;
2486 case MEOL:
d6a28714 2487 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2488 sayNO;
b8c5462f 2489 break;
d6a28714
JH
2490 case SEOL:
2491 seol:
2492 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2493 sayNO;
d6a28714 2494 if (PL_regeol - locinput > 1)
b8c5462f 2495 sayNO;
b8c5462f 2496 break;
d6a28714
JH
2497 case EOS:
2498 if (PL_regeol != locinput)
b8c5462f 2499 sayNO;
d6a28714 2500 break;
ffc61ed2 2501 case SANY:
d6a28714 2502 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2503 sayNO;
f33976b4
DB
2504 if (do_utf8) {
2505 locinput += PL_utf8skip[nextchr];
2506 if (locinput > PL_regeol)
2507 sayNO;
2508 nextchr = UCHARAT(locinput);
2509 }
2510 else
2511 nextchr = UCHARAT(++locinput);
2512 break;
2513 case CANY:
2514 if (!nextchr && locinput >= PL_regeol)
2515 sayNO;
b8c5462f 2516 nextchr = UCHARAT(++locinput);
a0d0e21e 2517 break;
ffc61ed2 2518 case REG_ANY:
1aa99e6b
IH
2519 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2520 sayNO;
2521 if (do_utf8) {
b8c5462f 2522 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2523 if (locinput > PL_regeol)
2524 sayNO;
a0ed51b3 2525 nextchr = UCHARAT(locinput);
a0ed51b3 2526 }
1aa99e6b
IH
2527 else
2528 nextchr = UCHARAT(++locinput);
a0ed51b3 2529 break;
a3621e74
YO
2530
2531
2532
2533 /*
2534 traverse the TRIE keeping track of all accepting states
2535 we transition through until we get to a failing node.
2536
2537 we use two slightly different pieces of code to handle
2538 the traversal depending on whether its case sensitive or
2539 not. we reuse the accept code however. (this should probably
2540 be turned into a macro.)
2541
2542 */
2543 case TRIEF:
2544 case TRIEFL:
2545 {
2546
2547 U32 uniflags = ckWARN( WARN_UTF8 ) ? 0 : UTF8_ALLOW_ANY;
2548 U8 *uc = ( U8* )locinput;
2549 U32 state = 1;
2550 U16 charid = 0;
2551 U32 base = 0;
2552 UV uvc = 0;
2553 STRLEN len = 0;
2554 STRLEN foldlen = 0;
2555 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2556 U8 *uscan = (U8*)NULL;
2557 STRLEN bufflen=0;
2558 accepted = 0;
2559
2560 trie = (reg_trie_data*)PL_regdata->data[ ARG( scan ) ];
2561
2562 while ( state && uc <= (U8*)PL_regeol ) {
2563
2564 TRIE_CHECK_STATE_IS_ACCEPTING;
2565
2566 base = trie->states[ state ].trans.base;
2567
2568 DEBUG_TRIE_EXECUTE_r(
2569 PerlIO_printf( Perl_debug_log,
2570 "%*s %sState: %4x, Base: %4x Accepted: %4x ",
2571 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
2572 state, base, accepted );
2573 );
2574
2575 if ( base ) {
2576
2577 if ( do_utf8 || UTF ) {
2578 if ( foldlen>0 ) {
2579 uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );
2580 foldlen -= len;
2581 uscan += len;
2582 len=0;
2583 } else {
2584 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2585 uvc = to_uni_fold( uvc, foldbuf, &foldlen );
2586 foldlen -= UNISKIP( uvc );
2587 uscan = foldbuf + UNISKIP( uvc );
2588 }
2589 } else {
2590 uvc = (U32)*uc;
2591 len = 1;
2592 }
2593
2594 TRIE_HANDLE_CHAR;
2595
2596 } else {
2597 state = 0;
2598 }
2599 DEBUG_TRIE_EXECUTE_r(
2600 PerlIO_printf( Perl_debug_log,
2601 "Charid:%3x CV:%4x After State: %4x%s\n",
2602 charid, uvc, state, PL_colors[5] );
2603 );
2604 }
2605 if ( !accepted ) {
2606 sayNO;
2607 } else {
2608 goto TrieAccept;
2609 }
2610 }
2611 /* unreached codepoint: we jump into the middle of the next case
2612 from previous if blocks */
2613 case TRIE:
2614 {
2615 U32 uniflags = ckWARN( WARN_UTF8 ) ? 0 : UTF8_ALLOW_ANY;
2616 U8 *uc = (U8*)locinput;
2617 U32 state = 1;
2618 U16 charid = 0;
2619 U32 base = 0;
2620 UV uvc = 0;
2621 STRLEN len = 0;
2622 STRLEN bufflen = 0;
2623 accepted = 0;
2624
2625 trie = (reg_trie_data*)PL_regdata->data[ ARG( scan ) ];
2626
2627 while ( state && uc <= (U8*)PL_regeol ) {
2628
2629 TRIE_CHECK_STATE_IS_ACCEPTING;
2630
2631 base = trie->states[ state ].trans.base;
2632
2633 DEBUG_TRIE_EXECUTE_r(
2634 PerlIO_printf( Perl_debug_log,
2635 "%*s %sState: %4x, Base: %4x Accepted: %4x ",
2636 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
2637 state, base, accepted );
2638 );
2639
2640 if ( base ) {
2641
2642 if ( do_utf8 || UTF ) {
2643 uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );
2644 } else {
2645 uvc = (U32)*uc;
2646 len = 1;
2647 }
2648
2649 TRIE_HANDLE_CHAR;
2650
2651 } else {
2652 state = 0;
2653 }
2654 DEBUG_TRIE_EXECUTE_r(
2655 PerlIO_printf( Perl_debug_log,
2656 "Charid:%3x CV:%4x After State: %4x%s\n",
2657 charid, uvc, state, PL_colors[5] );
2658 );
2659 }
2660 if ( !accepted ) {
2661 sayNO;
2662 }
2663 }
2664
2665
2666 /*
2667 There was at least one accepting state that we
2668 transitioned through. Presumably the number of accepting
2669 states is going to be low, typically one or two. So we
2670 simply scan through to find the one with lowest wordnum.
2671 Once we find it, we swap the last state into its place
2672 and decrement the size. We then try to match the rest of
2673 the pattern at the point where the word ends, if we
2674 succeed then we end the loop, otherwise the loop
2675 eventually terminates once all of the accepting states
2676 have been tried.
2677 */
2678 TrieAccept:
2679 {
2680 int gotit = 0;
2681
2682 if ( accepted == 1 ) {
2683 DEBUG_EXECUTE_r({
2684 SV **tmp = av_fetch( trie->words, accept_buff[ 0 ].wordnum-1, 0 );
2685 PerlIO_printf( Perl_debug_log,
2686 "%*s %sonly one match : #%d <%s>%s\n",
2687 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],
2688 accept_buff[ 0 ].wordnum,
2689 tmp ? SvPV_nolen( *tmp ) : "not compiled under -Dr",
2690 PL_colors[5] );
2691 });
cc601c31 2692 PL_reginput = (char *)accept_buff[ 0 ].endpos;
a3621e74
YO
2693 /* in this case we free tmps/leave before we call regmatch
2694 as we wont be using accept_buff again. */
2695 FREETMPS;
2696 LEAVE;
2697 gotit = regmatch( scan + NEXT_OFF( scan ) );
2698 } else {
2699 DEBUG_EXECUTE_r(
2700 PerlIO_printf( Perl_debug_log,"%*s %sgot %d possible matches%s\n",
2701 REPORT_CODE_OFF + PL_regindent * 2, "",PL_colors[4], accepted,
2702 PL_colors[5] );
2703 );
2704 while ( !gotit && accepted-- ) {
2705 U32 best = 0;
2706 U32 cur;
2707 for( cur = 1 ; cur <= accepted ; cur++ ) {
2708 DEBUG_TRIE_EXECUTE_r(
2709 PerlIO_printf( Perl_debug_log,
2710 "%*s %sgot %d (%d) as best, looking at %d (%d)%s\n",
2711 REPORT_CODE_OFF + PL_regindent * 2, "", PL_colors[4],
2712 best, accept_buff[ best ].wordnum, cur,
2713 accept_buff[ cur ].wordnum, PL_colors[5] );
2714 );
2715
2716 if ( accept_buff[ cur ].wordnum < accept_buff[ best ].wordnum )
2717 best = cur;
2718 }
2719 DEBUG_EXECUTE_r({
2720 SV **tmp = av_fetch( trie->words, accept_buff[ best ].wordnum - 1, 0 );
2721 PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at 0x%p%s\n",
2722 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],
2723 accept_buff[best].wordnum,
2724 tmp ? SvPV_nolen( *tmp ) : "not compiled under -Dr",scan,
2725 PL_colors[5] );
2726 });
2727 if ( best<accepted ) {
2728 reg_trie_accepted tmp = accept_buff[ best ];
2729 accept_buff[ best ] = accept_buff[ accepted ];
2730 accept_buff[ accepted ] = tmp;
2731 best = accepted;
2732 }
cc601c31 2733 PL_reginput = (char *)accept_buff[ best ].endpos;
a3621e74
YO
2734
2735 /*
2736 as far as I can tell we only need the SAVETMPS/FREETMPS
2737 for re's with EVAL in them but I'm leaving them in for
2738 all until I can be sure.
2739 */
2740 SAVETMPS;
2741 gotit = regmatch( scan + NEXT_OFF( scan ) ) ;
2742 FREETMPS;
2743 }
2744 FREETMPS;
2745 LEAVE;
2746 }
2747
2748 if ( gotit ) {
2749 sayYES;
2750 } else {
2751 sayNO;
2752 }
2753 }
2754 /* unreached codepoint */
d6a28714 2755 case EXACT:
cd439c50
IZ
2756 s = STRING(scan);
2757 ln = STR_LEN(scan);
eb160463 2758 if (do_utf8 != UTF) {
bc517b45 2759 /* The target and the pattern have differing utf8ness. */
1aa99e6b
IH
2760 char *l = locinput;
2761 char *e = s + ln;
bc517b45 2762 STRLEN ulen;
a72c7584 2763
5ff6fc6d
JH
2764 if (do_utf8) {
2765 /* The target is utf8, the pattern is not utf8. */
1aa99e6b
IH
2766 while (s < e) {
2767 if (l >= PL_regeol)
5ff6fc6d
JH
2768 sayNO;
2769 if (NATIVE_TO_UNI(*(U8*)s) !=
89ebb4a3 2770 utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
872c91ae
JH
2771 ckWARN(WARN_UTF8) ?
2772 0 : UTF8_ALLOW_ANY))
5ff6fc6d 2773 sayNO;
bc517b45 2774 l += ulen;
5ff6fc6d 2775 s ++;
1aa99e6b 2776 }
5ff6fc6d
JH
2777 }
2778 else {
2779 /* The target is not utf8, the pattern is utf8. */
1aa99e6b
IH
2780 while (s < e) {
2781 if (l >= PL_regeol)
2782 sayNO;
5ff6fc6d 2783 if (NATIVE_TO_UNI(*((U8*)l)) !=
89ebb4a3 2784 utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
872c91ae
JH
2785 ckWARN(WARN_UTF8) ?
2786 0 : UTF8_ALLOW_ANY))
1aa99e6b 2787 sayNO;
bc517b45 2788 s += ulen;
a72c7584 2789 l ++;
1aa99e6b 2790 }
5ff6fc6d 2791 }
1aa99e6b
IH
2792 locinput = l;
2793 nextchr = UCHARAT(locinput);
2794 break;
2795 }
bc517b45 2796 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2797 /* Inline the first character, for speed. */
2798 if (UCHARAT(s) != nextchr)
2799 sayNO;
2800 if (PL_regeol - locinput < ln)
2801 sayNO;
2802 if (ln > 1 && memNE(s, locinput, ln))
2803 sayNO;
2804 locinput += ln;
2805 nextchr = UCHARAT(locinput);
2806 break;
2807 case EXACTFL:
b8c5462f
JH
2808 PL_reg_flags |= RF_tainted;
2809 /* FALL THROUGH */
d6a28714 2810 case EXACTF:
cd439c50
IZ
2811 s = STRING(scan);
2812 ln = STR_LEN(scan);
d6a28714 2813
d07ddd77
JH
2814 if (do_utf8 || UTF) {
2815 /* Either target or the pattern are utf8. */
d6a28714 2816 char *l = locinput;
d07ddd77 2817 char *e = PL_regeol;
bc517b45 2818
eb160463 2819 if (ibcmp_utf8(s, 0, ln, (bool)UTF,
1feea2c7 2820 l, &e, 0, do_utf8)) {
5486206c
JH
2821 /* One more case for the sharp s:
2822 * pack("U0U*", 0xDF) =~ /ss/i,
2823 * the 0xC3 0x9F are the UTF-8
2824 * byte sequence for the U+00DF. */
2825 if (!(do_utf8 &&
2826 toLOWER(s[0]) == 's' &&
2827 ln >= 2 &&
2828 toLOWER(s[1]) == 's' &&
2829 (U8)l[0] == 0xC3 &&
2830 e - l >= 2 &&
2831 (U8)l[1] == 0x9F))
2832 sayNO;
2833 }
d07ddd77
JH
2834 locinput = e;
2835 nextchr = UCHARAT(locinput);
2836 break;
a0ed51b3 2837 }
d6a28714 2838
bc517b45
JH
2839 /* Neither the target and the pattern are utf8. */
2840
d6a28714
JH
2841 /* Inline the first character, for speed. */
2842 if (UCHARAT(s) != nextchr &&
2843 UCHARAT(s) != ((OP(scan) == EXACTF)
2844 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2845 sayNO;
d6a28714 2846 if (PL_regeol - locinput < ln)
b8c5462f 2847 sayNO;
d6a28714
JH
2848 if (ln > 1 && (OP(scan) == EXACTF
2849 ? ibcmp(s, locinput, ln)
2850 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2851 sayNO;
d6a28714
JH
2852 locinput += ln;
2853 nextchr = UCHARAT(locinput);
a0d0e21e 2854 break;
d6a28714 2855 case ANYOF:
ffc61ed2 2856 if (do_utf8) {
9e55ce06
JH
2857 STRLEN inclasslen = PL_regeol - locinput;
2858
ba7b4546 2859 if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
e0f9d4a8 2860 sayNO_ANYOF;
ffc61ed2
JH
2861 if (locinput >= PL_regeol)
2862 sayNO;
0f0076b4 2863 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
b8c5462f 2864 nextchr = UCHARAT(locinput);
e0f9d4a8 2865 break;
ffc61ed2
JH
2866 }
2867 else {
2868 if (nextchr < 0)
2869 nextchr = UCHARAT(locinput);
7d3e948e 2870 if (!REGINCLASS(scan, (U8*)locinput))
e0f9d4a8 2871 sayNO_ANYOF;
ffc61ed2
JH
2872 if (!nextchr && locinput >= PL_regeol)
2873 sayNO;
2874 nextchr = UCHARAT(++locinput);
e0f9d4a8
JH
2875 break;
2876 }
2877 no_anyof:
2878 /* If we might have the case of the German sharp s
2879 * in a casefolding Unicode character class. */
2880
ebc501f0
JH
2881 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2882 locinput += SHARP_S_SKIP;
e0f9d4a8 2883 nextchr = UCHARAT(locinput);
ffc61ed2 2884 }
e0f9d4a8
JH
2885 else
2886 sayNO;
b8c5462f 2887 break;
d6a28714 2888 case ALNUML:
b8c5462f
JH
2889 PL_reg_flags |= RF_tainted;
2890 /* FALL THROUGH */
d6a28714 2891 case ALNUM:
b8c5462f 2892 if (!nextchr)
4633a7c4 2893 sayNO;
ffc61ed2 2894 if (do_utf8) {
ad24be35 2895 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2896 if (!(OP(scan) == ALNUM
3568d838 2897 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2898 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2899 {
2900 sayNO;
a0ed51b3 2901 }
b8c5462f 2902 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2903 nextchr = UCHARAT(locinput);
2904 break;
2905 }
ffc61ed2 2906 if (!(OP(scan) == ALNUM
d6a28714 2907 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2908 sayNO;
b8c5462f 2909 nextchr = UCHARAT(++locinput);
a0d0e21e 2910 break;
d6a28714 2911 case NALNUML:
b8c5462f
JH
2912 PL_reg_flags |= RF_tainted;
2913 /* FALL THROUGH */
d6a28714
JH
2914 case NALNUM:
2915 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2916 sayNO;
ffc61ed2 2917 if (do_utf8) {
8269fa76 2918 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2919 if (OP(scan) == NALNUM
3568d838 2920 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2921 : isALNUM_LC_utf8((U8*)locinput))
2922 {
b8c5462f 2923 sayNO;
d6a28714 2924 }
b8c5462f
JH
2925 locinput += PL_utf8skip[nextchr];
2926 nextchr = UCHARAT(locinput);
2927 break;
2928 }
ffc61ed2 2929 if (OP(scan) == NALNUM
d6a28714 2930 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2931 sayNO;
76e3520e 2932 nextchr = UCHARAT(++locinput);
a0d0e21e 2933 break;
d6a28714
JH
2934 case BOUNDL:
2935 case NBOUNDL:
3280af22 2936 PL_reg_flags |= RF_tainted;
bbce6d69 2937 /* FALL THROUGH */
d6a28714
JH
2938 case BOUND:
2939 case NBOUND:
2940 /* was last char in word? */
ffc61ed2 2941 if (do_utf8) {
12d33761
HS
2942 if (locinput == PL_bostr)
2943 ln = '\n';
ffc61ed2 2944 else {
b4f7163a 2945 U8 *r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
9041c2e3 2946
b4f7163a 2947 ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
ffc61ed2
JH
2948 }
2949 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2950 ln = isALNUM_uni(ln);
8269fa76 2951 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2952 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2953 }
2954 else {
9041c2e3 2955 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2956 n = isALNUM_LC_utf8((U8*)locinput);
2957 }
a0ed51b3 2958 }
d6a28714 2959 else {
12d33761
HS
2960 ln = (locinput != PL_bostr) ?
2961 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2962 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2963 ln = isALNUM(ln);
2964 n = isALNUM(nextchr);
2965 }
2966 else {
2967 ln = isALNUM_LC(ln);
2968 n = isALNUM_LC(nextchr);
2969 }
d6a28714 2970 }
ffc61ed2
JH
2971 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2972 OP(scan) == BOUNDL))
2973 sayNO;
a0ed51b3 2974 break;
d6a28714 2975 case SPACEL:
3280af22 2976 PL_reg_flags |= RF_tainted;
bbce6d69 2977 /* FALL THROUGH */
d6a28714 2978 case SPACE:
9442cb0e 2979 if (!nextchr)
4633a7c4 2980 sayNO;
1aa99e6b 2981 if (do_utf8) {
fd400ab9 2982 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2983 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2984 if (!(OP(scan) == SPACE
3568d838 2985 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2986 : isSPACE_LC_utf8((U8*)locinput)))
2987 {
2988 sayNO;
2989 }
2990 locinput += PL_utf8skip[nextchr];
2991 nextchr = UCHARAT(locinput);
2992 break;
d6a28714 2993 }
ffc61ed2
JH
2994 if (!(OP(scan) == SPACE
2995 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2996 sayNO;
2997 nextchr = UCHARAT(++locinput);
2998 }
2999 else {
3000 if (!(OP(scan) == SPACE
3001 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3002 sayNO;
3003 nextchr = UCHARAT(++locinput);
a0ed51b3 3004 }
a0ed51b3 3005 break;
d6a28714 3006 case NSPACEL:
3280af22 3007 PL_reg_flags |= RF_tainted;
bbce6d69 3008 /* FALL THROUGH */
d6a28714 3009 case NSPACE:
9442cb0e 3010 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3011 sayNO;
1aa99e6b 3012 if (do_utf8) {
8269fa76 3013 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 3014 if (OP(scan) == NSPACE
3568d838 3015 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 3016 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
3017 {
3018 sayNO;
3019 }
3020 locinput += PL_utf8skip[nextchr];
3021 nextchr = UCHARAT(locinput);
3022 break;
a0ed51b3 3023 }
ffc61ed2 3024 if (OP(scan) == NSPACE
d6a28714 3025 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 3026 sayNO;
76e3520e 3027 nextchr = UCHARAT(++locinput);
a0d0e21e 3028 break;
d6a28714 3029 case DIGITL:
a0ed51b3
LW
3030 PL_reg_flags |= RF_tainted;
3031 /* FALL THROUGH */
d6a28714 3032 case DIGIT:
9442cb0e 3033 if (!nextchr)
a0ed51b3 3034 sayNO;
1aa99e6b 3035 if (do_utf8) {
8269fa76 3036 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 3037 if (!(OP(scan) == DIGIT
3568d838 3038 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 3039 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 3040 {
a0ed51b3 3041 sayNO;
dfe13c55 3042 }
6f06b55f 3043 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3044 nextchr = UCHARAT(locinput);
3045 break;
3046 }
ffc61ed2 3047 if (!(OP(scan) == DIGIT
9442cb0e 3048 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
3049 sayNO;
3050 nextchr = UCHARAT(++locinput);
3051 break;
d6a28714 3052 case NDIGITL:
b8c5462f
JH
3053 PL_reg_flags |= RF_tainted;
3054 /* FALL THROUGH */
d6a28714 3055 case NDIGIT:
9442cb0e 3056 if (!nextchr && locinput >= PL_regeol)
b8c5462f 3057 sayNO;
1aa99e6b 3058 if (do_utf8) {
8269fa76 3059 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 3060 if (OP(scan) == NDIGIT
3568d838 3061 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
3062 : isDIGIT_LC_utf8((U8*)locinput))
3063 {
a0ed51b3 3064 sayNO;
9442cb0e 3065 }
6f06b55f 3066 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
3067 nextchr = UCHARAT(locinput);
3068 break;
3069 }
ffc61ed2 3070 if (OP(scan) == NDIGIT
9442cb0e 3071 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
3072 sayNO;
3073 nextchr = UCHARAT(++locinput);
3074 break;
3075 case CLUMP:
b7c83a7e 3076 if (locinput >= PL_regeol)
a0ed51b3 3077 sayNO;
b7c83a7e
JH
3078 if (do_utf8) {
3079 LOAD_UTF8_CHARCLASS(mark,"~");
3080 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3081 sayNO;
3082 locinput += PL_utf8skip[nextchr];
3083 while (locinput < PL_regeol &&
3084 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3085 locinput += UTF8SKIP(locinput);
3086 if (locinput > PL_regeol)
3087 sayNO;
eb08e2da
JH
3088 }
3089 else
3090 locinput++;
a0ed51b3
LW
3091 nextchr = UCHARAT(locinput);
3092 break;
c8756f30 3093 case REFFL:
3280af22 3094 PL_reg_flags |= RF_tainted;
c8756f30 3095 /* FALL THROUGH */
c277df42 3096 case REF:
c8756f30 3097 case REFF:
c277df42 3098 n = ARG(scan); /* which paren pair */
cf93c79d 3099 ln = PL_regstartp[n];
2c2d71f5 3100 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
eb160463 3101 if ((I32)*PL_reglastparen < n || ln == -1)
af3f8c16 3102 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 3103 if (ln == PL_regendp[n])
a0d0e21e 3104 break;
a0ed51b3 3105
cf93c79d 3106 s = PL_bostr + ln;
1aa99e6b 3107 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 3108 char *l = locinput;
cf93c79d 3109 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
3110 /*
3111 * Note that we can't do the "other character" lookup trick as
3112 * in the 8-bit case (no pun intended) because in Unicode we
3113 * have to map both upper and title case to lower case.
3114 */
3115 if (OP(scan) == REFF) {
a2a2844f 3116 STRLEN ulen1, ulen2;
89ebb4a3
JH
3117 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3118 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
a0ed51b3
LW
3119 while (s < e) {
3120 if (l >= PL_regeol)
3121 sayNO;
a2a2844f
JH
3122 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3123 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 3124 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 3125 sayNO;
a2a2844f
JH
3126 s += ulen1;
3127 l += ulen2;
a0ed51b3
LW
3128 }
3129 }
3130 locinput = l;
3131 nextchr = UCHARAT(locinput);
3132 break;
3133 }
3134
a0d0e21e 3135 /* Inline the first character, for speed. */
76e3520e 3136 if (UCHARAT(s) != nextchr &&
c8756f30
AK
3137 (OP(scan) == REF ||
3138 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 3139 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 3140 sayNO;
cf93c79d 3141 ln = PL_regendp[n] - ln;
3280af22 3142 if (locinput + ln > PL_regeol)
4633a7c4 3143 sayNO;
c8756f30
AK
3144 if (ln > 1 && (OP(scan) == REF
3145 ? memNE(s, locinput, ln)
3146 : (OP(scan) == REFF
3147 ? ibcmp(s, locinput, ln)
3148 : ibcmp_locale(s, locinput, ln))))
4633a7c4 3149 sayNO;
a0d0e21e 3150 locinput += ln;
76e3520e 3151 nextchr = UCHARAT(locinput);
a0d0e21e
LW
3152 break;
3153
3154 case NOTHING:
c277df42 3155 case TAIL:
a0d0e21e
LW
3156 break;
3157 case BACK:
3158 break;
c277df42
IZ
3159 case EVAL:
3160 {
3161 dSP;
533c011a 3162 OP_4tree *oop = PL_op;
3280af22 3163 COP *ocurcop = PL_curcop;
f3548bdc 3164 PAD *old_comppad;
c277df42 3165 SV *ret;
080c2dec 3166 struct regexp *oreg = PL_reg_re;
9041c2e3 3167
c277df42 3168 n = ARG(scan);
533c011a 3169 PL_op = (OP_4tree*)PL_regdata->data[n];
a3621e74 3170 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
f3548bdc 3171 PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_regdata->data[n + 2]);
cf93c79d 3172 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 3173
8e5e9ebe
RGS
3174 {
3175 SV **before = SP;
3176 CALLRUNOPS(aTHX); /* Scalar context. */
3177 SPAGAIN;
3178 if (SP == before)
075aa684 3179 ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
8e5e9ebe
RGS
3180 else {
3181 ret = POPs;
3182 PUTBACK;
3183 }
3184 }
3185
0f5d15d6 3186 PL_op = oop;
f3548bdc 3187 PAD_RESTORE_LOCAL(old_comppad);
0f5d15d6 3188 PL_curcop = ocurcop;
c277df42 3189 if (logical) {
0f5d15d6
IZ
3190 if (logical == 2) { /* Postponed subexpression. */
3191 regexp *re;
22c35a8c 3192 MAGIC *mg = Null(MAGIC*);
0f5d15d6 3193 re_cc_state state;
0f5d15d6 3194 CHECKPOINT cp, lastcp;
cb50f42d 3195 int toggleutf;
faf82a0b 3196 register SV *sv;
0f5d15d6 3197
faf82a0b
AE
3198 if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3199 mg = mg_find(sv, PERL_MAGIC_qr);
3200 else if (SvSMAGICAL(ret)) {
3201 if (SvGMAGICAL(ret))
3202 sv_unmagic(ret, PERL_MAGIC_qr);
3203 else
3204 mg = mg_find(ret, PERL_MAGIC_qr);
0f5d15d6 3205 }
faf82a0b 3206
0f5d15d6
IZ
3207 if (mg) {
3208 re = (regexp *)mg->mg_obj;
df0003d4 3209 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
3210 }
3211 else {
3212 STRLEN len;
3213 char *t = SvPV(ret, len);
3214 PMOP pm;
3215 char *oprecomp = PL_regprecomp;
3216 I32 osize = PL_regsize;
3217 I32 onpar = PL_regnpar;
3218
5fcd1c1b 3219 Zero(&pm, 1, PMOP);
cb50f42d 3220 if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
cea2e8a9 3221 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 3222 if (!(SvFLAGS(ret)
faf82a0b
AE
3223 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3224 | SVs_GMG)))
14befaf4
DM
3225 sv_magic(ret,(SV*)ReREFCNT_inc(re),
3226 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
3227 PL_regprecomp = oprecomp;
3228 PL_regsize = osize;
3229 PL_regnpar = onpar;
3230 }
a3621e74 3231 DEBUG_EXECUTE_r(
9041c2e3 3232 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
3233 "Entering embedded `%s%.60s%s%s'\n",
3234 PL_colors[0],
3235 re->precomp,
3236 PL_colors[1],
3237 (strlen(re->precomp) > 60 ? "..." : ""))
3238 );
3239 state.node = next;
3240 state.prev = PL_reg_call_cc;
3241 state.cc = PL_regcc;
3242 state.re = PL_reg_re;
3243
2ab05381 3244 PL_regcc = 0;
9041c2e3 3245
0f5d15d6 3246 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3247 REGCP_SET(lastcp);
0f5d15d6
IZ
3248 cache_re(re);
3249 state.ss = PL_savestack_ix;
3250 *PL_reglastparen = 0;
a01268b5 3251 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
3252 PL_reg_call_cc = &state;
3253 PL_reginput = locinput;
cb50f42d
YST
3254 toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
3255 ((re->reganch & ROPT_UTF8) != 0);
3256 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c2d71f5
JH
3257
3258 /* XXXX This is too dramatic a measure... */
3259 PL_reg_maxiter = 0;
3260
0f5d15d6 3261 if (regmatch(re->program + 1)) {
2c914db6
IZ
3262 /* Even though we succeeded, we need to restore
3263 global variables, since we may be wrapped inside
3264 SUSPEND, thus the match may be not finished yet. */
3265
3266 /* XXXX Do this only if SUSPENDed? */
3267 PL_reg_call_cc = state.prev;
3268 PL_regcc = state.cc;
3269 PL_reg_re = state.re;
3270 cache_re(PL_reg_re);
cb50f42d 3271 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c914db6
IZ
3272
3273 /* XXXX This is too dramatic a measure... */
3274 PL_reg_maxiter = 0;
3275
3276 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
3277 ReREFCNT_dec(re);
3278 regcpblow(cp);
3279 sayYES;
3280 }
0f5d15d6 3281 ReREFCNT_dec(re);
02db2b7b 3282 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3283 regcppop();
3284 PL_reg_call_cc = state.prev;
3285 PL_regcc = state.cc;
3286 PL_reg_re = state.re;
d3790889 3287 cache_re(PL_reg_re);
cb50f42d 3288 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c2d71f5
JH
3289
3290 /* XXXX This is too dramatic a measure... */
3291 PL_reg_maxiter = 0;
3292
8e514ae6 3293 logical = 0;
0f5d15d6
IZ
3294 sayNO;
3295 }
c277df42 3296 sw = SvTRUE(ret);
0f5d15d6 3297 logical = 0;
a0ed51b3 3298 }
080c2dec 3299 else {
3280af22 3300 sv_setsv(save_scalar(PL_replgv), ret);
080c2dec
AE
3301 cache_re(oreg);
3302 }
c277df42
IZ
3303 break;
3304 }
a0d0e21e 3305 case OPEN:
c277df42 3306 n = ARG(scan); /* which paren pair */
3280af22
NIS
3307 PL_reg_start_tmp[n] = locinput;
3308 if (n > PL_regsize)
3309 PL_regsize = n;
a0d0e21e
LW
3310 break;
3311 case CLOSE:
c277df42 3312 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
3313 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
3314 PL_regendp[n] = locinput - PL_bostr;
eb160463 3315 if (n > (I32)*PL_reglastparen)
3280af22 3316 *PL_reglastparen = n;
a01268b5 3317 *PL_reglastcloseparen = n;
a0d0e21e 3318 break;
c277df42
IZ
3319 case GROUPP:
3320 n = ARG(scan); /* which paren pair */
eb160463 3321 sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
3322 break;
3323 case IFTHEN:
2c2d71f5 3324 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
3325 if (sw)
3326 next = NEXTOPER(NEXTOPER(scan));
3327 else {
3328 next = scan + ARG(scan);
3329 if (OP(next) == IFTHEN) /* Fake one. */
3330 next = NEXTOPER(NEXTOPER(next));
3331 }
3332 break;
3333 case LOGICAL:
0f5d15d6 3334 logical = scan->flags;
c277df42 3335 break;
2ab05381
IZ
3336/*******************************************************************
3337 PL_regcc contains infoblock about the innermost (...)* loop, and
3338 a pointer to the next outer infoblock.
3339
3340 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
3341
3342 1) After matching X, regnode for CURLYX is processed;
3343
9041c2e3 3344 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
3345 regmatch() recursively with the starting point at WHILEM node;
3346
3347 3) Each hit of WHILEM node tries to match A and Z (in the order
3348 depending on the current iteration, min/max of {min,max} and
3349 greediness). The information about where are nodes for "A"
3350 and "Z" is read from the infoblock, as is info on how many times "A"
3351 was already matched, and greediness.
3352
3353 4) After A matches, the same WHILEM node is hit again.
3354
3355 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
3356 of the same pair. Thus when WHILEM tries to match Z, it temporarily
3357 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
3358 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
3359 of the external loop.
3360
3361 Currently present infoblocks form a tree with a stem formed by PL_curcc
3362 and whatever it mentions via ->next, and additional attached trees
3363 corresponding to temporarily unset infoblocks as in "5" above.
3364
9041c2e3 3365 In the following picture infoblocks for outer loop of
2ab05381
IZ
3366 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
3367 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
3368 infoblocks are drawn below the "reset" infoblock.
3369
3370 In fact in the picture below we do not show failed matches for Z and T
3371 by WHILEM blocks. [We illustrate minimal matches, since for them it is
3372 more obvious *why* one needs to *temporary* unset infoblocks.]
3373
3374 Matched REx position InfoBlocks Comment
3375 (Y(A)*?Z)*?T x
3376 Y(A)*?Z)*?T x <- O
3377 Y (A)*?Z)*?T x <- O
3378 Y A)*?Z)*?T x <- O <- I
3379 YA )*?Z)*?T x <- O <- I
3380 YA A)*?Z)*?T x <- O <- I
3381 YAA )*?Z)*?T x <- O <- I
3382 YAA Z)*?T x <- O # Temporary unset I
3383 I
3384
3385 YAAZ Y(A)*?Z)*?T x <- O
3386 I
3387
3388 YAAZY (A)*?Z)*?T x <- O
3389 I
3390
3391 YAAZY A)*?Z)*?T x <- O <- I
3392 I
3393
3394 YAAZYA )*?Z)*?T x <- O <- I
3395 I
3396
3397 YAAZYA Z)*?T x <- O # Temporary unset I
3398 I,I
3399
3400 YAAZYAZ )*?T x <- O
3401 I,I
3402
3403 YAAZYAZ T x # Temporary unset O
3404 O
3405 I,I
3406
3407 YAAZYAZT x
3408 O
3409 I,I
3410 *******************************************************************/
a0d0e21e
LW
3411 case CURLYX: {
3412 CURCUR cc;
3280af22 3413 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
3414 /* No need to save/restore up to this paren */
3415 I32 parenfloor = scan->flags;
c277df42
IZ
3416
3417 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3418 next += ARG(next);
3280af22
NIS
3419 cc.oldcc = PL_regcc;
3420 PL_regcc = &cc;
cb434fcc
IZ
3421 /* XXXX Probably it is better to teach regpush to support
3422 parenfloor > PL_regsize... */
eb160463 3423 if (parenfloor > (I32)*PL_reglastparen)
cb434fcc
IZ
3424 parenfloor = *PL_reglastparen; /* Pessimization... */
3425 cc.parenfloor = parenfloor;
a0d0e21e
LW
3426 cc.cur = -1;
3427 cc.min = ARG1(scan);
3428 cc.max = ARG2(scan);
c277df42 3429 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
3430 cc.next = next;
3431 cc.minmod = minmod;
3432 cc.lastloc = 0;
3280af22 3433 PL_reginput = locinput;
a0d0e21e
LW
3434 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
3435 regcpblow(cp);
3280af22 3436 PL_regcc = cc.oldcc;
4633a7c4 3437 saySAME(n);
a0d0e21e
LW
3438 }
3439 /* NOT REACHED */
3440 case WHILEM: {
3441 /*
3442 * This is really hard to understand, because after we match
3443 * what we're trying to match, we must make sure the rest of
2c2d71f5 3444 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
3445 * to go back UP the parse tree by recursing ever deeper. And
3446 * if it fails, we have to reset our parent's current state
3447 * that we can try again after backing off.
3448 */
3449
c277df42 3450 CHECKPOINT cp, lastcp;
3280af22 3451 CURCUR* cc = PL_regcc;
c277df42
IZ
3452 char *lastloc = cc->lastloc; /* Detection of 0-len. */
3453
4633a7c4 3454 n = cc->cur + 1; /* how many we know we matched */
3280af22 3455 PL_reginput = locinput;
a0d0e21e 3456
a3621e74 3457 DEBUG_EXECUTE_r(
9041c2e3 3458 PerlIO_printf(Perl_debug_log,
91f3b821 3459 "%*s %ld out of %ld..%ld cc=%"UVxf"\n",
3280af22 3460 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 3461 (long)n, (long)cc->min,
2797576d 3462 (long)cc->max, PTR2UV(cc))
c277df42 3463 );
4633a7c4 3464
a0d0e21e
LW
3465 /* If degenerate scan matches "", assume scan done. */
3466
579cf2c3 3467 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 3468 PL_regcc = cc->oldcc;
2ab05381
IZ
3469 if (PL_regcc)
3470 ln = PL_regcc->cur;
a3621e74 3471 DEBUG_EXECUTE_r(
c3464db5
DD
3472 PerlIO_printf(Perl_debug_log,
3473 "%*s empty match detected, try continuation...\n",
3280af22 3474 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3475 );
a0d0e21e 3476 if (regmatch(cc->next))
4633a7c4 3477 sayYES;
2ab05381
IZ
3478 if (PL_regcc)
3479 PL_regcc->cur = ln;
3280af22 3480 PL_regcc = cc;
4633a7c4 3481 sayNO;
a0d0e21e
LW
3482 }
3483
3484 /* First just match a string of min scans. */
3485
3486 if (n < cc->min) {
3487 cc->cur = n;
3488 cc->lastloc = locinput;
4633a7c4
LW
3489 if (regmatch(cc->scan))
3490 sayYES;
3491 cc->cur = n - 1;
c277df42 3492 cc->lastloc = lastloc;
4633a7c4 3493 sayNO;
a0d0e21e
LW
3494 }
3495
2c2d71f5
JH
3496 if (scan->flags) {
3497 /* Check whether we already were at this position.
3498 Postpone detection until we know the match is not
3499 *that* much linear. */
3500 if (!PL_reg_maxiter) {
3501 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3502 PL_reg_leftiter = PL_reg_maxiter;
3503 }
3504 if (PL_reg_leftiter-- == 0) {
69a95ace 3505 I32 size = (PL_reg_maxiter + 7)/8;
2c2d71f5 3506 if (PL_reg_poscache) {
eb160463 3507 if ((I32)PL_reg_poscache_size < size) {
2c2d71f5
JH
3508 Renew(PL_reg_poscache, size, char);
3509 PL_reg_poscache_size = size;
3510 }
3511 Zero(PL_reg_poscache, size, char);
3512 }
3513 else {
3514 PL_reg_poscache_size = size;
3515 Newz(29, PL_reg_poscache, size, char);
3516 }
a3621e74 3517 DEBUG_EXECUTE_r(
2c2d71f5
JH
3518 PerlIO_printf(Perl_debug_log,
3519 "%sDetected a super-linear match, switching on caching%s...\n",
3520 PL_colors[4], PL_colors[5])
3521 );
3522 }
3523 if (PL_reg_leftiter < 0) {
3524 I32 o = locinput - PL_bostr, b;
3525
69a95ace 3526 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2c2d71f5
JH
3527 b = o % 8;
3528 o /= 8;
3529 if (PL_reg_poscache[o] & (1<<b)) {
a3621e74 3530 DEBUG_EXECUTE_r(
2c2d71f5
JH
3531 PerlIO_printf(Perl_debug_log,
3532 "%*s already tried at this position...\n",
3533 REPORT_CODE_OFF+PL_regindent*2, "")
3534 );
69a95ace 3535 if (PL_reg_flags & RF_false)
c2b0868c
HS
3536 sayYES;
3537 else
3538 sayNO_SILENT;
2c2d71f5
JH
3539 }
3540 PL_reg_poscache[o] |= (1<<b);
3541 }
3542 }
3543
a0d0e21e
LW
3544 /* Prefer next over scan for minimal matching. */
3545
3546 if (cc->minmod) {
3280af22 3547 PL_regcc = cc->oldcc;
2ab05381
IZ
3548 if (PL_regcc)
3549 ln = PL_regcc->cur;
5f05dabc 3550 cp = regcppush(cc->parenfloor);
02db2b7b 3551 REGCP_SET(lastcp);
5f05dabc 3552 if (regmatch(cc->next)) {
c277df42 3553 regcpblow(cp);
69a95ace 3554 sayYES; /* All done. */
5f05dabc 3555 }
02db2b7b 3556 REGCP_UNWIND(lastcp);
5f05dabc 3557 regcppop();
2ab05381
IZ
3558 if (PL_regcc)
3559 PL_regcc->cur = ln;
3280af22 3560 PL_regcc = cc;
a0d0e21e 3561
c277df42 3562 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 3563 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3564 && !(PL_reg_flags & RF_warned)) {
3565 PL_reg_flags |= RF_warned;
9014280d 3566 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
2f3ca594
GS
3567 "Complex regular subexpression recursion",
3568 REG_INFTY - 1);
c277df42 3569 }
69a95ace 3570 sayNO;
c277df42 3571 }
a687059c 3572
a3621e74 3573 DEBUG_EXECUTE_r(
c3464db5
DD
3574 PerlIO_printf(Perl_debug_log,
3575 "%*s trying longer...\n",
3280af22 3576 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3577 );
a0d0e21e 3578 /* Try scanning more and see if it helps. */
3280af22 3579 PL_reginput = locinput;
a0d0e21e
LW
3580 cc->cur = n;
3581 cc->lastloc = locinput;
5f05dabc 3582 cp = regcppush(cc->parenfloor);
02db2b7b 3583 REGCP_SET(lastcp);
5f05dabc 3584 if (regmatch(cc->scan)) {
c277df42 3585 regcpblow(cp);
69a95ace 3586 sayYES;
5f05dabc 3587 }
02db2b7b 3588 REGCP_UNWIND(lastcp);
5f05dabc 3589 regcppop();
4633a7c4 3590 cc->cur = n - 1;
c277df42 3591 cc->lastloc = lastloc;
69a95ace 3592 sayNO;
a0d0e21e
LW
3593 }
3594
3595 /* Prefer scan over next for maximal matching. */
3596
3597 if (n < cc->max) { /* More greed allowed? */
5f05dabc 3598 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
3599 cc->cur = n;
3600 cc->lastloc = locinput;
02db2b7b 3601 REGCP_SET(lastcp);
5f05dabc 3602 if (regmatch(cc->scan)) {
c277df42 3603 regcpblow(cp);
69a95ace 3604 sayYES;
5f05dabc 3605 }
02db2b7b 3606 REGCP_UNWIND(lastcp);
a0d0e21e 3607 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3608 PL_reginput = locinput;
a3621e74 3609 DEBUG_EXECUTE_r(
c3464db5
DD
3610 PerlIO_printf(Perl_debug_log,
3611 "%*s failed, try continuation...\n",
3280af22 3612 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3613 );
3614 }
9041c2e3 3615 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3616 && !(PL_reg_flags & RF_warned)) {
3280af22 3617 PL_reg_flags |= RF_warned;
9014280d 3618 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
cb5d145d
GS
3619 "Complex regular subexpression recursion",
3620 REG_INFTY - 1);
a0d0e21e
LW
3621 }
3622
3623 /* Failed deeper matches of scan, so see if this one works. */
3280af22 3624 PL_regcc = cc->oldcc;
2ab05381
IZ
3625 if (PL_regcc)
3626 ln = PL_regcc->cur;
a0d0e21e 3627 if (regmatch(cc->next))
69a95ace 3628 sayYES;
2ab05381
IZ
3629 if (PL_regcc)
3630 PL_regcc->cur = ln;
3280af22 3631 PL_regcc = cc;
4633a7c4 3632 cc->cur = n - 1;
c277df42 3633 cc->lastloc = lastloc;
69a95ace 3634 sayNO;
a0d0e21e
LW
3635 }
3636 /* NOT REACHED */
9041c2e3 3637 case BRANCHJ:
c277df42
IZ
3638 next = scan + ARG(scan);
3639 if (next == scan)
3640 next = NULL;
3641 inner = NEXTOPER(NEXTOPER(scan));
3642 goto do_branch;
9041c2e3 3643 case BRANCH:
c277df42
IZ
3644 inner = NEXTOPER(scan);
3645 do_branch:
3646 {
c277df42
IZ
3647 c1 = OP(scan);
3648 if (OP(next) != c1) /* No choice. */
3649 next = inner; /* Avoid recursion. */
a0d0e21e 3650 else {
02db2b7b
IZ
3651 I32 lastparen = *PL_reglastparen;
3652 I32 unwind1;
3653 re_unwind_branch_t *uw;
3654
3655 /* Put unwinding data on stack */
3656 unwind1 = SSNEWt(1,re_unwind_branch_t);
3657 uw = SSPTRt(unwind1,re_unwind_branch_t);
3658 uw->prev = unwind;
3659 unwind = unwind1;
3660 uw->type = ((c1 == BRANCH)
3661 ? RE_UNWIND_BRANCH
3662 : RE_UNWIND_BRANCHJ);
3663 uw->lastparen = lastparen;
3664 uw->next = next;
3665 uw->locinput = locinput;
3666 uw->nextchr = nextchr;
3667#ifdef DEBUGGING
3668 uw->regindent = ++PL_regindent;
3669#endif
c277df42 3670
02db2b7b
IZ
3671 REGCP_SET(uw->lastcp);
3672
3673 /* Now go into the first branch */
3674 next = inner;
a687059c 3675 }
a0d0e21e
LW
3676 }
3677 break;
3678 case MINMOD:
3679 minmod = 1;
3680 break;
c277df42
IZ
3681 case CURLYM:
3682 {
00db4c45 3683 I32 l = 0;
c277df42 3684 CHECKPOINT lastcp;
9041c2e3 3685
c277df42 3686 /* We suppose that the next guy does not need
0e788c72 3687 backtracking: in particular, it is of constant non-zero length,
c277df42
IZ
3688 and has no parenths to influence future backrefs. */
3689 ln = ARG1(scan); /* min to match */
3690 n = ARG2(scan); /* max to match */
c277df42
IZ
3691 paren = scan->flags;
3692 if (paren) {
3280af22
NIS
3693 if (paren > PL_regsize)
3694 PL_regsize = paren;
eb160463 3695 if (paren > (I32)*PL_reglastparen)
3280af22 3696 *PL_reglastparen = paren;
c277df42 3697 }
dc45a647 3698 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3699 if (paren)
3700 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3701 PL_reginput = locinput;
c277df42
IZ
3702 if (minmod) {
3703 minmod = 0;
3704 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3705 sayNO;
3280af22 3706 locinput = PL_reginput;
cca55fe3 3707 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3708 regnode *text_node = next;
3709
cca55fe3 3710 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3711
cca55fe3 3712 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3713 else {
cca55fe3 3714 if (PL_regkind[(U8)OP(text_node)] == REF) {
44a68960
JH
3715 c1 = c2 = -1000;
3716 goto assume_ok_MM;
cca55fe3
JP
3717 }
3718 else { c1 = (U8)*STRING(text_node); }
af5decee 3719 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3720 c2 = PL_fold[c1];
af5decee 3721 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3722 c2 = PL_fold_locale[c1];
3723 else
3724 c2 = c1;
3725 }
a0ed51b3
LW
3726 }
3727 else
c277df42 3728 c1 = c2 = -1000;
cca55fe3 3729 assume_ok_MM:
02db2b7b 3730 REGCP_SET(lastcp);
0e788c72 3731 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
c277df42
IZ
3732 /* If it could work, try it. */
3733 if (c1 == -1000 ||
3280af22
NIS
3734 UCHARAT(PL_reginput) == c1 ||
3735 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3736 {
3737 if (paren) {
f31a99c8 3738 if (ln) {
cf93c79d
IZ
3739 PL_regstartp[paren] =
3740 HOPc(PL_reginput, -l) - PL_bostr;
3741 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3742 }
3743 else
cf93c79d 3744 PL_regendp[paren] = -1;
c277df42
IZ
3745 }
3746 if (regmatch(next))
3747 sayYES;
02db2b7b 3748 REGCP_UNWIND(lastcp);
c277df42
IZ
3749 }
3750 /* Couldn't or didn't -- move forward. */
3280af22 3751 PL_reginput = locinput;
c277df42
IZ
3752 if (regrepeat_hard(scan, 1, &l)) {
3753 ln++;
3280af22 3754 locinput = PL_reginput;
c277df42
IZ
3755 }
3756 else
3757 sayNO;
3758 }
a0ed51b3
LW
3759 }
3760 else {
c277df42 3761 n = regrepeat_hard(scan, n, &l);
3280af22 3762 locinput = PL_reginput;
a3621e74 3763 DEBUG_EXECUTE_r(
5c0ca799 3764 PerlIO_printf(Perl_debug_log,
faccc32b 3765 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3766 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3767 (IV) n, (IV)l)
c277df42
IZ
3768 );
3769 if (n >= ln) {
cca55fe3 3770 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3771 regnode *text_node = next;
3772
cca55fe3 3773 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3774
cca55fe3 3775 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3776 else {
cca55fe3 3777 if (PL_regkind[(U8)OP(text_node)] == REF) {
44a68960
JH
3778 c1 = c2 = -1000;
3779 goto assume_ok_REG;
cca55fe3
JP
3780 }
3781 else { c1 = (U8)*STRING(text_node); }
3782
af5decee 3783 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3784 c2 = PL_fold[c1];
af5decee 3785 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3786 c2 = PL_fold_locale[c1];
3787 else
3788 c2 = c1;
3789 }
a0ed51b3
LW
3790 }
3791 else
c277df42
IZ
3792 c1 = c2 = -1000;
3793 }
cca55fe3 3794 assume_ok_REG:
02db2b7b 3795 REGCP_SET(lastcp);
c277df42
IZ
3796 while (n >= ln) {
3797 /* If it could work, try it. */
3798 if (c1 == -1000 ||
3280af22
NIS
3799 UCHARAT(PL_reginput) == c1 ||
3800 UCHARAT(PL_reginput) == c2)
a0ed51b3 3801 {
a3621e74 3802 DEBUG_EXECUTE_r(
c3464db5 3803 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3804 "%*s trying tail with n=%"IVdf"...\n",
3805 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3806 );
3807 if (paren) {
3808 if (n) {
cf93c79d
IZ
3809 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3810 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3811 }
a0ed51b3 3812 else
cf93c79d 3813 PL_regendp[paren] = -1;
c277df42 3814 }
a0ed51b3
LW
3815 if (regmatch(next))
3816 sayYES;
02db2b7b 3817 REGCP_UNWIND(lastcp);
a0ed51b3 3818 }
c277df42
IZ
3819 /* Couldn't or didn't -- back up. */
3820 n--;
dfe13c55 3821 locinput = HOPc(locinput, -l);
3280af22 3822 PL_reginput = locinput;
c277df42
IZ
3823 }
3824 }
3825 sayNO;
3826 break;
3827 }
3828 case CURLYN:
3829 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3830 if (paren > PL_regsize)
3831 PL_regsize = paren;
eb160463 3832 if (paren > (I32)*PL_reglastparen)
3280af22 3833 *PL_reglastparen = paren;
c277df42
IZ
3834 ln = ARG1(scan); /* min to match */
3835 n = ARG2(scan); /* max to match */
dc45a647 3836 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3837 goto repeat;
a0d0e21e 3838 case CURLY:
c277df42 3839 paren = 0;
a0d0e21e
LW
3840 ln = ARG1(scan); /* min to match */
3841 n = ARG2(scan); /* max to match */
dc45a647 3842 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3843 goto repeat;
3844 case STAR:
3845 ln = 0;
c277df42 3846 n = REG_INFTY;
a0d0e21e 3847 scan = NEXTOPER(scan);
c277df42 3848 paren = 0;
a0d0e21e
LW
3849 goto repeat;
3850 case PLUS:
c277df42
IZ
3851 ln = 1;
3852 n = REG_INFTY;
3853 scan = NEXTOPER(scan);
3854 paren = 0;
3855 repeat:
a0d0e21e
LW
3856 /*
3857 * Lookahead to avoid useless match attempts
3858 * when we know what character comes next.
3859 */
5f80c4cf
JP
3860
3861 /*
3862 * Used to only do .*x and .*?x, but now it allows
3863 * for )'s, ('s and (?{ ... })'s to be in the way
3864 * of the quantifier and the EXACT-like node. -- japhy
3865 */
3866
cca55fe3 3867 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3868 U8 *s;
3869 regnode *text_node = next;
3870
cca55fe3 3871 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3872
cca55fe3 3873 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3874 else {
cca55fe3 3875 if (PL_regkind[(U8)OP(text_node)] == REF) {
44a68960
JH
3876 c1 = c2 = -1000;
3877 goto assume_ok_easy;
cca55fe3
JP
3878 }
3879 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
3880
3881 if (!UTF) {
3882 c2 = c1 = *s;
f65d3ee7 3883 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3884 c2 = PL_fold[c1];
f65d3ee7 3885 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf 3886 c2 = PL_fold_locale[c1];
1aa99e6b 3887 }
5f80c4cf 3888 else { /* UTF */
f65d3ee7 3889 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 3890 STRLEN ulen1, ulen2;
89ebb4a3
JH
3891 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3892 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
a2a2844f
JH
3893
3894 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3895 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3896
89ebb4a3 3897 c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
872c91ae
JH
3898 ckWARN(WARN_UTF8) ?
3899 0 : UTF8_ALLOW_ANY);
89ebb4a3 3900 c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
872c91ae
JH
3901 ckWARN(WARN_UTF8) ?
3902 0 : UTF8_ALLOW_ANY);
5f80c4cf
JP
3903 }
3904 else {
89ebb4a3 3905 c2 = c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
872c91ae
JH
3906 ckWARN(WARN_UTF8) ?
3907 0 : UTF8_ALLOW_ANY);
5f80c4cf 3908 }
1aa99e6b
IH
3909 }
3910 }
bbce6d69 3911 }
a0d0e21e 3912 else
bbce6d69 3913 c1 = c2 = -1000;
cca55fe3 3914 assume_ok_easy:
3280af22 3915 PL_reginput = locinput;
a0d0e21e 3916 if (minmod) {
c277df42 3917 CHECKPOINT lastcp;
a0d0e21e
LW
3918 minmod = 0;
3919 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3920 sayNO;
a0ed51b3 3921 locinput = PL_reginput;
02db2b7b 3922 REGCP_SET(lastcp);
0fe9bf95 3923 if (c1 != -1000) {
1aa99e6b 3924 char *e; /* Should not check after this */
0fe9bf95 3925 char *old = locinput;
b2f2f093 3926 int count = 0;
0fe9bf95 3927
1aa99e6b 3928 if (n == REG_INFTY) {
0fe9bf95 3929 e = PL_regeol - 1;
1aa99e6b
IH
3930 if (do_utf8)
3931 while (UTF8_IS_CONTINUATION(*(U8*)e))
3932 e--;
3933 }
3934 else if (do_utf8) {
3935 int m = n - ln;
3936 for (e = locinput;
3937 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3938 e += UTF8SKIP(e);
3939 }
3940 else {
3941 e = locinput + n - ln;
3942 if (e >= PL_regeol)
3943 e = PL_regeol - 1;
3944 }
0fe9bf95
IZ
3945 while (1) {
3946 /* Find place 'next' could work */
1aa99e6b
IH
3947 if (!do_utf8) {
3948 if (c1 == c2) {
a8e8ab15
JH
3949 while (locinput <= e &&
3950 UCHARAT(locinput) != c1)
1aa99e6b
IH
3951 locinput++;
3952 } else {
9041c2e3 3953 while (locinput <= e
a8e8ab15
JH
3954 && UCHARAT(locinput) != c1
3955 && UCHARAT(locinput) != c2)
1aa99e6b
IH
3956 locinput++;
3957 }
3958 count = locinput - old;
3959 }
3960 else {
3961 STRLEN len;
3962 if (c1 == c2) {
872c91ae
JH
3963 /* count initialised to
3964 * utf8_distance(old, locinput) */
b2f2f093 3965 while (locinput <= e &&
872c91ae 3966 utf8n_to_uvchr((U8*)locinput,
89ebb4a3 3967 UTF8_MAXBYTES, &len,
872c91ae 3968 ckWARN(WARN_UTF8) ?
eb160463 3969 0 : UTF8_ALLOW_ANY) != (UV)c1) {
1aa99e6b 3970 locinput += len;
b2f2f093
JH
3971 count++;
3972 }
1aa99e6b 3973 } else {
872c91ae
JH
3974 /* count initialised to
3975 * utf8_distance(old, locinput) */
b2f2f093 3976 while (locinput <= e) {
872c91ae 3977 UV c = utf8n_to_uvchr((U8*)locinput,
89ebb4a3 3978 UTF8_MAXBYTES, &len,
872c91ae
JH
3979 ckWARN(WARN_UTF8) ?
3980 0 : UTF8_ALLOW_ANY);
eb160463 3981 if (c == (UV)c1 || c == (UV)c2)
1aa99e6b 3982 break;
b2f2f093
JH
3983 locinput += len;
3984 count++;
1aa99e6b
IH
3985 }
3986 }
0fe9bf95 3987 }
9041c2e3 3988 if (locinput > e)
0fe9bf95
IZ
3989 sayNO;
3990 /* PL_reginput == old now */
3991 if (locinput != old) {
3992 ln = 1; /* Did some */
1aa99e6b 3993 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3994 sayNO;
3995 }
3996 /* PL_reginput == locinput now */
29d1e993 3997 TRYPAREN(paren, ln, locinput);
0fe9bf95 3998 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3999 REGCP_UNWIND(lastcp);
0fe9bf95 4000 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
4001 old = locinput;
4002 if (do_utf8)
4003 locinput += UTF8SKIP(locinput);
4004 else
4005 locinput++;
b2f2f093 4006 count = 1;
0fe9bf95
IZ
4007 }
4008 }
4009 else
c277df42 4010 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
4011 UV c;
4012 if (c1 != -1000) {
4013 if (do_utf8)
872c91ae 4014 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4015 UTF8_MAXBYTES, 0,
872c91ae
JH
4016 ckWARN(WARN_UTF8) ?
4017 0 : UTF8_ALLOW_ANY);
1aa99e6b 4018 else
9041c2e3 4019 c = UCHARAT(PL_reginput);
2390ecbc 4020 /* If it could work, try it. */
eb160463 4021 if (c == (UV)c1 || c == (UV)c2)
2390ecbc 4022 {
ecc99935 4023 TRYPAREN(paren, ln, PL_reginput);
2390ecbc
PP
4024 REGCP_UNWIND(lastcp);
4025 }
1aa99e6b 4026 }
a0d0e21e 4027 /* If it could work, try it. */
2390ecbc 4028 else if (c1 == -1000)
bbce6d69 4029 {
ecc99935 4030 TRYPAREN(paren, ln, PL_reginput);
02db2b7b 4031 REGCP_UNWIND(lastcp);
bbce6d69 4032 }
c277df42 4033 /* Couldn't or didn't -- move forward. */
a0ed51b3 4034 PL_reginput = locinput;
a0d0e21e
LW
4035 if (regrepeat(scan, 1)) {
4036 ln++;
a0ed51b3
LW
4037 locinput = PL_reginput;
4038 }
4039 else
4633a7c4 4040 sayNO;
a0d0e21e
LW
4041 }
4042 }
4043 else {
c277df42 4044 CHECKPOINT lastcp;
a0d0e21e 4045 n = regrepeat(scan, n);
a0ed51b3 4046 locinput = PL_reginput;
22c35a8c 4047 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
7fba1cd6 4048 (OP(next) != MEOL ||
15272685
HS
4049 OP(next) == SEOL || OP(next) == EOS))
4050 {
a0d0e21e 4051 ln = n; /* why back off? */
1aeab75a
GS
4052 /* ...because $ and \Z can match before *and* after
4053 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
4054 We should back off by one in this case. */
4055 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
4056 ln--;
4057 }
02db2b7b 4058 REGCP_SET(lastcp);
c277df42 4059 if (paren) {
8fa7f367 4060 UV c = 0;
c277df42 4061 while (n >= ln) {
1aa99e6b
IH
4062 if (c1 != -1000) {
4063 if (do_utf8)
872c91ae 4064 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4065 UTF8_MAXBYTES, 0,
872c91ae
JH
4066 ckWARN(WARN_UTF8) ?
4067 0 : UTF8_ALLOW_ANY);
1aa99e6b 4068 else
9041c2e3 4069 c = UCHARAT(PL_reginput);
1aa99e6b 4070 }
c277df42 4071 /* If it could work, try it. */
eb160463 4072 if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
c277df42 4073 {
29d1e993 4074 TRYPAREN(paren, n, PL_reginput);
02db2b7b 4075 REGCP_UNWIND(lastcp);
c277df42
IZ
4076 }
4077 /* Couldn't or didn't -- back up. */
4078 n--;
dfe13c55 4079 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 4080 }
a0ed51b3
LW
4081 }
4082 else {
8fa7f367 4083 UV c = 0;
c277df42 4084 while (n >= ln) {
1aa99e6b
IH
4085 if (c1 != -1000) {
4086 if (do_utf8)
872c91ae 4087 c = utf8n_to_uvchr((U8*)PL_reginput,
89ebb4a3 4088 UTF8_MAXBYTES, 0,
872c91ae
JH
4089 ckWARN(WARN_UTF8) ?
4090 0 : UTF8_ALLOW_ANY);
1aa99e6b 4091 else
9041c2e3 4092 c = UCHARAT(PL_reginput);
1aa99e6b 4093 }
c277df42 4094 /* If it could work, try it. */
eb160463 4095 if (c1 == -1000 || c == (UV)c1 || c == (UV)c2)
c277df42 4096 {
29d1e993 4097 TRYPAREN(paren, n, PL_reginput);
02db2b7b 4098 REGCP_UNWIND(lastcp);
c277df42
IZ
4099 }
4100 /* Couldn't or didn't -- back up. */
4101 n--;
dfe13c55 4102 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 4103 }
a0d0e21e
LW
4104 }
4105 }
4633a7c4 4106 sayNO;
c277df42 4107 break;
a0d0e21e 4108 case END:
0f5d15d6
IZ
4109 if (PL_reg_call_cc) {
4110 re_cc_state *cur_call_cc = PL_reg_call_cc;
4111 CURCUR *cctmp = PL_regcc;
4112 regexp *re = PL_reg_re;
4113 CHECKPOINT cp, lastcp;
4114
4115 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 4116 REGCP_SET(lastcp);
0f5d15d6
IZ
4117 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
4118 the caller. */
4119 PL_reginput = locinput; /* Make position available to
4120 the callcc. */
4121 cache_re(PL_reg_call_cc->re);
4122 PL_regcc = PL_reg_call_cc->cc;
4123 PL_reg_call_cc = PL_reg_call_cc->prev;
4124 if (regmatch(cur_call_cc->node)) {
4125 PL_reg_call_cc = cur_call_cc;
4126 regcpblow(cp);
4127 sayYES;
4128 }
02db2b7b 4129 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
4130 regcppop();
4131 PL_reg_call_cc = cur_call_cc;
4132 PL_regcc = cctmp;
4133 PL_reg_re = re;
4134 cache_re(re);
4135
a3621e74 4136 DEBUG_EXECUTE_r(
0f5d15d6
IZ
4137 PerlIO_printf(Perl_debug_log,
4138 "%*s continuation failed...\n",
4139 REPORT_CODE_OFF+PL_regindent*2, "")
4140 );
7821416a 4141 sayNO_SILENT;
0f5d15d6 4142 }
7821416a 4143 if (locinput < PL_regtill) {
a3621e74 4144 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
7821416a
IZ
4145 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4146 PL_colors[4],
4147 (long)(locinput - PL_reg_starttry),
4148 (long)(PL_regtill - PL_reg_starttry),
4149 PL_colors[5]));
4150 sayNO_FINAL; /* Cannot match: too short. */
4151 }
4152 PL_reginput = locinput; /* put where regtry can find it */
4153 sayYES_FINAL; /* Success! */
7e5428c5 4154 case SUCCEED:
3280af22 4155 PL_reginput = locinput; /* put where regtry can find it */
7821416a 4156 sayYES_LOUD; /* Success! */
c277df42
IZ
4157 case SUSPEND:
4158 n = 1;
9fe1d20c 4159 PL_reginput = locinput;
9041c2e3 4160 goto do_ifmatch;
a0d0e21e 4161 case UNLESSM:
c277df42 4162 n = 0;
a0ed51b3 4163 if (scan->flags) {
efb30f32
HS
4164 s = HOPBACKc(locinput, scan->flags);
4165 if (!s)
4166 goto say_yes;
4167 PL_reginput = s;
a0ed51b3
LW
4168 }
4169 else
4170 PL_reginput = locinput;
69a95ace 4171 PL_reg_flags ^= RF_false;
c277df42
IZ
4172 goto do_ifmatch;
4173 case IFMATCH:
4174 n = 1;
a0ed51b3 4175 if (scan->flags) {
efb30f32
HS
4176 s = HOPBACKc(locinput, scan->flags);
4177 if (!s)
4178 goto say_no;
4179 PL_reginput = s;
a0ed51b3
LW
4180 }
4181 else
4182 PL_reginput = locinput;
4183
c277df42 4184 do_ifmatch:
c277df42
IZ
4185 inner = NEXTOPER(NEXTOPER(scan));
4186 if (regmatch(inner) != n) {
69a95ace
RGS
4187 if (n == 0)
4188 PL_reg_flags ^= RF_false;
c277df42
IZ
4189 say_no:
4190 if (logical) {
4191 logical = 0;
4192 sw = 0;
4193 goto do_longjump;
a0ed51b3
LW
4194 }
4195 else
c277df42
IZ
4196 sayNO;
4197 }
69a95ace
RGS
4198 if (n == 0)
4199 PL_reg_flags ^= RF_false;
c277df42
IZ
4200 say_yes:
4201 if (logical) {
4202 logical = 0;
4203 sw = 1;
4204 }
fe44a5e8 4205 if (OP(scan) == SUSPEND) {
3280af22 4206 locinput = PL_reginput;
565764a8 4207 nextchr = UCHARAT(locinput);
fe44a5e8 4208 }
c277df42
IZ
4209 /* FALL THROUGH. */
4210 case LONGJMP:
4211 do_longjump:
4212 next = scan + ARG(scan);
4213 if (next == scan)
4214 next = NULL;
a0d0e21e
LW
4215 break;
4216 default:
b900a521 4217 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 4218 PTR2UV(scan), OP(scan));
cea2e8a9 4219 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 4220 }
02db2b7b 4221 reenter:
a0d0e21e
LW
4222 scan = next;
4223 }
a687059c 4224
a0d0e21e
LW
4225 /*
4226 * We get here only if there's trouble -- normally "case END" is
4227 * the terminating point.
4228 */
cea2e8a9 4229 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 4230 /*NOTREACHED*/
4633a7c4
LW
4231 sayNO;
4232
7821416a 4233yes_loud:
a3621e74 4234 DEBUG_EXECUTE_r(
7821416a
IZ
4235 PerlIO_printf(Perl_debug_log,
4236 "%*s %scould match...%s\n",
4237 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
4238 );
4239 goto yes;
4240yes_final:
a3621e74 4241 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
7821416a 4242 PL_colors[4],PL_colors[5]));
4633a7c4
LW
4243yes:
4244#ifdef DEBUGGING
3280af22 4245 PL_regindent--;
4633a7c4 4246#endif
02db2b7b
IZ
4247
4248#if 0 /* Breaks $^R */
4249 if (unwind)
4250 regcpblow(firstcp);
4251#endif
4633a7c4
LW
4252 return 1;
4253
4254no:
a3621e74 4255 DEBUG_EXECUTE_r(
7821416a
IZ
4256 PerlIO_printf(Perl_debug_log,
4257 "%*s %sfailed...%s\n",
4258 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
4259 );
4260 goto do_no;
4261no_final:
4262do_no:
02db2b7b
IZ
4263 if (unwind) {
4264 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
4265
4266 switch (uw->type) {
4267 case RE_UNWIND_BRANCH:
4268 case RE_UNWIND_BRANCHJ:
4269 {
4270 re_unwind_branch_t *uwb = &(uw->branch);
4271 I32 lastparen = uwb->lastparen;
9041c2e3 4272
02db2b7b
IZ
4273 REGCP_UNWIND(uwb->lastcp);
4274 for (n = *PL_reglastparen; n > lastparen; n--)
4275 PL_regendp[n] = -1;
4276 *PL_reglastparen = n;
4277 scan = next = uwb->next;
9041c2e3
NIS
4278 if ( !scan ||
4279 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
4280 ? BRANCH : BRANCHJ) ) { /* Failure */
4281 unwind = uwb->prev;
4282#ifdef DEBUGGING
4283 PL_regindent--;
4284#endif
4285 goto do_no;
4286 }
4287 /* Have more choice yet. Reuse the same uwb. */
4288 /*SUPPRESS 560*/
4289 if ((n = (uwb->type == RE_UNWIND_BRANCH
4290 ? NEXT_OFF(next) : ARG(next))))
4291 next += n;
4292 else
4293 next = NULL; /* XXXX Needn't unwinding in this case... */
4294 uwb->next = next;
4295 next = NEXTOPER(scan);
4296 if (uwb->type == RE_UNWIND_BRANCHJ)
4297 next = NEXTOPER(next);
4298 locinput = uwb->locinput;
4299 nextchr = uwb->nextchr;
4300#ifdef DEBUGGING
4301 PL_regindent = uwb->regindent;
4302#endif
4303
4304 goto reenter;
4305 }
4306 /* NOT REACHED */
4307 default:
4308 Perl_croak(aTHX_ "regexp unwind memory corruption");
4309 }
4310 /* NOT REACHED */
4311 }
4633a7c4 4312#ifdef DEBUGGING
3280af22 4313 PL_regindent--;
4633a7c4 4314#endif
a0d0e21e 4315 return 0;
a687059c
LW
4316}
4317
4318/*
4319 - regrepeat - repeatedly match something simple, report how many
4320 */
4321/*
4322 * [This routine now assumes that it will only match on things of length 1.
4323 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 4324 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 4325 */
76e3520e 4326STATIC I32
cea2e8a9 4327S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 4328{
a0d0e21e 4329 register char *scan;
a0d0e21e 4330 register I32 c;
3280af22 4331 register char *loceol = PL_regeol;
a0ed51b3 4332 register I32 hardcount = 0;
53c4c00c 4333 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 4334
3280af22 4335 scan = PL_reginput;
faf11cac
HS
4336 if (max == REG_INFTY)
4337 max = I32_MAX;
4338 else if (max < loceol - scan)
a0d0e21e 4339 loceol = scan + max;
a0d0e21e 4340 switch (OP(p)) {
22c35a8c 4341 case REG_ANY:
1aa99e6b 4342 if (do_utf8) {
ffc61ed2 4343 loceol = PL_regeol;
1aa99e6b 4344 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
4345 scan += UTF8SKIP(scan);
4346 hardcount++;
4347 }
4348 } else {
4349 while (scan < loceol && *scan != '\n')
4350 scan++;
a0ed51b3
LW
4351 }
4352 break;
ffc61ed2 4353 case SANY:
def8e4ea
JH
4354 if (do_utf8) {
4355 loceol = PL_regeol;
a0804c9e 4356 while (scan < loceol && hardcount < max) {
def8e4ea
JH
4357 scan += UTF8SKIP(scan);
4358 hardcount++;
4359 }
4360 }
4361 else
4362 scan = loceol;
a0ed51b3 4363 break;
f33976b4
DB
4364 case CANY:
4365 scan = loceol;
4366 break;
090f7165
JH
4367 case EXACT: /* length of string is 1 */
4368 c = (U8)*STRING(p);
4369 while (scan < loceol && UCHARAT(scan) == c)
4370 scan++;
bbce6d69 4371 break;
4372 case EXACTF: /* length of string is 1 */
cd439c50 4373 c = (U8)*STRING(p);
bbce6d69 4374 while (scan < loceol &&
22c35a8c 4375 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 4376 scan++;
4377 break;
4378 case EXACTFL: /* length of string is 1 */
3280af22 4379 PL_reg_flags |= RF_tainted;
cd439c50 4380 c = (U8)*STRING(p);
bbce6d69 4381 while (scan < loceol &&
22c35a8c 4382 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
4383 scan++;
4384 break;
4385 case ANYOF:
ffc61ed2
JH
4386 if (do_utf8) {
4387 loceol = PL_regeol;
cfc92286
JH
4388 while (hardcount < max && scan < loceol &&
4389 reginclass(p, (U8*)scan, 0, do_utf8)) {
ffc61ed2
JH
4390 scan += UTF8SKIP(scan);
4391 hardcount++;
4392 }
4393 } else {
7d3e948e 4394 while (scan < loceol && REGINCLASS(p, (U8*)scan))
ffc61ed2
JH
4395 scan++;
4396 }
a0d0e21e
LW
4397 break;
4398 case ALNUM:
1aa99e6b 4399 if (do_utf8) {
ffc61ed2 4400 loceol = PL_regeol;
8269fa76 4401 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 4402 while (hardcount < max && scan < loceol &&
3568d838 4403 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4404 scan += UTF8SKIP(scan);
4405 hardcount++;
4406 }
4407 } else {
4408 while (scan < loceol && isALNUM(*scan))
4409 scan++;
a0ed51b3
LW
4410 }
4411 break;
bbce6d69 4412 case ALNUML:
3280af22 4413 PL_reg_flags |= RF_tainted;
1aa99e6b 4414 if (do_utf8) {
ffc61ed2 4415 loceol = PL_regeol;
1aa99e6b
IH
4416 while (hardcount < max && scan < loceol &&
4417 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4418 scan += UTF8SKIP(scan);
4419 hardcount++;
4420 }
4421 } else {
4422 while (scan < loceol && isALNUM_LC(*scan))
4423 scan++;
a0ed51b3
LW
4424 }
4425 break;
a0d0e21e 4426 case NALNUM:
1aa99e6b 4427 if (do_utf8) {
ffc61ed2 4428 loceol = PL_regeol;
8269fa76 4429 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 4430 while (hardcount < max && scan < loceol &&
3568d838 4431 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4432 scan += UTF8SKIP(scan);
4433 hardcount++;
4434 }
4435 } else {
4436 while (scan < loceol && !isALNUM(*scan))
4437 scan++;
a0ed51b3
LW
4438 }
4439 break;
bbce6d69 4440 case NALNUML:
3280af22 4441 PL_reg_flags |= RF_tainted;
1aa99e6b 4442 if (do_utf8) {
ffc61ed2 4443 loceol = PL_regeol;
1aa99e6b
IH
4444 while (hardcount < max && scan < loceol &&
4445 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
4446 scan += UTF8SKIP(scan);
4447 hardcount++;
4448 }
4449 } else {
4450 while (scan < loceol && !isALNUM_LC(*scan))
4451 scan++;
a0ed51b3
LW
4452 }
4453 break;
a0d0e21e 4454 case SPACE:
1aa99e6b 4455 if (do_utf8) {
ffc61ed2 4456 loceol = PL_regeol;
8269fa76 4457 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 4458 while (hardcount < max && scan < loceol &&
3568d838
JH
4459 (*scan == ' ' ||
4460 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4461 scan += UTF8SKIP(scan);
4462 hardcount++;
4463 }
4464 } else {
4465 while (scan < loceol && isSPACE(*scan))
4466 scan++;
a0ed51b3
LW
4467 }
4468 break;
bbce6d69 4469 case SPACEL:
3280af22 4470 PL_reg_flags |= RF_tainted;
1aa99e6b 4471 if (do_utf8) {
ffc61ed2 4472 loceol = PL_regeol;
1aa99e6b 4473 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4474 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4475 scan += UTF8SKIP(scan);
4476 hardcount++;
4477 }
4478 } else {
4479 while (scan < loceol && isSPACE_LC(*scan))
4480 scan++;
a0ed51b3
LW
4481 }
4482 break;
a0d0e21e 4483 case NSPACE:
1aa99e6b 4484 if (do_utf8) {
ffc61ed2 4485 loceol = PL_regeol;
8269fa76 4486 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 4487 while (hardcount < max && scan < loceol &&
3568d838
JH
4488 !(*scan == ' ' ||
4489 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
4490 scan += UTF8SKIP(scan);
4491 hardcount++;
4492 }
4493 } else {
4494 while (scan < loceol && !isSPACE(*scan))
4495 scan++;
4496 break;
a0ed51b3 4497 }
bbce6d69 4498 case NSPACEL:
3280af22 4499 PL_reg_flags |= RF_tainted;
1aa99e6b 4500 if (do_utf8) {
ffc61ed2 4501 loceol = PL_regeol;
1aa99e6b 4502 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
4503 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
4504 scan += UTF8SKIP(scan);
4505 hardcount++;
4506 }
4507 } else {
4508 while (scan < loceol && !isSPACE_LC(*scan))
4509 scan++;
a0ed51b3
LW
4510 }
4511 break;
a0d0e21e 4512 case DIGIT:
1aa99e6b 4513 if (do_utf8) {
ffc61ed2 4514 loceol = PL_regeol;
8269fa76 4515 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 4516 while (hardcount < max && scan < loceol &&
3568d838 4517 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4518 scan += UTF8SKIP(scan);
4519 hardcount++;
4520 }
4521 } else {
4522 while (scan < loceol && isDIGIT(*scan))
4523 scan++;
a0ed51b3
LW
4524 }
4525 break;
a0d0e21e 4526 case NDIGIT:
1aa99e6b 4527 if (do_utf8) {
ffc61ed2 4528 loceol = PL_regeol;
8269fa76 4529 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 4530 while (hardcount < max && scan < loceol &&
3568d838 4531 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
4532 scan += UTF8SKIP(scan);
4533 hardcount++;
4534 }
4535 } else {
4536 while (scan < loceol && !isDIGIT(*scan))
4537 scan++;
a0ed51b3
LW
4538 }
4539 break;
a0d0e21e
LW
4540 default: /* Called on something of 0 width. */
4541 break; /* So match right here or not at all. */
4542 }
a687059c 4543
a0ed51b3
LW
4544 if (hardcount)
4545 c = hardcount;
4546 else
4547 c = scan - PL_reginput;
3280af22 4548 PL_reginput = scan;
a687059c 4549
a3621e74
YO
4550 DEBUG_r({
4551 SV *re_debug_flags;
c277df42 4552 SV *prop = sv_newmortal();
a3621e74
YO
4553 GET_RE_DEBUG_FLAGS;
4554 DEBUG_EXECUTE_r({
c277df42 4555 regprop(prop, p);
9041c2e3
NIS
4556 PerlIO_printf(Perl_debug_log,
4557 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
7b0972df 4558 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42 4559 });
a3621e74 4560 });
9041c2e3 4561
a0d0e21e 4562 return(c);
a687059c
LW
4563}
4564
4565/*
c277df42 4566 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 4567 *
0e788c72 4568 * The repeater is supposed to have constant non-zero length.
c277df42
IZ
4569 */
4570
76e3520e 4571STATIC I32
cea2e8a9 4572S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 4573{
b7953727 4574 register char *scan = Nullch;
c277df42 4575 register char *start;
3280af22 4576 register char *loceol = PL_regeol;
a0ed51b3 4577 I32 l = 0;
708e3b05 4578 I32 count = 0, res = 1;
a0ed51b3
LW
4579
4580 if (!max)
4581 return 0;
c277df42 4582
3280af22 4583 start = PL_reginput;
53c4c00c 4584 if (PL_reg_match_utf8) {
708e3b05 4585 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4586 if (!count++) {
4587 l = 0;
4588 while (start < PL_reginput) {
4589 l++;
4590 start += UTF8SKIP(start);
4591 }
4592 *lp = l;
4593 if (l == 0)
4594 return max;
4595 }
4596 if (count == max)
4597 return count;
4598 }
4599 }
4600 else {
708e3b05 4601 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
4602 if (!count++) {
4603 *lp = l = PL_reginput - start;
4604 if (max != REG_INFTY && l*max < loceol - scan)
4605 loceol = scan + l*max;
4606 if (l == 0)
4607 return max;
c277df42
IZ
4608 }
4609 }
4610 }
708e3b05 4611 if (!res)
3280af22 4612 PL_reginput = scan;
9041c2e3 4613
a0ed51b3 4614 return count;
c277df42
IZ
4615}
4616
4617/*
ffc61ed2
JH
4618- regclass_swash - prepare the utf8 swash
4619*/
4620
4621SV *
9e55ce06 4622Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** listsvp, SV **altsvp)
ffc61ed2 4623{
9e55ce06
JH
4624 SV *sw = NULL;
4625 SV *si = NULL;
4626 SV *alt = NULL;
ffc61ed2
JH
4627
4628 if (PL_regdata && PL_regdata->count) {
4629 U32 n = ARG(node);
4630
4631 if (PL_regdata->what[n] == 's') {
4632 SV *rv = (SV*)PL_regdata->data[n];
4633 AV *av = (AV*)SvRV((SV*)rv);
b11f357e 4634 SV **ary = AvARRAY(av);
9e55ce06 4635 SV **a, **b;
9041c2e3 4636
9e55ce06
JH
4637 /* See the end of regcomp.c:S_reglass() for
4638 * documentation of these array elements. */
4639
b11f357e
JH
4640 si = *ary;
4641 a = SvTYPE(ary[1]) == SVt_RV ? &ary[1] : 0;
4642 b = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : 0;
4643
ffc61ed2
JH
4644 if (a)
4645 sw = *a;
4646 else if (si && doinit) {
4647 sw = swash_init("utf8", "", si, 1, 0);
4648 (void)av_store(av, 1, sw);
4649 }
9e55ce06
JH
4650 if (b)
4651 alt = *b;
ffc61ed2
JH
4652 }
4653 }
4654
9e55ce06
JH
4655 if (listsvp)
4656 *listsvp = si;
4657 if (altsvp)
4658 *altsvp = alt;
ffc61ed2
JH
4659
4660 return sw;
4661}
4662
4663/*
ba7b4546 4664 - reginclass - determine if a character falls into a character class
832705d4
JH
4665
4666 The n is the ANYOF regnode, the p is the target string, lenp
4667 is pointer to the maximum length of how far to go in the p
4668 (if the lenp is zero, UTF8SKIP(p) is used),
4669 do_utf8 tells whether the target string is in UTF-8.
4670
bbce6d69 4671 */
4672
76e3520e 4673STATIC bool
ba7b4546 4674S_reginclass(pTHX_ register regnode *n, register U8* p, STRLEN* lenp, register bool do_utf8)
bbce6d69 4675{
ffc61ed2 4676 char flags = ANYOF_FLAGS(n);
bbce6d69 4677 bool match = FALSE;
cc07378b 4678 UV c = *p;
ae9ddab8 4679 STRLEN len = 0;
9e55ce06 4680 STRLEN plen;
1aa99e6b 4681
ae9ddab8 4682 if (do_utf8 && !UTF8_IS_INVARIANT(c))
89ebb4a3 4683 c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
ae9ddab8 4684 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
bbce6d69 4685
0f0076b4 4686 plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
ffc61ed2 4687 if (do_utf8 || (flags & ANYOF_UNICODE)) {
9e55ce06
JH
4688 if (lenp)
4689 *lenp = 0;
ffc61ed2 4690 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
4691 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
4692 match = TRUE;
bbce6d69 4693 }
3568d838 4694 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 4695 match = TRUE;
ffc61ed2 4696 if (!match) {
9e55ce06
JH
4697 AV *av;
4698 SV *sw = regclass_swash(n, TRUE, 0, (SV**)&av);
ffc61ed2
JH
4699
4700 if (sw) {
3568d838 4701 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
4702 match = TRUE;
4703 else if (flags & ANYOF_FOLD) {
9e55ce06
JH
4704 if (!match && lenp && av) {
4705 I32 i;
4706
4707 for (i = 0; i <= av_len(av); i++) {
4708 SV* sv = *av_fetch(av, i, FALSE);
4709 STRLEN len;
4710 char *s = SvPV(sv, len);
4711
061b10df 4712 if (len <= plen && memEQ(s, (char*)p, len)) {
9e55ce06
JH
4713 *lenp = len;
4714 match = TRUE;
4715 break;
4716 }
4717 }
4718 }
4719 if (!match) {
89ebb4a3 4720 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4a623e43
JH
4721 STRLEN tmplen;
4722
9e55ce06
JH
4723 to_utf8_fold(p, tmpbuf, &tmplen);
4724 if (swash_fetch(sw, tmpbuf, do_utf8))
4725 match = TRUE;
4726 }
ffc61ed2
JH
4727 }
4728 }
bbce6d69 4729 }
9e55ce06 4730 if (match && lenp && *lenp == 0)
0f0076b4 4731 *lenp = UNISKIP(NATIVE_TO_UNI(c));
bbce6d69 4732 }
1aa99e6b 4733 if (!match && c < 256) {
ffc61ed2
JH
4734 if (ANYOF_BITMAP_TEST(n, c))
4735 match = TRUE;
4736 else if (flags & ANYOF_FOLD) {
eb160463 4737 U8 f;
a0ed51b3 4738
ffc61ed2
JH
4739 if (flags & ANYOF_LOCALE) {
4740 PL_reg_flags |= RF_tainted;
4741 f = PL_fold_locale[c];
4742 }
4743 else
4744 f = PL_fold[c];
4745 if (f != c && ANYOF_BITMAP_TEST(n, f))
4746 match = TRUE;
4747 }
4748
4749 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 4750 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
4751 if (
4752 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4753 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4754 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
4755 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4756 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4757 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4758 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4759 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4760 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4761 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4762 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
4763 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
4764 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4765 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4766 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4767 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4768 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
4769 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4770 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
4771 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4772 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4773 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4774 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
4775 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4776 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4777 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
4778 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
4779 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
4780 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
4781 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
4782 ) /* How's that for a conditional? */
4783 {
4784 match = TRUE;
4785 }
a0ed51b3 4786 }
a0ed51b3
LW
4787 }
4788
a0ed51b3
LW
4789 return (flags & ANYOF_INVERT) ? !match : match;
4790}
161b471a 4791
dfe13c55 4792STATIC U8 *
cea2e8a9 4793S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 4794{
1aa99e6b
IH
4795 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4796}
4797
4798STATIC U8 *
4799S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 4800{
a0ed51b3 4801 if (off >= 0) {
1aa99e6b 4802 while (off-- && s < lim) {
ffc61ed2 4803 /* XXX could check well-formedness here */
a0ed51b3 4804 s += UTF8SKIP(s);
ffc61ed2 4805 }
a0ed51b3
LW
4806 }
4807 else {
4808 while (off++) {
1aa99e6b 4809 if (s > lim) {
a0ed51b3 4810 s--;
ffc61ed2 4811 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4812 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4813 s--;
ffc61ed2
JH
4814 }
4815 /* XXX could check well-formedness here */
a0ed51b3
LW
4816 }
4817 }
4818 }
4819 return s;
4820}
161b471a 4821
dfe13c55 4822STATIC U8 *
1aa99e6b 4823S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 4824{
1aa99e6b
IH
4825 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4826}
4827
4828STATIC U8 *
4829S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
4830{
4831 if (off >= 0) {
1aa99e6b 4832 while (off-- && s < lim) {
ffc61ed2 4833 /* XXX could check well-formedness here */
a0ed51b3 4834 s += UTF8SKIP(s);
ffc61ed2 4835 }
a0ed51b3
LW
4836 if (off >= 0)
4837 return 0;
4838 }
4839 else {
4840 while (off++) {
1aa99e6b 4841 if (s > lim) {
a0ed51b3 4842 s--;
ffc61ed2 4843 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4844 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4845 s--;
ffc61ed2
JH
4846 }
4847 /* XXX could check well-formedness here */
a0ed51b3
LW
4848 }
4849 else
4850 break;
4851 }
4852 if (off <= 0)
4853 return 0;
4854 }
4855 return s;
4856}
51371543 4857
51371543 4858static void
acfe0abc 4859restore_pos(pTHX_ void *arg)
51371543 4860{
51371543
GS
4861 if (PL_reg_eval_set) {
4862 if (PL_reg_oldsaved) {
4863 PL_reg_re->subbeg = PL_reg_oldsaved;
4864 PL_reg_re->sublen = PL_reg_oldsavedlen;
ed252734
NC
4865#ifdef PERL_COPY_ON_WRITE
4866 PL_reg_re->saved_copy = PL_nrs;
4867#endif
51371543
GS
4868 RX_MATCH_COPIED_on(PL_reg_re);
4869 }
4870 PL_reg_magic->mg_len = PL_reg_oldpos;
4871 PL_reg_eval_set = 0;
4872 PL_curpm = PL_reg_oldcurpm;
4873 }
4874}
33b8afdf
JH
4875
4876STATIC void
4877S_to_utf8_substr(pTHX_ register regexp *prog)
4878{
4879 SV* sv;
4880 if (prog->float_substr && !prog->float_utf8) {
f2b990bf 4881 prog->float_utf8 = sv = newSVsv(prog->float_substr);
33b8afdf
JH
4882 sv_utf8_upgrade(sv);
4883 if (SvTAIL(prog->float_substr))
4884 SvTAIL_on(sv);
4885 if (prog->float_substr == prog->check_substr)
4886 prog->check_utf8 = sv;
4887 }
4888 if (prog->anchored_substr && !prog->anchored_utf8) {
f2b990bf 4889 prog->anchored_utf8 = sv = newSVsv(prog->anchored_substr);
33b8afdf
JH
4890 sv_utf8_upgrade(sv);
4891 if (SvTAIL(prog->anchored_substr))
4892 SvTAIL_on(sv);
4893 if (prog->anchored_substr == prog->check_substr)
4894 prog->check_utf8 = sv;
4895 }
4896}
4897
4898STATIC void
4899S_to_byte_substr(pTHX_ register regexp *prog)
4900{
4901 SV* sv;
4902 if (prog->float_utf8 && !prog->float_substr) {
f2b990bf 4903 prog->float_substr = sv = newSVsv(prog->float_utf8);
33b8afdf
JH
4904 if (sv_utf8_downgrade(sv, TRUE)) {
4905 if (SvTAIL(prog->float_utf8))
4906 SvTAIL_on(sv);
4907 } else {
4908 SvREFCNT_dec(sv);
4909 prog->float_substr = sv = &PL_sv_undef;
4910 }
4911 if (prog->float_utf8 == prog->check_utf8)
4912 prog->check_substr = sv;
4913 }
4914 if (prog->anchored_utf8 && !prog->anchored_substr) {
f2b990bf 4915 prog->anchored_substr = sv = newSVsv(prog->anchored_utf8);
33b8afdf
JH
4916 if (sv_utf8_downgrade(sv, TRUE)) {
4917 if (SvTAIL(prog->anchored_utf8))
4918 SvTAIL_on(sv);
4919 } else {
4920 SvREFCNT_dec(sv);
4921 prog->anchored_substr = sv = &PL_sv_undef;
4922 }
4923 if (prog->anchored_utf8 == prog->check_utf8)
4924 prog->check_substr = sv;
4925 }
4926}