This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
inline SvREFCNT_dec:
[perl5.git] / regexec.c
<
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
be3c0a43 70 **** Copyright (c) 1991-2002, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
a687059c
LW
83#include "regcomp.h"
84
c277df42
IZ
85#define RF_tainted 1 /* tainted information used? */
86#define RF_warned 2 /* warned about big count? */
ce862d02 87#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
88#define RF_utf8 8 /* String contains multibyte chars? */
89
eb160463 90#define UTF ((PL_reg_flags & RF_utf8) != 0)
ce862d02
IZ
91
92#define RS_init 1 /* eval environment created */
93#define RS_set 2 /* replsv value is set */
c277df42 94
a687059c
LW
95#ifndef STATIC
96#define STATIC static
97#endif
98
ba7b4546 99#define REGINCLASS(p,c) (ANYOF_FLAGS(p) ? reginclass(p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
7d3e948e 100
c277df42
IZ
101/*
102 * Forwards.
103 */
104
33b8afdf 105#define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 106#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 107
dfe13c55
GS
108#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
109#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
110#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
111#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
112#define HOPc(pos,off) ((char*)HOP(pos,off))
113#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 114
efb30f32 115#define HOPBACK(pos, off) ( \
e54858b0 116 (PL_reg_match_utf8) \
efb30f32
HS
117 ? reghopmaybe((U8*)pos, -off) \
118 : (pos - off >= PL_bostr) \
119 ? (U8*)(pos - off) \
120 : (U8*)NULL \
121)
122#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
123
1aa99e6b
IH
124#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
125#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
126#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
127#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
128#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
129#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
130
977e4b84 131#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) { ENTER; save_re_context(); (void)CAT2(is_utf8_, a)((U8*)b); LEAVE; } } STMT_END
51371543 132
5f80c4cf 133/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
134#define JUMPABLE(rn) ( \
135 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
136 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
137 OP(rn) == PLUS || OP(rn) == MINMOD || \
138 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
139)
140
cca55fe3
JP
141#define HAS_TEXT(rn) ( \
142 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
143)
e2d8ce26 144
a84d97b6
HS
145/*
146 Search for mandatory following text node; for lookahead, the text must
147 follow but for lookbehind (rn->flags != 0) we skip to the next step.
148*/
cca55fe3 149#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 150 while (JUMPABLE(rn)) \
a84d97b6 151 if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 152 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
153 else if (OP(rn) == PLUS) \
154 rn = NEXTOPER(rn); \
a84d97b6
HS
155 else if (OP(rn) == IFMATCH) \
156 rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
e2d8ce26 157 else rn += NEXT_OFF(rn); \
5f80c4cf 158} STMT_END
74750237 159
acfe0abc 160static void restore_pos(pTHX_ void *arg);
51371543 161
76e3520e 162STATIC CHECKPOINT
cea2e8a9 163S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 164{
3280af22 165 int retval = PL_savestack_ix;
b1ce53c5
JH
166#define REGCP_PAREN_ELEMS 4
167 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
168 int p;
169
e49a9654
IH
170 if (paren_elems_to_push < 0)
171 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
172
a01268b5 173#define REGCP_OTHER_ELEMS 6
b1ce53c5 174 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 175 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 176/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
177 SSPUSHINT(PL_regendp[p]);
178 SSPUSHINT(PL_regstartp[p]);
3280af22 179 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
180 SSPUSHINT(p);
181 }
b1ce53c5 182/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
183 SSPUSHINT(PL_regsize);
184 SSPUSHINT(*PL_reglastparen);
a01268b5 185 SSPUSHINT(*PL_reglastcloseparen);
3280af22 186 SSPUSHPTR(PL_reginput);
41123dfd
JH
187#define REGCP_FRAME_ELEMS 2
188/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
189 * are needed for the regexp context stack bookkeeping. */
190 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 191 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 192
a0d0e21e
LW
193 return retval;
194}
195
c277df42 196/* These are needed since we do not localize EVAL nodes: */
02db2b7b 197# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 198 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 199 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 200
02db2b7b 201# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 202 PerlIO_printf(Perl_debug_log, \
faccc32b 203 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 204 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 205
76e3520e 206STATIC char *
cea2e8a9 207S_regcppop(pTHX)
a0d0e21e 208{
b1ce53c5 209 I32 i;
a0d0e21e
LW
210 U32 paren = 0;
211 char *input;
cf93c79d 212 I32 tmps;
b1ce53c5
JH
213
214 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 215 i = SSPOPINT;
b1ce53c5
JH
216 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
217 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 218 input = (char *) SSPOPPTR;
a01268b5 219 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
220 *PL_reglastparen = SSPOPINT;
221 PL_regsize = SSPOPINT;
b1ce53c5
JH
222
223 /* Now restore the parentheses context. */
41123dfd
JH
224 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
225 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 226 paren = (U32)SSPOPINT;
3280af22 227 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
228 PL_regstartp[paren] = SSPOPINT;
229 tmps = SSPOPINT;
3280af22
NIS
230 if (paren <= *PL_reglastparen)
231 PL_regendp[paren] = tmps;
c277df42 232 DEBUG_r(
c3464db5 233 PerlIO_printf(Perl_debug_log,
b900a521 234 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 235 (UV)paren, (IV)PL_regstartp[paren],
b900a521 236 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 237 (IV)PL_regendp[paren],
3280af22 238 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 239 );
a0d0e21e 240 }
c277df42 241 DEBUG_r(
eb160463 242 if ((I32)(*PL_reglastparen + 1) <= PL_regnpar) {
c3464db5 243 PerlIO_printf(Perl_debug_log,
faccc32b
JH
244 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
245 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
246 }
247 );
daf18116 248#if 1
dafc8851
JH
249 /* It would seem that the similar code in regtry()
250 * already takes care of this, and in fact it is in
251 * a better location to since this code can #if 0-ed out
252 * but the code in regtry() is needed or otherwise tests
253 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
254 * (as of patchlevel 7877) will fail. Then again,
255 * this code seems to be necessary or otherwise
256 * building DynaLoader will fail:
257 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
258 * --jhi */
eb160463
GS
259 for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) {
260 if ((I32)paren > PL_regsize)
cf93c79d
IZ
261 PL_regstartp[paren] = -1;
262 PL_regendp[paren] = -1;
a0d0e21e 263 }
dafc8851 264#endif
a0d0e21e
LW
265 return input;
266}
267
0f5d15d6 268STATIC char *
cea2e8a9 269S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
270{
271 I32 tmp = PL_savestack_ix;
272
273 PL_savestack_ix = ss;
274 regcppop();
275 PL_savestack_ix = tmp;
942e002e 276 return Nullch;
0f5d15d6
IZ
277}
278
279typedef struct re_cc_state
280{
281 I32 ss;
282 regnode *node;
283 struct re_cc_state *prev;
284 CURCUR *cc;
285 regexp *re;
286} re_cc_state;
287
02db2b7b 288#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 289
29d1e993
HS
290#define TRYPAREN(paren, n, input) { \
291 if (paren) { \
292 if (n) { \
293 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
294 PL_regendp[paren] = input - PL_bostr; \
295 } \
296 else \
297 PL_regendp[paren] = -1; \
298 } \
299 if (regmatch(next)) \
300 sayYES; \
301 if (paren && n) \
302 PL_regendp[paren] = -1; \
303}
304
305
a687059c 306/*
e50aee73 307 * pregexec and friends
a687059c
LW
308 */
309
310/*
c277df42 311 - pregexec - match a regexp against a string
a687059c 312 */
c277df42 313I32
864dbfa3 314Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 315 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
316/* strend: pointer to null at end of string */
317/* strbeg: real beginning of string */
318/* minend: end of match must be >=minend after stringarg. */
319/* nosave: For optimizations. */
320{
321 return
9041c2e3 322 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
323 nosave ? 0 : REXEC_COPY_STR);
324}
0f5d15d6
IZ
325
326STATIC void
cea2e8a9 327S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
328{
329 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
330#ifdef DEBUGGING
331 PL_regprogram = prog->program;
332#endif
333 PL_regnpar = prog->nparens;
9041c2e3
NIS
334 PL_regdata = prog->data;
335 PL_reg_re = prog;
0f5d15d6 336}
22e551b9 337
9041c2e3 338/*
cad2e5aa
JH
339 * Need to implement the following flags for reg_anch:
340 *
341 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
342 * USE_INTUIT_ML
343 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
344 * INTUIT_AUTORITATIVE_ML
345 * INTUIT_ONCE_NOML - Intuit can match in one location only.
346 * INTUIT_ONCE_ML
347 *
348 * Another flag for this function: SECOND_TIME (so that float substrs
349 * with giant delta may be not rechecked).
350 */
351
352/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
353
2c2d71f5 354/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
355 Otherwise, only SvCUR(sv) is used to get strbeg. */
356
357/* XXXX We assume that strpos is strbeg unless sv. */
358
6eb5f6b9
JH
359/* XXXX Some places assume that there is a fixed substring.
360 An update may be needed if optimizer marks as "INTUITable"
361 RExen without fixed substrings. Similarly, it is assumed that
362 lengths of all the strings are no more than minlen, thus they
363 cannot come from lookahead.
364 (Or minlen should take into account lookahead.) */
365
2c2d71f5
JH
366/* A failure to find a constant substring means that there is no need to make
367 an expensive call to REx engine, thus we celebrate a failure. Similarly,
368 finding a substring too deep into the string means that less calls to
30944b6d
IZ
369 regtry() should be needed.
370
371 REx compiler's optimizer found 4 possible hints:
372 a) Anchored substring;
373 b) Fixed substring;
374 c) Whether we are anchored (beginning-of-line or \G);
375 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 376 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
377 string which does not contradict any of them.
378 */
2c2d71f5 379
6eb5f6b9
JH
380/* Most of decisions we do here should have been done at compile time.
381 The nodes of the REx which we used for the search should have been
382 deleted from the finite automaton. */
383
cad2e5aa
JH
384char *
385Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
386 char *strend, U32 flags, re_scream_pos_data *data)
387{
b7953727 388 register I32 start_shift = 0;
cad2e5aa 389 /* Should be nonnegative! */
b7953727 390 register I32 end_shift = 0;
2c2d71f5
JH
391 register char *s;
392 register SV *check;
a1933d95 393 char *strbeg;
cad2e5aa 394 char *t;
33b8afdf 395 int do_utf8 = sv ? SvUTF8(sv) : 0; /* if no sv we have to assume bytes */
cad2e5aa 396 I32 ml_anch;
6eb5f6b9 397 register char *other_last = Nullch; /* other substr checked before this */
b7953727 398 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
399#ifdef DEBUGGING
400 char *i_strpos = strpos;
ce333219 401 SV *dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 402#endif
a30b2f1f 403 RX_MATCH_UTF8_set(prog,do_utf8);
cad2e5aa 404
b8d68ded
JH
405 if (prog->reganch & ROPT_UTF8) {
406 DEBUG_r(PerlIO_printf(Perl_debug_log,
407 "UTF-8 regex...\n"));
408 PL_reg_flags |= RF_utf8;
409 }
410
2a782b5b 411 DEBUG_r({
b8d68ded 412 char *s = PL_reg_match_utf8 ?
c728cb41
JH
413 sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
414 strpos;
b8d68ded
JH
415 int len = PL_reg_match_utf8 ?
416 strlen(s) : strend - strpos;
2a782b5b
JH
417 if (!PL_colorset)
418 reginitcolors();
b8d68ded
JH
419 if (PL_reg_match_utf8)
420 DEBUG_r(PerlIO_printf(Perl_debug_log,
421 "UTF-8 target...\n"));
2a782b5b
JH
422 PerlIO_printf(Perl_debug_log,
423 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
424 PL_colors[4],PL_colors[5],PL_colors[0],
425 prog->precomp,
426 PL_colors[1],
427 (strlen(prog->precomp) > 60 ? "..." : ""),
428 PL_colors[0],
429 (int)(len > 60 ? 60 : len),
430 s, PL_colors[1],
431 (len > 60 ? "..." : "")
432 );
433 });
cad2e5aa 434
c344f387
JH
435 /* CHR_DIST() would be more correct here but it makes things slow. */
436 if (prog->minlen > strend - strpos) {
a72c7584
JH
437 DEBUG_r(PerlIO_printf(Perl_debug_log,
438 "String too short... [re_intuit_start]\n"));
cad2e5aa 439 goto fail;
2c2d71f5 440 }
a1933d95 441 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 442 PL_regeol = strend;
33b8afdf
JH
443 if (do_utf8) {
444 if (!prog->check_utf8 && prog->check_substr)
445 to_utf8_substr(prog);
446 check = prog->check_utf8;
447 } else {
448 if (!prog->check_substr && prog->check_utf8)
449 to_byte_substr(prog);
450 check = prog->check_substr;
451 }
452 if (check == &PL_sv_undef) {
453 DEBUG_r(PerlIO_printf(Perl_debug_log,
454 "Non-utf string cannot match utf check string\n"));
455 goto fail;
456 }
2c2d71f5 457 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
458 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
459 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 460 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 461
7e25d62c
JH
462 if (!ml_anch) {
463 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
464 | ROPT_IMPLICIT)) /* not a real BOL */
465 /* SvCUR is not set on references: SvRV and SvPVX overlap */
466 && sv && !SvROK(sv)
467 && (strpos != strbeg)) {
468 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
469 goto fail;
470 }
471 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 472 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 473 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
474 I32 slen;
475
1aa99e6b 476 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
477 if (SvTAIL(check)) {
478 slen = SvCUR(check); /* >= 1 */
cad2e5aa 479
9041c2e3 480 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
481 || (strend - s == slen && strend[-1] != '\n')) {
482 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
483 goto fail_finish;
cad2e5aa
JH
484 }
485 /* Now should match s[0..slen-2] */
486 slen--;
653099ff 487 if (slen && (*SvPVX(check) != *s
cad2e5aa 488 || (slen > 1
653099ff 489 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
490 report_neq:
491 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
492 goto fail_finish;
493 }
cad2e5aa 494 }
653099ff
GS
495 else if (*SvPVX(check) != *s
496 || ((slen = SvCUR(check)) > 1
497 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
498 goto report_neq;
499 goto success_at_start;
7e25d62c 500 }
cad2e5aa 501 }
2c2d71f5 502 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 503 s = strpos;
2c2d71f5 504 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 505 end_shift = prog->minlen - start_shift -
653099ff 506 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 507 if (!ml_anch) {
653099ff
GS
508 I32 end = prog->check_offset_max + CHR_SVLEN(check)
509 - (SvTAIL(check) != 0);
1aa99e6b 510 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
511
512 if (end_shift < eshift)
513 end_shift = eshift;
514 }
cad2e5aa 515 }
2c2d71f5 516 else { /* Can match at random position */
cad2e5aa
JH
517 ml_anch = 0;
518 s = strpos;
2c2d71f5
JH
519 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
520 /* Should be nonnegative! */
521 end_shift = prog->minlen - start_shift -
653099ff 522 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
523 }
524
2c2d71f5 525#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 526 if (end_shift < 0)
6bbae5e6 527 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
528#endif
529
2c2d71f5
JH
530 restart:
531 /* Find a possible match in the region s..strend by looking for
532 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 533 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
534 I32 p = -1; /* Internal iterator of scream. */
535 I32 *pp = data ? data->scream_pos : &p;
536
2c2d71f5
JH
537 if (PL_screamfirst[BmRARE(check)] >= 0
538 || ( BmRARE(check) == '\n'
539 && (BmPREVIOUS(check) == SvCUR(check) - 1)
540 && SvTAIL(check) ))
9041c2e3 541 s = screaminstr(sv, check,
2c2d71f5 542 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 543 else
2c2d71f5 544 goto fail_finish;
4addbd3b
HS
545 /* we may be pointing at the wrong string */
546 if (s && RX_MATCH_COPIED(prog))
547 s = prog->subbeg + (s - SvPVX(sv));
cad2e5aa
JH
548 if (data)
549 *data->scream_olds = s;
550 }
f33976b4 551 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
552 s = fbm_instr((U8*)(s + start_shift),
553 (U8*)(strend - end_shift),
554 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 555 else
1aa99e6b
IH
556 s = fbm_instr(HOP3(s, start_shift, strend),
557 HOP3(strend, -end_shift, strbeg),
2c2d71f5 558 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
559
560 /* Update the count-of-usability, remove useless subpatterns,
561 unshift s. */
2c2d71f5
JH
562
563 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
564 (s ? "Found" : "Did not find"),
33b8afdf 565 (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) ? "anchored" : "floating"),
2c2d71f5 566 PL_colors[0],
7b0972df
JH
567 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
568 SvPVX(check),
2c2d71f5
JH
569 PL_colors[1], (SvTAIL(check) ? "$" : ""),
570 (s ? " at offset " : "...\n") ) );
571
572 if (!s)
573 goto fail_finish;
574
6eb5f6b9
JH
575 check_at = s;
576
2c2d71f5 577 /* Finish the diagnostic message */
30944b6d 578 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
579
580 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
581 Start with the other substr.
582 XXXX no SCREAM optimization yet - and a very coarse implementation
583 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
584 *always* match. Probably should be marked during compile...
585 Probably it is right to do no SCREAM here...
586 */
587
33b8afdf 588 if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8) : (prog->float_substr && prog->anchored_substr)) {
30944b6d 589 /* Take into account the "other" substring. */
2c2d71f5
JH
590 /* XXXX May be hopelessly wrong for UTF... */
591 if (!other_last)
6eb5f6b9 592 other_last = strpos;
33b8afdf 593 if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
30944b6d
IZ
594 do_other_anchored:
595 {
1aa99e6b 596 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5 597 char *s1 = s;
33b8afdf 598 SV* must;
2c2d71f5 599
2c2d71f5
JH
600 t = s - prog->check_offset_max;
601 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 602 && (!do_utf8
1aa99e6b 603 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 604 && t > strpos)))
30944b6d 605 /* EMPTY */;
2c2d71f5
JH
606 else
607 t = strpos;
1aa99e6b 608 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
609 if (t < other_last) /* These positions already checked */
610 t = other_last;
1aa99e6b 611 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
612 if (last < last1)
613 last1 = last;
614 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
615 /* On end-of-str: see comment below. */
33b8afdf
JH
616 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
617 if (must == &PL_sv_undef) {
618 s = (char*)NULL;
619 DEBUG_r(must = prog->anchored_utf8); /* for debug */
620 }
621 else
622 s = fbm_instr(
623 (unsigned char*)t,
624 HOP3(HOP3(last1, prog->anchored_offset, strend)
625 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
626 must,
627 PL_multiline ? FBMrf_MULTILINE : 0
628 );
1aa99e6b
IH
629 DEBUG_r(PerlIO_printf(Perl_debug_log,
630 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
631 (s ? "Found" : "Contradicts"),
632 PL_colors[0],
33b8afdf
JH
633 (int)(SvCUR(must)
634 - (SvTAIL(must)!=0)),
635 SvPVX(must),
636 PL_colors[1], (SvTAIL(must) ? "$" : "")));
2c2d71f5
JH
637 if (!s) {
638 if (last1 >= last2) {
639 DEBUG_r(PerlIO_printf(Perl_debug_log,
640 ", giving up...\n"));
641 goto fail_finish;
642 }
643 DEBUG_r(PerlIO_printf(Perl_debug_log,
644 ", trying floating at offset %ld...\n",
1aa99e6b
IH
645 (long)(HOP3c(s1, 1, strend) - i_strpos)));
646 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
647 s = HOP3c(last, 1, strend);
2c2d71f5
JH
648 goto restart;
649 }
650 else {
651 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 652 (long)(s - i_strpos)));
1aa99e6b
IH
653 t = HOP3c(s, -prog->anchored_offset, strbeg);
654 other_last = HOP3c(s, 1, strend);
30944b6d 655 s = s1;
2c2d71f5
JH
656 if (t == strpos)
657 goto try_at_start;
2c2d71f5
JH
658 goto try_at_offset;
659 }
30944b6d 660 }
2c2d71f5
JH
661 }
662 else { /* Take into account the floating substring. */
33b8afdf
JH
663 char *last, *last1;
664 char *s1 = s;
665 SV* must;
666
667 t = HOP3c(s, -start_shift, strbeg);
668 last1 = last =
669 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
670 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
671 last = HOP3c(t, prog->float_max_offset, strend);
672 s = HOP3c(t, prog->float_min_offset, strend);
673 if (s < other_last)
674 s = other_last;
2c2d71f5 675 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
33b8afdf
JH
676 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
677 /* fbm_instr() takes into account exact value of end-of-str
678 if the check is SvTAIL(ed). Since false positives are OK,
679 and end-of-str is not later than strend we are OK. */
680 if (must == &PL_sv_undef) {
681 s = (char*)NULL;
682 DEBUG_r(must = prog->float_utf8); /* for debug message */
683 }
684 else
2c2d71f5 685 s = fbm_instr((unsigned char*)s,
33b8afdf
JH
686 (unsigned char*)last + SvCUR(must)
687 - (SvTAIL(must)!=0),
688 must, PL_multiline ? FBMrf_MULTILINE : 0);
689 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
690 (s ? "Found" : "Contradicts"),
691 PL_colors[0],
692 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
693 SvPVX(must),
694 PL_colors[1], (SvTAIL(must) ? "$" : "")));
695 if (!s) {
696 if (last1 == last) {
2c2d71f5 697 DEBUG_r(PerlIO_printf(Perl_debug_log,
33b8afdf
JH
698 ", giving up...\n"));
699 goto fail_finish;
2c2d71f5 700 }
33b8afdf
JH
701 DEBUG_r(PerlIO_printf(Perl_debug_log,
702 ", trying anchored starting at offset %ld...\n",
703 (long)(s1 + 1 - i_strpos)));
704 other_last = last;
705 s = HOP3c(t, 1, strend);
706 goto restart;
707 }
708 else {
709 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
710 (long)(s - i_strpos)));
711 other_last = s; /* Fix this later. --Hugo */
712 s = s1;
713 if (t == strpos)
714 goto try_at_start;
715 goto try_at_offset;
716 }
2c2d71f5 717 }
cad2e5aa 718 }
2c2d71f5
JH
719
720 t = s - prog->check_offset_max;
2c2d71f5 721 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
1d86a7f9 722 && (!do_utf8
1aa99e6b
IH
723 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
724 && t > strpos))) {
2c2d71f5
JH
725 /* Fixed substring is found far enough so that the match
726 cannot start at strpos. */
727 try_at_offset:
cad2e5aa 728 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
729 /* Eventually fbm_*() should handle this, but often
730 anchored_offset is not 0, so this check will not be wasted. */
731 /* XXXX In the code below we prefer to look for "^" even in
732 presence of anchored substrings. And we search even
733 beyond the found float position. These pessimizations
734 are historical artefacts only. */
735 find_anchor:
2c2d71f5 736 while (t < strend - prog->minlen) {
cad2e5aa 737 if (*t == '\n') {
4ee3650e 738 if (t < check_at - prog->check_offset_min) {
33b8afdf 739 if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
4ee3650e
GS
740 /* Since we moved from the found position,
741 we definitely contradict the found anchored
30944b6d
IZ
742 substr. Due to the above check we do not
743 contradict "check" substr.
744 Thus we can arrive here only if check substr
745 is float. Redo checking for "other"=="fixed".
746 */
9041c2e3 747 strpos = t + 1;
30944b6d
IZ
748 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
749 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
750 goto do_other_anchored;
751 }
4ee3650e
GS
752 /* We don't contradict the found floating substring. */
753 /* XXXX Why not check for STCLASS? */
cad2e5aa 754 s = t + 1;
2c2d71f5 755 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 756 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
757 goto set_useful;
758 }
4ee3650e
GS
759 /* Position contradicts check-string */
760 /* XXXX probably better to look for check-string
761 than for "\n", so one should lower the limit for t? */
762 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 763 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 764 other_last = strpos = s = t + 1;
cad2e5aa
JH
765 goto restart;
766 }
767 t++;
768 }
2c2d71f5
JH
769 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
770 PL_colors[0],PL_colors[1]));
771 goto fail_finish;
cad2e5aa 772 }
f5952150
GS
773 else {
774 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
775 PL_colors[0],PL_colors[1]));
776 }
cad2e5aa
JH
777 s = t;
778 set_useful:
33b8afdf 779 ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
cad2e5aa
JH
780 }
781 else {
f5952150 782 /* The found string does not prohibit matching at strpos,
2c2d71f5 783 - no optimization of calling REx engine can be performed,
f5952150
GS
784 unless it was an MBOL and we are not after MBOL,
785 or a future STCLASS check will fail this. */
2c2d71f5
JH
786 try_at_start:
787 /* Even in this situation we may use MBOL flag if strpos is offset
788 wrt the start of the string. */
05b4157f 789 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 790 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
791 /* May be due to an implicit anchor of m{.*foo} */
792 && !(prog->reganch & ROPT_IMPLICIT))
793 {
cad2e5aa
JH
794 t = strpos;
795 goto find_anchor;
796 }
30944b6d 797 DEBUG_r( if (ml_anch)
f5952150
GS
798 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
799 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 800 );
2c2d71f5 801 success_at_start:
30944b6d 802 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
33b8afdf
JH
803 && (do_utf8 ? (
804 prog->check_utf8 /* Could be deleted already */
805 && --BmUSEFUL(prog->check_utf8) < 0
806 && (prog->check_utf8 == prog->float_utf8)
807 ) : (
808 prog->check_substr /* Could be deleted already */
809 && --BmUSEFUL(prog->check_substr) < 0
810 && (prog->check_substr == prog->float_substr)
811 )))
66e933ab 812 {
cad2e5aa 813 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 814 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
33b8afdf
JH
815 SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
816 if (do_utf8 ? prog->check_substr : prog->check_utf8)
817 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
818 prog->check_substr = prog->check_utf8 = Nullsv; /* disable */
819 prog->float_substr = prog->float_utf8 = Nullsv; /* clear */
5e39e1e5 820 check = Nullsv; /* abort */
cad2e5aa 821 s = strpos;
3cf5c195
IZ
822 /* XXXX This is a remnant of the old implementation. It
823 looks wasteful, since now INTUIT can use many
6eb5f6b9 824 other heuristics. */
cad2e5aa
JH
825 prog->reganch &= ~RE_USE_INTUIT;
826 }
827 else
828 s = strpos;
829 }
830
6eb5f6b9
JH
831 /* Last resort... */
832 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
833 if (prog->regstclass) {
834 /* minlen == 0 is possible if regstclass is \b or \B,
835 and the fixed substr is ''$.
836 Since minlen is already taken into account, s+1 is before strend;
837 accidentally, minlen >= 1 guaranties no false positives at s + 1
838 even for \b or \B. But (minlen? 1 : 0) below assumes that
839 regstclass does not come from lookahead... */
840 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
841 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 842 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 843 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 844 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 845 : 1);
33b8afdf 846 char *endpos = (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
1aa99e6b 847 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
33b8afdf 848 : (prog->float_substr || prog->float_utf8
1aa99e6b
IH
849 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
850 cl_l, strend)
851 : strend);
a1933d95 852 char *startpos = strbeg;
6eb5f6b9
JH
853
854 t = s;
76384e4a 855 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 856 PL_regdata = prog->data;
76384e4a
GS
857 PL_bostr = startpos;
858 }
9926ca43 859 cache_re(prog);
f33976b4 860 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
6eb5f6b9
JH
861 if (!s) {
862#ifdef DEBUGGING
b7953727 863 char *what = 0;
6eb5f6b9
JH
864#endif
865 if (endpos == strend) {
866 DEBUG_r( PerlIO_printf(Perl_debug_log,
867 "Could not match STCLASS...\n") );
868 goto fail;
869 }
66e933ab
GS
870 DEBUG_r( PerlIO_printf(Perl_debug_log,
871 "This position contradicts STCLASS...\n") );
653099ff
GS
872 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
873 goto fail;
6eb5f6b9 874 /* Contradict one of substrings */
33b8afdf
JH
875 if (prog->anchored_substr || prog->anchored_utf8) {
876 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
6eb5f6b9
JH
877 DEBUG_r( what = "anchored" );
878 hop_and_restart:
1aa99e6b 879 s = HOP3c(t, 1, strend);
66e933ab
GS
880 if (s + start_shift + end_shift > strend) {
881 /* XXXX Should be taken into account earlier? */
882 DEBUG_r( PerlIO_printf(Perl_debug_log,
883 "Could not match STCLASS...\n") );
884 goto fail;
885 }
5e39e1e5
HS
886 if (!check)
887 goto giveup;
6eb5f6b9 888 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 889 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
890 what, (long)(s + start_shift - i_strpos)) );
891 goto restart;
892 }
66e933ab 893 /* Have both, check_string is floating */
6eb5f6b9
JH
894 if (t + start_shift >= check_at) /* Contradicts floating=check */
895 goto retry_floating_check;
896 /* Recheck anchored substring, but not floating... */
9041c2e3 897 s = check_at;
5e39e1e5
HS
898 if (!check)
899 goto giveup;
6eb5f6b9 900 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 901 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
902 (long)(other_last - i_strpos)) );
903 goto do_other_anchored;
904 }
60e71179
GS
905 /* Another way we could have checked stclass at the
906 current position only: */
907 if (ml_anch) {
908 s = t = t + 1;
5e39e1e5
HS
909 if (!check)
910 goto giveup;
60e71179 911 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
912 "Looking for /%s^%s/m starting at offset %ld...\n",
913 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 914 goto try_at_offset;
66e933ab 915 }
33b8afdf 916 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
60e71179 917 goto fail;
6eb5f6b9
JH
918 /* Check is floating subtring. */
919 retry_floating_check:
920 t = check_at - start_shift;
921 DEBUG_r( what = "floating" );
922 goto hop_and_restart;
923 }
b7953727
JH
924 if (t != s) {
925 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 926 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
927 (long)(t - i_strpos), (long)(s - i_strpos))
928 );
929 }
930 else {
931 DEBUG_r(PerlIO_printf(Perl_debug_log,
932 "Does not contradict STCLASS...\n");
933 );
934 }
6eb5f6b9 935 }
5e39e1e5
HS
936 giveup:
937 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
938 PL_colors[4], (check ? "Guessed" : "Giving up"),
939 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 940 return s;
2c2d71f5
JH
941
942 fail_finish: /* Substring not found */
33b8afdf
JH
943 if (prog->check_substr || prog->check_utf8) /* could be removed already */
944 BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
cad2e5aa 945 fail:
2c2d71f5 946 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
947 PL_colors[4],PL_colors[5]));
948 return Nullch;
949}
9661b544 950
6eb5f6b9 951/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
952STATIC char *
953S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 954{
6eb5f6b9
JH
955 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
956 char *m;
d8093b23
G
957 STRLEN ln;
958 unsigned int c1;
959 unsigned int c2;
6eb5f6b9
JH
960 char *e;
961 register I32 tmp = 1; /* Scratch variable? */
53c4c00c 962 register bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 963
6eb5f6b9
JH
964 /* We know what class it must start with. */
965 switch (OP(c)) {
6eb5f6b9 966 case ANYOF:
388cc4de
HS
967 if (do_utf8) {
968 while (s < strend) {
969 if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
970 !UTF8_IS_INVARIANT((U8)s[0]) ?
971 reginclass(c, (U8*)s, 0, do_utf8) :
972 REGINCLASS(c, (U8*)s)) {
973 if (tmp && (norun || regtry(prog, s)))
974 goto got_it;
975 else
976 tmp = doevery;
977 }
978 else
979 tmp = 1;
980 s += UTF8SKIP(s);
981 }
982 }
983 else {
984 while (s < strend) {
985 STRLEN skip = 1;
986
987 if (REGINCLASS(c, (U8*)s) ||
988 (ANYOF_FOLD_SHARP_S(c, s, strend) &&
989 /* The assignment of 2 is intentional:
990 * for the folded sharp s, the skip is 2. */
991 (skip = SHARP_S_SKIP))) {
992 if (tmp && (norun || regtry(prog, s)))
993 goto got_it;
994 else
995 tmp = doevery;
996 }
997 else
998 tmp = 1;
999 s += skip;
1000 }
a0d0e21e 1001 }
6eb5f6b9 1002 break;
f33976b4
DB
1003 case CANY:
1004 while (s < strend) {
1005 if (tmp && (norun || regtry(prog, s)))
1006 goto got_it;
1007 else
1008 tmp = doevery;
1009 s++;
1010 }
1011 break;
6eb5f6b9
JH
1012 case EXACTF:
1013 m = STRING(c);
1014 ln = STR_LEN(c);
1aa99e6b 1015 if (UTF) {
a2a2844f 1016 STRLEN ulen1, ulen2;
e7ae6809
JH
1017 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
1018 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
1019
1020 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1021 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1022
872c91ae
JH
1023 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN_UCLC,
1024 0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1025 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN_UCLC,
1026 0, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1aa99e6b
IH
1027 }
1028 else {
1029 c1 = *(U8*)m;
1030 c2 = PL_fold[c1];
1031 }
6eb5f6b9
JH
1032 goto do_exactf;
1033 case EXACTFL:
1034 m = STRING(c);
1035 ln = STR_LEN(c);
d8093b23 1036 c1 = *(U8*)m;
6eb5f6b9
JH
1037 c2 = PL_fold_locale[c1];
1038 do_exactf:
6795a346 1039 e = HOP3c(strend, -(I32)ln, s);
b3c9acc1 1040
6eb5f6b9
JH
1041 if (norun && e < s)
1042 e = s; /* Due to minlen logic of intuit() */
1aa99e6b 1043
60a8b682
JH
1044 /* The idea in the EXACTF* cases is to first find the
1045 * first character of the EXACTF* node and then, if
1046 * necessary, case-insensitively compare the full
1047 * text of the node. The c1 and c2 are the first
1048 * characters (though in Unicode it gets a bit
1049 * more complicated because there are more cases
7f16dd3d
JH
1050 * than just upper and lower: one needs to use
1051 * the so-called folding case for case-insensitive
1052 * matching (called "loose matching" in Unicode).
1053 * ibcmp_utf8() will do just that. */
60a8b682 1054
1aa99e6b 1055 if (do_utf8) {
575cac57
JH
1056 UV c, f;
1057 U8 tmpbuf [UTF8_MAXLEN+1];
1058 U8 foldbuf[UTF8_MAXLEN_FOLD+1];
1059 STRLEN len, foldlen;
d7f013c8 1060
09091399 1061 if (c1 == c2) {
1aa99e6b 1062 while (s <= e) {
872c91ae
JH
1063 c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len,
1064 ckWARN(WARN_UTF8) ?
1065 0 : UTF8_ALLOW_ANY);
80aecb99
JH
1066 if ( c == c1
1067 && (ln == len ||
66423254 1068 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1069 m, (char **)0, ln, (bool)UTF))
55da9344 1070 && (norun || regtry(prog, s)) )
1aa99e6b 1071 goto got_it;
80aecb99
JH
1072 else {
1073 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1074 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1075 if ( f != c
1076 && (f == c1 || f == c2)
1077 && (ln == foldlen ||
66423254
JH
1078 !ibcmp_utf8((char *) foldbuf,
1079 (char **)0, foldlen, do_utf8,
d07ddd77 1080 m,
eb160463 1081 (char **)0, ln, (bool)UTF))
80aecb99
JH
1082 && (norun || regtry(prog, s)) )
1083 goto got_it;
1084 }
1aa99e6b
IH
1085 s += len;
1086 }
09091399
JH
1087 }
1088 else {
1aa99e6b 1089 while (s <= e) {
872c91ae
JH
1090 c = utf8n_to_uvchr((U8*)s, UTF8_MAXLEN, &len,
1091 ckWARN(WARN_UTF8) ?
1092 0 : UTF8_ALLOW_ANY);
80aecb99 1093
60a8b682 1094 /* Handle some of the three Greek sigmas cases.
8c01da3c
JH
1095 * Note that not all the possible combinations
1096 * are handled here: some of them are handled
1097 * by the standard folding rules, and some of
1098 * them (the character class or ANYOF cases)
1099 * are handled during compiletime in
1100 * regexec.c:S_regclass(). */
880bd946
JH
1101 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1102 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1103 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
80aecb99
JH
1104
1105 if ( (c == c1 || c == c2)
1106 && (ln == len ||
66423254 1107 ibcmp_utf8(s, (char **)0, 0, do_utf8,
eb160463 1108 m, (char **)0, ln, (bool)UTF))
55da9344 1109 && (norun || regtry(prog, s)) )
1aa99e6b 1110 goto got_it;
80aecb99
JH
1111 else {
1112 uvchr_to_utf8(tmpbuf, c);
ac7e0132 1113 f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
80aecb99
JH
1114 if ( f != c
1115 && (f == c1 || f == c2)
1116 && (ln == foldlen ||
a6872d42 1117 !ibcmp_utf8((char *) foldbuf,
66423254 1118 (char **)0, foldlen, do_utf8,
d07ddd77 1119 m,
eb160463 1120 (char **)0, ln, (bool)UTF))
80aecb99
JH
1121 && (norun || regtry(prog, s)) )
1122 goto got_it;
1123 }
1aa99e6b
IH
1124 s += len;
1125 }
09091399 1126 }
1aa99e6b
IH
1127 }
1128 else {
1129 if (c1 == c2)
1130 while (s <= e) {
1131 if ( *(U8*)s == c1
1132 && (ln == 1 || !(OP(c) == EXACTF
1133 ? ibcmp(s, m, ln)
1134 : ibcmp_locale(s, m, ln)))
1135 && (norun || regtry(prog, s)) )
1136 goto got_it;
1137 s++;
1138 }
1139 else
1140 while (s <= e) {
1141 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1142 && (ln == 1 || !(OP(c) == EXACTF
1143 ? ibcmp(s, m, ln)
1144 : ibcmp_locale(s, m, ln)))
1145 && (norun || regtry(prog, s)) )
1146 goto got_it;
1147 s++;
1148 }
b3c9acc1
IZ
1149 }
1150 break;
bbce6d69 1151 case BOUNDL:
3280af22 1152 PL_reg_flags |= RF_tainted;
bbce6d69 1153 /* FALL THROUGH */
a0d0e21e 1154 case BOUND:
ffc61ed2 1155 if (do_utf8) {
12d33761 1156 if (s == PL_bostr)
ffc61ed2
JH
1157 tmp = '\n';
1158 else {
b4f7163a 1159 U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
9041c2e3 1160
b4f7163a 1161 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
ffc61ed2
JH
1162 }
1163 tmp = ((OP(c) == BOUND ?
9041c2e3 1164 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1165 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1166 while (s < strend) {
1167 if (tmp == !(OP(c) == BOUND ?
3568d838 1168 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1169 isALNUM_LC_utf8((U8*)s)))
1170 {
1171 tmp = !tmp;
1172 if ((norun || regtry(prog, s)))
1173 goto got_it;
1174 }
1175 s += UTF8SKIP(s);
a687059c 1176 }
a0d0e21e 1177 }
667bb95a 1178 else {
12d33761 1179 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1180 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1181 while (s < strend) {
1182 if (tmp ==
1183 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1184 tmp = !tmp;
1185 if ((norun || regtry(prog, s)))
1186 goto got_it;
1187 }
1188 s++;
a0ed51b3 1189 }
a0ed51b3 1190 }
6eb5f6b9 1191 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1192 goto got_it;
1193 break;
bbce6d69 1194 case NBOUNDL:
3280af22 1195 PL_reg_flags |= RF_tainted;
bbce6d69 1196 /* FALL THROUGH */
a0d0e21e 1197 case NBOUND:
ffc61ed2 1198 if (do_utf8) {
12d33761 1199 if (s == PL_bostr)
ffc61ed2
JH
1200 tmp = '\n';
1201 else {
b4f7163a 1202 U8 *r = reghop3((U8*)s, -1, (U8*)PL_bostr);
9041c2e3 1203
b4f7163a 1204 tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
ffc61ed2
JH
1205 }
1206 tmp = ((OP(c) == NBOUND ?
9041c2e3 1207 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1208 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1209 while (s < strend) {
1210 if (tmp == !(OP(c) == NBOUND ?
3568d838 1211 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1212 isALNUM_LC_utf8((U8*)s)))
1213 tmp = !tmp;
1214 else if ((norun || regtry(prog, s)))
1215 goto got_it;
1216 s += UTF8SKIP(s);
1217 }
a0d0e21e 1218 }
667bb95a 1219 else {
12d33761 1220 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1221 tmp = ((OP(c) == NBOUND ?
1222 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1223 while (s < strend) {
1224 if (tmp ==
1225 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1226 tmp = !tmp;
1227 else if ((norun || regtry(prog, s)))
1228 goto got_it;
1229 s++;
1230 }
a0ed51b3 1231 }
6eb5f6b9 1232 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1233 goto got_it;
1234 break;
a0d0e21e 1235 case ALNUM:
ffc61ed2 1236 if (do_utf8) {
8269fa76 1237 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1238 while (s < strend) {
3568d838 1239 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1240 if (tmp && (norun || regtry(prog, s)))
1241 goto got_it;
1242 else
1243 tmp = doevery;
1244 }
bbce6d69 1245 else
ffc61ed2
JH
1246 tmp = 1;
1247 s += UTF8SKIP(s);
bbce6d69 1248 }
bbce6d69 1249 }
ffc61ed2
JH
1250 else {
1251 while (s < strend) {
1252 if (isALNUM(*s)) {
1253 if (tmp && (norun || regtry(prog, s)))
1254 goto got_it;
1255 else
1256 tmp = doevery;
1257 }
a0ed51b3 1258 else
ffc61ed2
JH
1259 tmp = 1;
1260 s++;
a0ed51b3 1261 }
a0ed51b3
LW
1262 }
1263 break;
bbce6d69 1264 case ALNUML:
3280af22 1265 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1266 if (do_utf8) {
1267 while (s < strend) {
1268 if (isALNUM_LC_utf8((U8*)s)) {
1269 if (tmp && (norun || regtry(prog, s)))
1270 goto got_it;
1271 else
1272 tmp = doevery;
1273 }
a687059c 1274 else
ffc61ed2
JH
1275 tmp = 1;
1276 s += UTF8SKIP(s);
a0d0e21e 1277 }
a0d0e21e 1278 }
ffc61ed2
JH
1279 else {
1280 while (s < strend) {
1281 if (isALNUM_LC(*s)) {
1282 if (tmp && (norun || regtry(prog, s)))
1283 goto got_it;
1284 else
1285 tmp = doevery;
1286 }
a0ed51b3 1287 else
ffc61ed2
JH
1288 tmp = 1;
1289 s++;
a0ed51b3 1290 }
a0ed51b3
LW
1291 }
1292 break;
a0d0e21e 1293 case NALNUM:
ffc61ed2 1294 if (do_utf8) {
8269fa76 1295 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1296 while (s < strend) {
3568d838 1297 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1298 if (tmp && (norun || regtry(prog, s)))
1299 goto got_it;
1300 else
1301 tmp = doevery;
1302 }
bbce6d69 1303 else
ffc61ed2
JH
1304 tmp = 1;
1305 s += UTF8SKIP(s);
bbce6d69 1306 }
bbce6d69 1307 }
ffc61ed2
JH
1308 else {
1309 while (s < strend) {
1310 if (!isALNUM(*s)) {
1311 if (tmp && (norun || regtry(prog, s)))
1312 goto got_it;
1313 else
1314 tmp = doevery;
1315 }
a0ed51b3 1316 else
ffc61ed2
JH
1317 tmp = 1;
1318 s++;
a0ed51b3 1319 }
a0ed51b3
LW
1320 }
1321 break;
bbce6d69 1322 case NALNUML:
3280af22 1323 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1324 if (do_utf8) {
1325 while (s < strend) {
1326 if (!isALNUM_LC_utf8((U8*)s)) {
1327 if (tmp && (norun || regtry(prog, s)))
1328 goto got_it;
1329 else
1330 tmp = doevery;
1331 }
a687059c 1332 else
ffc61ed2
JH
1333 tmp = 1;
1334 s += UTF8SKIP(s);
a687059c 1335 }
a0d0e21e 1336 }
ffc61ed2
JH
1337 else {
1338 while (s < strend) {
1339 if (!isALNUM_LC(*s)) {
1340 if (tmp && (norun || regtry(prog, s)))
1341 goto got_it;
1342 else
1343 tmp = doevery;
1344 }
a0ed51b3 1345 else
ffc61ed2
JH
1346 tmp = 1;
1347 s++;
a0ed51b3 1348 }
a0ed51b3
LW
1349 }
1350 break;
a0d0e21e 1351 case SPACE:
ffc61ed2 1352 if (do_utf8) {
8269fa76 1353 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1354 while (s < strend) {
3568d838 1355 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1356 if (tmp && (norun || regtry(prog, s)))
1357 goto got_it;
1358 else
1359 tmp = doevery;
1360 }
a0d0e21e 1361 else
ffc61ed2
JH
1362 tmp = 1;
1363 s += UTF8SKIP(s);
2304df62 1364 }
a0d0e21e 1365 }
ffc61ed2
JH
1366 else {
1367 while (s < strend) {
1368 if (isSPACE(*s)) {
1369 if (tmp && (norun || regtry(prog, s)))
1370 goto got_it;
1371 else
1372 tmp = doevery;
1373 }
a0ed51b3 1374 else
ffc61ed2
JH
1375 tmp = 1;
1376 s++;
a0ed51b3 1377 }
a0ed51b3
LW
1378 }
1379 break;
bbce6d69 1380 case SPACEL:
3280af22 1381 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1382 if (do_utf8) {
1383 while (s < strend) {
1384 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1385 if (tmp && (norun || regtry(prog, s)))
1386 goto got_it;
1387 else
1388 tmp = doevery;
1389 }
bbce6d69 1390 else
ffc61ed2
JH
1391 tmp = 1;
1392 s += UTF8SKIP(s);
bbce6d69 1393 }
bbce6d69 1394 }
ffc61ed2
JH
1395 else {
1396 while (s < strend) {
1397 if (isSPACE_LC(*s)) {
1398 if (tmp && (norun || regtry(prog, s)))
1399 goto got_it;
1400 else
1401 tmp = doevery;
1402 }
a0ed51b3 1403 else
ffc61ed2
JH
1404 tmp = 1;
1405 s++;
a0ed51b3 1406 }
a0ed51b3
LW
1407 }
1408 break;
a0d0e21e 1409 case NSPACE:
ffc61ed2 1410 if (do_utf8) {
8269fa76 1411 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1412 while (s < strend) {
3568d838 1413 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1414 if (tmp && (norun || regtry(prog, s)))
1415 goto got_it;
1416 else
1417 tmp = doevery;
1418 }
a0d0e21e 1419 else
ffc61ed2
JH
1420 tmp = 1;
1421 s += UTF8SKIP(s);
a687059c 1422 }
a0d0e21e 1423 }
ffc61ed2
JH
1424 else {
1425 while (s < strend) {
1426 if (!isSPACE(*s)) {
1427 if (tmp && (norun || regtry(prog, s)))
1428 goto got_it;
1429 else
1430 tmp = doevery;
1431 }
a0ed51b3 1432 else
ffc61ed2
JH
1433 tmp = 1;
1434 s++;
a0ed51b3 1435 }
a0ed51b3
LW
1436 }
1437 break;
bbce6d69 1438 case NSPACEL:
3280af22 1439 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1440 if (do_utf8) {
1441 while (s < strend) {
1442 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1443 if (tmp && (norun || regtry(prog, s)))
1444 goto got_it;
1445 else
1446 tmp = doevery;
1447 }
bbce6d69 1448 else
ffc61ed2
JH
1449 tmp = 1;
1450 s += UTF8SKIP(s);
bbce6d69 1451 }
bbce6d69 1452 }
ffc61ed2
JH
1453 else {
1454 while (s < strend) {
1455 if (!isSPACE_LC(*s)) {
1456 if (tmp && (norun || regtry(prog, s)))
1457 goto got_it;
1458 else
1459 tmp = doevery;
1460 }
a0ed51b3 1461 else
ffc61ed2
JH
1462 tmp = 1;
1463 s++;
a0ed51b3 1464 }
a0ed51b3
LW
1465 }
1466 break;
a0d0e21e 1467 case DIGIT:
ffc61ed2 1468 if (do_utf8) {
8269fa76 1469 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1470 while (s < strend) {
3568d838 1471 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1472 if (tmp && (norun || regtry(prog, s)))
1473 goto got_it;
1474 else
1475 tmp = doevery;
1476 }
a0d0e21e 1477 else
ffc61ed2
JH
1478 tmp = 1;
1479 s += UTF8SKIP(s);
2b69d0c2 1480 }
a0d0e21e 1481 }
ffc61ed2
JH
1482 else {
1483 while (s < strend) {
1484 if (isDIGIT(*s)) {
1485 if (tmp && (norun || regtry(prog, s)))
1486 goto got_it;
1487 else
1488 tmp = doevery;
1489 }
a0ed51b3 1490 else
ffc61ed2
JH
1491 tmp = 1;
1492 s++;
a0ed51b3 1493 }
a0ed51b3
LW
1494 }
1495 break;
b8c5462f
JH
1496 case DIGITL:
1497 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1498 if (do_utf8) {
1499 while (s < strend) {
1500 if (isDIGIT_LC_utf8((U8*)s)) {
1501 if (tmp && (norun || regtry(prog, s)))
1502 goto got_it;
1503 else
1504 tmp = doevery;
1505 }
b8c5462f 1506 else
ffc61ed2
JH
1507 tmp = 1;
1508 s += UTF8SKIP(s);
b8c5462f 1509 }
b8c5462f 1510 }
ffc61ed2
JH
1511 else {
1512 while (s < strend) {
1513 if (isDIGIT_LC(*s)) {
1514 if (tmp && (norun || regtry(prog, s)))
1515 goto got_it;
1516 else
1517 tmp = doevery;
1518 }
b8c5462f 1519 else
ffc61ed2
JH
1520 tmp = 1;
1521 s++;
b8c5462f 1522 }
b8c5462f
JH
1523 }
1524 break;
a0d0e21e 1525 case NDIGIT:
ffc61ed2 1526 if (do_utf8) {
8269fa76 1527 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1528 while (s < strend) {
3568d838 1529 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1530 if (tmp && (norun || regtry(prog, s)))
1531 goto got_it;
1532 else
1533 tmp = doevery;
1534 }
a0d0e21e 1535 else
ffc61ed2
JH
1536 tmp = 1;
1537 s += UTF8SKIP(s);
a687059c 1538 }
a0d0e21e 1539 }
ffc61ed2
JH
1540 else {
1541 while (s < strend) {
1542 if (!isDIGIT(*s)) {
1543 if (tmp && (norun || regtry(prog, s)))
1544 goto got_it;
1545 else
1546 tmp = doevery;
1547 }
a0ed51b3 1548 else
ffc61ed2
JH
1549 tmp = 1;
1550 s++;
a0ed51b3 1551 }
a0ed51b3
LW
1552 }
1553 break;
b8c5462f
JH
1554 case NDIGITL:
1555 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1556 if (do_utf8) {
1557 while (s < strend) {
1558 if (!isDIGIT_LC_utf8((U8*)s)) {
1559 if (tmp && (norun || regtry(prog, s)))
1560 goto got_it;
1561 else
1562 tmp = doevery;
1563 }
b8c5462f 1564 else
ffc61ed2
JH
1565 tmp = 1;
1566 s += UTF8SKIP(s);
b8c5462f 1567 }
a0ed51b3 1568 }
ffc61ed2
JH
1569 else {
1570 while (s < strend) {
1571 if (!isDIGIT_LC(*s)) {
1572 if (tmp && (norun || regtry(prog, s)))
1573 goto got_it;
1574 else
1575 tmp = doevery;
1576 }
cf93c79d 1577 else
ffc61ed2
JH
1578 tmp = 1;
1579 s++;
b8c5462f 1580 }
b8c5462f
JH
1581 }
1582 break;
b3c9acc1 1583 default:
3c3eec57
GS
1584 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1585 break;
d6a28714 1586 }
6eb5f6b9
JH
1587 return 0;
1588 got_it:
1589 return s;
1590}
1591
1592/*
1593 - regexec_flags - match a regexp against a string
1594 */
1595I32
1596Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1597 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1598/* strend: pointer to null at end of string */
1599/* strbeg: real beginning of string */
1600/* minend: end of match must be >=minend after stringarg. */
1601/* data: May be used for some additional optimizations. */
1602/* nosave: For optimizations. */
1603{
6eb5f6b9
JH
1604 register char *s;
1605 register regnode *c;
1606 register char *startpos = stringarg;
6eb5f6b9
JH
1607 I32 minlen; /* must match at least this many chars */
1608 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1609 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1610 constant substr. */ /* CC */
1611 I32 end_shift = 0; /* Same for the end. */ /* CC */
1612 I32 scream_pos = -1; /* Internal iterator of scream. */
1613 char *scream_olds;
1614 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1615 bool do_utf8 = DO_UTF8(sv);
2a782b5b 1616#ifdef DEBUGGING
9e55ce06
JH
1617 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
1618 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2a782b5b 1619#endif
a30b2f1f 1620 RX_MATCH_UTF8_set(prog,do_utf8);
6eb5f6b9
JH
1621
1622 PL_regcc = 0;
1623
1624 cache_re(prog);
1625#ifdef DEBUGGING
aea4f609 1626 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1627#endif
1628
1629 /* Be paranoid... */
1630 if (prog == NULL || startpos == NULL) {
1631 Perl_croak(aTHX_ "NULL regexp parameter");
1632 return 0;
1633 }
1634
1635 minlen = prog->minlen;
61a36c01 1636 if (strend - startpos < minlen) {
a72c7584
JH
1637 DEBUG_r(PerlIO_printf(Perl_debug_log,
1638 "String too short [regexec_flags]...\n"));
1639 goto phooey;
1aa99e6b 1640 }
6eb5f6b9 1641
6eb5f6b9
JH
1642 /* Check validity of program. */
1643 if (UCHARAT(prog->program) != REG_MAGIC) {
1644 Perl_croak(aTHX_ "corrupted regexp program");
1645 }
1646
1647 PL_reg_flags = 0;
1648 PL_reg_eval_set = 0;
1649 PL_reg_maxiter = 0;
1650
1651 if (prog->reganch & ROPT_UTF8)
1652 PL_reg_flags |= RF_utf8;
1653
1654 /* Mark beginning of line for ^ and lookbehind. */
1655 PL_regbol = startpos;
1656 PL_bostr = strbeg;
1657 PL_reg_sv = sv;
1658
1659 /* Mark end of line for $ (and such) */
1660 PL_regeol = strend;
1661
1662 /* see how far we have to get to not match where we matched before */
1663 PL_regtill = startpos+minend;
1664
1665 /* We start without call_cc context. */
1666 PL_reg_call_cc = 0;
1667
1668 /* If there is a "must appear" string, look for it. */
1669 s = startpos;
1670
1671 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1672 MAGIC *mg;
1673
1674 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1675 PL_reg_ganch = startpos;
1676 else if (sv && SvTYPE(sv) >= SVt_PVMG
1677 && SvMAGIC(sv)
14befaf4
DM
1678 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1679 && mg->mg_len >= 0) {
6eb5f6b9
JH
1680 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1681 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1682 if (s > PL_reg_ganch)
6eb5f6b9
JH
1683 goto phooey;
1684 s = PL_reg_ganch;
1685 }
1686 }
1687 else /* pos() not defined */
1688 PL_reg_ganch = strbeg;
1689 }
1690
33b8afdf 1691 if (!(flags & REXEC_CHECKED) && (prog->check_substr != Nullsv || prog->check_utf8 != Nullsv)) {
6eb5f6b9
JH
1692 re_scream_pos_data d;
1693
1694 d.scream_olds = &scream_olds;
1695 d.scream_pos = &scream_pos;
1696 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7
JH
1697 if (!s) {
1698 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1699 goto phooey; /* not present */
3fa9c3d7 1700 }
6eb5f6b9
JH
1701 }
1702
2a782b5b 1703 DEBUG_r({
9e55ce06
JH
1704 char *s0 = UTF ?
1705 pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
c728cb41 1706 UNI_DISPLAY_REGEX) :
9e55ce06
JH
1707 prog->precomp;
1708 int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
1709 char *s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
c728cb41 1710 UNI_DISPLAY_REGEX) : startpos;
9e55ce06 1711 int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
2a782b5b
JH
1712 if (!PL_colorset)
1713 reginitcolors();
1714 PerlIO_printf(Perl_debug_log,
9e55ce06 1715 "%sMatching REx%s `%s%*.*s%s%s' against `%s%.*s%s%s'\n",
2a782b5b 1716 PL_colors[4],PL_colors[5],PL_colors[0],
9e55ce06 1717 len0, len0, s0,
2a782b5b 1718 PL_colors[1],
9e55ce06 1719 len0 > 60 ? "..." : "",
2a782b5b 1720 PL_colors[0],
9e55ce06
JH
1721 (int)(len1 > 60 ? 60 : len1),
1722 s1, PL_colors[1],
1723 (len1 > 60 ? "..." : "")
2a782b5b
JH
1724 );
1725 });
6eb5f6b9
JH
1726
1727 /* Simplest case: anchored match need be tried only once. */
1728 /* [unless only anchor is BOL and multiline is set] */
1729 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1730 if (s == startpos && regtry(prog, startpos))
1731 goto got_it;
1732 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1733 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1734 {
1735 char *end;
1736
1737 if (minlen)
1738 dontbother = minlen - 1;
1aa99e6b 1739 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9 1740 /* for multiline we only have to try after newlines */
33b8afdf 1741 if (prog->check_substr || prog->check_utf8) {
6eb5f6b9
JH
1742 if (s == startpos)
1743 goto after_try;
1744 while (1) {
1745 if (regtry(prog, s))
1746 goto got_it;
1747 after_try:
1748 if (s >= end)
1749 goto phooey;
1750 if (prog->reganch & RE_USE_INTUIT) {
1751 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1752 if (!s)
1753 goto phooey;
1754 }
1755 else
1756 s++;
1757 }
1758 } else {
1759 if (s > startpos)
1760 s--;
1761 while (s < end) {
1762 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1763 if (regtry(prog, s))
1764 goto got_it;
1765 }
1766 }
1767 }
1768 }
1769 goto phooey;
1770 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1771 if (regtry(prog, PL_reg_ganch))
1772 goto got_it;
1773 goto phooey;
1774 }
1775
1776 /* Messy cases: unanchored match. */
33b8afdf 1777 if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1778 /* we have /x+whatever/ */
1779 /* it must be a one character string (XXXX Except UTF?) */
33b8afdf 1780 char ch;
bf93d4cc
GS
1781#ifdef DEBUGGING
1782 int did_match = 0;
1783#endif
33b8afdf
JH
1784 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1785 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1786 ch = SvPVX(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
bf93d4cc 1787
1aa99e6b 1788 if (do_utf8) {
6eb5f6b9
JH
1789 while (s < strend) {
1790 if (*s == ch) {
bf93d4cc 1791 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1792 if (regtry(prog, s)) goto got_it;
1793 s += UTF8SKIP(s);
1794 while (s < strend && *s == ch)
1795 s += UTF8SKIP(s);
1796 }
1797 s += UTF8SKIP(s);
1798 }
1799 }
1800 else {
1801 while (s < strend) {
1802 if (*s == ch) {
bf93d4cc 1803 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1804 if (regtry(prog, s)) goto got_it;
1805 s++;
1806 while (s < strend && *s == ch)
1807 s++;
1808 }
1809 s++;
1810 }
1811 }
b7953727 1812 DEBUG_r(if (!did_match)
bf93d4cc 1813 PerlIO_printf(Perl_debug_log,
b7953727
JH
1814 "Did not find anchored character...\n")
1815 );
6eb5f6b9
JH
1816 }
1817 /*SUPPRESS 560*/
33b8afdf
JH
1818 else if (prog->anchored_substr != Nullsv
1819 || prog->anchored_utf8 != Nullsv
1820 || ((prog->float_substr != Nullsv || prog->float_utf8 != Nullsv)
1821 && prog->float_max_offset < strend - s)) {
1822 SV *must;
1823 I32 back_max;
1824 I32 back_min;
1825 char *last;
6eb5f6b9 1826 char *last1; /* Last position checked before */
bf93d4cc
GS
1827#ifdef DEBUGGING
1828 int did_match = 0;
1829#endif
33b8afdf
JH
1830 if (prog->anchored_substr || prog->anchored_utf8) {
1831 if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1832 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1833 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1834 back_max = back_min = prog->anchored_offset;
1835 } else {
1836 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1837 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1838 must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1839 back_max = prog->float_max_offset;
1840 back_min = prog->float_min_offset;
1841 }
1842 if (must == &PL_sv_undef)
1843 /* could not downgrade utf8 check substring, so must fail */
1844 goto phooey;
1845
1846 last = HOP3c(strend, /* Cannot start after this */
1847 -(I32)(CHR_SVLEN(must)
1848 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9
JH
1849
1850 if (s > PL_bostr)
1851 last1 = HOPc(s, -1);
1852 else
1853 last1 = s - 1; /* bogus */
1854
1855 /* XXXX check_substr already used to find `s', can optimize if
1856 check_substr==must. */
1857 scream_pos = -1;
1858 dontbother = end_shift;
1859 strend = HOPc(strend, -dontbother);
1860 while ( (s <= last) &&
9041c2e3 1861 ((flags & REXEC_SCREAM)
1aa99e6b 1862 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1863 end_shift, &scream_pos, 0))
1aa99e6b 1864 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1865 (unsigned char*)strend, must,
6eb5f6b9 1866 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
4addbd3b
HS
1867 /* we may be pointing at the wrong string */
1868 if ((flags & REXEC_SCREAM) && RX_MATCH_COPIED(prog))
1869 s = prog->subbeg + (s - SvPVX(sv));
bf93d4cc 1870 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1871 if (HOPc(s, -back_max) > last1) {
1872 last1 = HOPc(s, -back_min);
1873 s = HOPc(s, -back_max);
1874 }
1875 else {
1876 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1877
1878 last1 = HOPc(s, -back_min);
1879 s = t;
1880 }
1aa99e6b 1881 if (do_utf8) {
6eb5f6b9
JH
1882 while (s <= last1) {
1883 if (regtry(prog, s))
1884 goto got_it;
1885 s += UTF8SKIP(s);
1886 }
1887 }
1888 else {
1889 while (s <= last1) {
1890 if (regtry(prog, s))
1891 goto got_it;
1892 s++;
1893 }
1894 }
1895 }
b7953727
JH
1896 DEBUG_r(if (!did_match)
1897 PerlIO_printf(Perl_debug_log,
1898 "Did not find %s substr `%s%.*s%s'%s...\n",
33b8afdf 1899 ((must == prog->anchored_substr || must == prog->anchored_utf8)
bf93d4cc
GS
1900 ? "anchored" : "floating"),
1901 PL_colors[0],
1902 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1903 SvPVX(must),
b7953727
JH
1904 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1905 );
6eb5f6b9
JH
1906 goto phooey;
1907 }
155aba94 1908 else if ((c = prog->regstclass)) {
f14c76ed
RGS
1909 if (minlen) {
1910 I32 op = (U8)OP(prog->regstclass);
66e933ab 1911 /* don't bother with what can't match */
f14c76ed
RGS
1912 if (PL_regkind[op] != EXACT && op != CANY)
1913 strend = HOPc(strend, -(minlen - 1));
1914 }
ffc61ed2
JH
1915 DEBUG_r({
1916 SV *prop = sv_newmortal();
9e55ce06
JH
1917 char *s0;
1918 char *s1;
1919 int len0;
1920 int len1;
1921
ffc61ed2 1922 regprop(prop, c);
9e55ce06
JH
1923 s0 = UTF ?
1924 pv_uni_display(dsv0, (U8*)SvPVX(prop), SvCUR(prop), 60,
c728cb41 1925 UNI_DISPLAY_REGEX) :
9e55ce06
JH
1926 SvPVX(prop);
1927 len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
1928 s1 = UTF ?
c728cb41 1929 sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
9e55ce06
JH
1930 len1 = UTF ? SvCUR(dsv1) : strend - s;
1931 PerlIO_printf(Perl_debug_log,
1932 "Matching stclass `%*.*s' against `%*.*s'\n",
1933 len0, len0, s0,
1934 len1, len1, s1);
ffc61ed2 1935 });
6eb5f6b9
JH
1936 if (find_byclass(prog, c, s, strend, startpos, 0))
1937 goto got_it;
bf93d4cc 1938 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1939 }
1940 else {
1941 dontbother = 0;
33b8afdf
JH
1942 if (prog->float_substr != Nullsv || prog->float_utf8 != Nullsv) {
1943 /* Trim the end. */
d6a28714 1944 char *last;
33b8afdf
JH
1945 SV* float_real;
1946
1947 if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1948 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1949 float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
d6a28714
JH
1950
1951 if (flags & REXEC_SCREAM) {
33b8afdf 1952 last = screaminstr(sv, float_real, s - strbeg,
d6a28714
JH
1953 end_shift, &scream_pos, 1); /* last one */
1954 if (!last)
ffc61ed2 1955 last = scream_olds; /* Only one occurrence. */
4addbd3b
HS
1956 /* we may be pointing at the wrong string */
1957 else if (RX_MATCH_COPIED(prog))
1958 s = prog->subbeg + (s - SvPVX(sv));
b8c5462f 1959 }
d6a28714
JH
1960 else {
1961 STRLEN len;
33b8afdf 1962 char *little = SvPV(float_real, len);
d6a28714 1963
33b8afdf 1964 if (SvTAIL(float_real)) {
d6a28714
JH
1965 if (memEQ(strend - len + 1, little, len - 1))
1966 last = strend - len + 1;
1967 else if (!PL_multiline)
9041c2e3 1968 last = memEQ(strend - len, little, len)
d6a28714 1969 ? strend - len : Nullch;
b8c5462f 1970 else
d6a28714
JH
1971 goto find_last;
1972 } else {
1973 find_last:
9041c2e3 1974 if (len)
d6a28714 1975 last = rninstr(s, strend, little, little + len);
b8c5462f 1976 else
d6a28714 1977 last = strend; /* matching `$' */
b8c5462f 1978 }
b8c5462f 1979 }
bf93d4cc
GS
1980 if (last == NULL) {
1981 DEBUG_r(PerlIO_printf(Perl_debug_log,
1982 "%sCan't trim the tail, match fails (should not happen)%s\n",
1983 PL_colors[4],PL_colors[5]));
1984 goto phooey; /* Should not happen! */
1985 }
d6a28714
JH
1986 dontbother = strend - last + prog->float_min_offset;
1987 }
1988 if (minlen && (dontbother < minlen))
1989 dontbother = minlen - 1;
1990 strend -= dontbother; /* this one's always in bytes! */
1991 /* We don't know much -- general case. */
1aa99e6b 1992 if (do_utf8) {
d6a28714
JH
1993 for (;;) {
1994 if (regtry(prog, s))
1995 goto got_it;
1996 if (s >= strend)
1997 break;
b8c5462f 1998 s += UTF8SKIP(s);
d6a28714
JH
1999 };
2000 }
2001 else {
2002 do {
2003 if (regtry(prog, s))
2004 goto got_it;
2005 } while (s++ < strend);
2006 }
2007 }
2008
2009 /* Failure. */
2010 goto phooey;
2011
2012got_it:
2013 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2014
2015 if (PL_reg_eval_set) {
2016 /* Preserve the current value of $^R */
2017 if (oreplsv != GvSV(PL_replgv))
2018 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
2019 restored, the value remains
2020 the same. */
acfe0abc 2021 restore_pos(aTHX_ 0);
d6a28714
JH
2022 }
2023
2024 /* make sure $`, $&, $', and $digit will work later */
2025 if ( !(flags & REXEC_NOT_FIRST) ) {
2026 if (RX_MATCH_COPIED(prog)) {
2027 Safefree(prog->subbeg);
2028 RX_MATCH_COPIED_off(prog);
2029 }
2030 if (flags & REXEC_COPY_STR) {
2031 I32 i = PL_regeol - startpos + (stringarg - strbeg);
2032
2033 s = savepvn(strbeg, i);
2034 prog->subbeg = s;
2035 prog->sublen = i;
2036 RX_MATCH_COPIED_on(prog);
2037 }
2038 else {
2039 prog->subbeg = strbeg;
2040 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2041 }
2042 }
9041c2e3 2043
d6a28714
JH
2044 return 1;
2045
2046phooey:
bf93d4cc
GS
2047 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2048 PL_colors[4],PL_colors[5]));
d6a28714 2049 if (PL_reg_eval_set)
acfe0abc 2050 restore_pos(aTHX_ 0);
d6a28714
JH
2051 return 0;
2052}
2053
2054/*
2055 - regtry - try match at specific point
2056 */
2057STATIC I32 /* 0 failure, 1 success */
2058S_regtry(pTHX_ regexp *prog, char *startpos)
2059{
d6a28714
JH
2060 register I32 i;
2061 register I32 *sp;
2062 register I32 *ep;
2063 CHECKPOINT lastcp;
2064
02db2b7b
IZ
2065#ifdef DEBUGGING
2066 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
2067#endif
d6a28714
JH
2068 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
2069 MAGIC *mg;
2070
2071 PL_reg_eval_set = RS_init;
2072 DEBUG_r(DEBUG_s(
b900a521
JH
2073 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
2074 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 2075 ));
e8347627 2076 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
2077 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2078 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
2079 SAVETMPS;
2080 /* Apparently this is not needed, judging by wantarray. */
e8347627 2081 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
2082 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2083
2084 if (PL_reg_sv) {
2085 /* Make $_ available to executed code. */
2086 if (PL_reg_sv != DEFSV) {
4d1ff10f 2087 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
d6a28714
JH
2088 SAVESPTR(DEFSV);
2089 DEFSV = PL_reg_sv;
b8c5462f 2090 }
d6a28714 2091
9041c2e3 2092 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 2093 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 2094 /* prepare for quick setting of pos */
14befaf4
DM
2095 sv_magic(PL_reg_sv, (SV*)0,
2096 PERL_MAGIC_regex_global, Nullch, 0);
2097 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 2098 mg->mg_len = -1;
b8c5462f 2099 }
d6a28714
JH
2100 PL_reg_magic = mg;
2101 PL_reg_oldpos = mg->mg_len;
c76ac1ee 2102 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 2103 }
09687e5a 2104 if (!PL_reg_curpm) {
0f79a09d 2105 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
2106#ifdef USE_ITHREADS
2107 {
2108 SV* repointer = newSViv(0);
577e12cc 2109 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 2110 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
2111 av_push(PL_regex_padav,repointer);
2112 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2113 PL_regex_pad = AvARRAY(PL_regex_padav);
2114 }
2115#endif
2116 }
aaa362c4 2117 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
2118 PL_reg_oldcurpm = PL_curpm;
2119 PL_curpm = PL_reg_curpm;
2120 if (RX_MATCH_COPIED(prog)) {
2121 /* Here is a serious problem: we cannot rewrite subbeg,
2122 since it may be needed if this match fails. Thus
2123 $` inside (?{}) could fail... */
2124 PL_reg_oldsaved = prog->subbeg;
2125 PL_reg_oldsavedlen = prog->sublen;
2126 RX_MATCH_COPIED_off(prog);
2127 }
2128 else
2129 PL_reg_oldsaved = Nullch;
2130 prog->subbeg = PL_bostr;
2131 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2132 }
2133 prog->startp[0] = startpos - PL_bostr;
2134 PL_reginput = startpos;
2135 PL_regstartp = prog->startp;
2136 PL_regendp = prog->endp;
2137 PL_reglastparen = &prog->lastparen;
a01268b5 2138 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714 2139 prog->lastparen = 0;
03994de8 2140 prog->lastcloseparen = 0;
d6a28714
JH
2141 PL_regsize = 0;
2142 DEBUG_r(PL_reg_starttry = startpos);
2143 if (PL_reg_start_tmpl <= prog->nparens) {
2144 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2145 if(PL_reg_start_tmp)
2146 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2147 else
2148 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2149 }
2150
2151 /* XXXX What this code is doing here?!!! There should be no need
2152 to do this again and again, PL_reglastparen should take care of
3dd2943c 2153 this! --ilya*/
dafc8851
JH
2154
2155 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2156 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
2157 * PL_reglastparen), is not needed at all by the test suite
2158 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2159 * enough, for building DynaLoader, or otherwise this
2160 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2161 * will happen. Meanwhile, this code *is* needed for the
2162 * above-mentioned test suite tests to succeed. The common theme
2163 * on those tests seems to be returning null fields from matches.
2164 * --jhi */
dafc8851 2165#if 1
d6a28714
JH
2166 sp = prog->startp;
2167 ep = prog->endp;
2168 if (prog->nparens) {
eb160463 2169 for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
d6a28714
JH
2170 *++sp = -1;
2171 *++ep = -1;
2172 }
2173 }
dafc8851 2174#endif
02db2b7b 2175 REGCP_SET(lastcp);
d6a28714
JH
2176 if (regmatch(prog->program + 1)) {
2177 prog->endp[0] = PL_reginput - PL_bostr;
2178 return 1;
2179 }
02db2b7b 2180 REGCP_UNWIND(lastcp);
d6a28714
JH
2181 return 0;
2182}
2183
02db2b7b
IZ
2184#define RE_UNWIND_BRANCH 1
2185#define RE_UNWIND_BRANCHJ 2
2186
2187union re_unwind_t;
2188
2189typedef struct { /* XX: makes sense to enlarge it... */
2190 I32 type;
2191 I32 prev;
2192 CHECKPOINT lastcp;
2193} re_unwind_generic_t;
2194
2195typedef struct {
2196 I32 type;
2197 I32 prev;
2198 CHECKPOINT lastcp;
2199 I32 lastparen;
2200 regnode *next;
2201 char *locinput;
2202 I32 nextchr;
2203#ifdef DEBUGGING
2204 int regindent;
2205#endif
2206} re_unwind_branch_t;
2207
2208typedef union re_unwind_t {
2209 I32 type;
2210 re_unwind_generic_t generic;
2211 re_unwind_branch_t branch;
2212} re_unwind_t;
2213
8ba1375e
MJD
2214#define sayYES goto yes
2215#define sayNO goto no
e0f9d4a8 2216#define sayNO_ANYOF goto no_anyof
8ba1375e
MJD
2217#define sayYES_FINAL goto yes_final
2218#define sayYES_LOUD goto yes_loud
2219#define sayNO_FINAL goto no_final
2220#define sayNO_SILENT goto do_no
2221#define saySAME(x) if (x) goto yes; else goto no
2222
2223#define REPORT_CODE_OFF 24
2224
d6a28714
JH
2225/*
2226 - regmatch - main matching routine
2227 *
2228 * Conceptually the strategy is simple: check to see whether the current
2229 * node matches, call self recursively to see whether the rest matches,
2230 * and then act accordingly. In practice we make some effort to avoid
2231 * recursion, in particular by going through "ordinary" nodes (that don't
2232 * need to know whether the rest of the match failed) by a loop instead of
2233 * by recursion.
2234 */
2235/* [lwall] I've hoisted the register declarations to the outer block in order to
2236 * maybe save a little bit of pushing and popping on the stack. It also takes
2237 * advantage of machines that use a register save mask on subroutine entry.
2238 */
2239STATIC I32 /* 0 failure, 1 success */
2240S_regmatch(pTHX_ regnode *prog)
2241{
d6a28714
JH
2242 register regnode *scan; /* Current node. */
2243 regnode *next; /* Next node. */
2244 regnode *inner; /* Next node in internal branch. */
2245 register I32 nextchr; /* renamed nextchr - nextchar colides with
2246 function of same name */
2247 register I32 n; /* no or next */
b7953727
JH
2248 register I32 ln = 0; /* len or last */
2249 register char *s = Nullch; /* operand or save */
d6a28714 2250 register char *locinput = PL_reginput;
b7953727 2251 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2252 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2253 I32 unwind = 0;
b7953727 2254#if 0
02db2b7b 2255 I32 firstcp = PL_savestack_ix;
b7953727 2256#endif
53c4c00c 2257 register bool do_utf8 = PL_reg_match_utf8;
2a782b5b 2258#ifdef DEBUGGING
ce333219
JH
2259 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2260 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2261 SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2a782b5b 2262#endif
02db2b7b 2263
d6a28714
JH
2264#ifdef DEBUGGING
2265 PL_regindent++;
2266#endif
2267
2268 /* Note that nextchr is a byte even in UTF */
2269 nextchr = UCHARAT(locinput);
2270 scan = prog;
2271 while (scan != NULL) {
8ba1375e 2272
2a782b5b 2273 DEBUG_r( {
d6a28714
JH
2274 SV *prop = sv_newmortal();
2275 int docolor = *PL_colors[0];
2276 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2277 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2278 /* The part of the string before starttry has one color
2279 (pref0_len chars), between starttry and current
2280 position another one (pref_len - pref0_len chars),
2281 after the current position the third one.
2282 We assume that pref0_len <= pref_len, otherwise we
2283 decrease pref0_len. */
9041c2e3 2284 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2285 ? (5 + taill) - l : locinput - PL_bostr;
2286 int pref0_len;
d6a28714 2287
df1ffd02 2288 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2289 pref_len++;
2290 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2291 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2292 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2293 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2294 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2295 l--;
d6a28714
JH
2296 if (pref0_len < 0)
2297 pref0_len = 0;
2298 if (pref0_len > pref_len)
2299 pref0_len = pref_len;
2300 regprop(prop, scan);
2a782b5b
JH
2301 {
2302 char *s0 =
f14c76ed 2303 do_utf8 && OP(scan) != CANY ?
2a782b5b 2304 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
c728cb41 2305 pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2306 locinput - pref_len;
df1ffd02 2307 int len0 = do_utf8 ? strlen(s0) : pref0_len;
f14c76ed 2308 char *s1 = do_utf8 && OP(scan) != CANY ?
2a782b5b 2309 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
c728cb41 2310 pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2311 locinput - pref_len + pref0_len;
df1ffd02 2312 int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
f14c76ed 2313 char *s2 = do_utf8 && OP(scan) != CANY ?
2a782b5b 2314 pv_uni_display(dsv2, (U8*)locinput,
c728cb41 2315 PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
2a782b5b 2316 locinput;
df1ffd02 2317 int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2318 PerlIO_printf(Perl_debug_log,
2319 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2320 (IV)(locinput - PL_bostr),
2321 PL_colors[4],
2322 len0, s0,
2323 PL_colors[5],
2324 PL_colors[2],
2325 len1, s1,
2326 PL_colors[3],
2327 (docolor ? "" : "> <"),
2328 PL_colors[0],
2329 len2, s2,
2330 PL_colors[1],
2331 15 - l - pref_len + 1,
2332 "",
2333 (IV)(scan - PL_regprogram), PL_regindent*2, "",
2334 SvPVX(prop));
2335 }
2336 });
d6a28714
JH
2337
2338 next = scan + NEXT_OFF(scan);
2339 if (next == scan)
2340 next = NULL;
2341
2342 switch (OP(scan)) {
2343 case BOL:
12d33761
HS
2344 if (locinput == PL_bostr || (PL_multiline &&
2345 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2346 {
2347 /* regtill = regbol; */
b8c5462f
JH
2348 break;
2349 }
d6a28714
JH
2350 sayNO;
2351 case MBOL:
12d33761
HS
2352 if (locinput == PL_bostr ||
2353 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2354 {
b8c5462f
JH
2355 break;
2356 }
d6a28714
JH
2357 sayNO;
2358 case SBOL:
c2a73568 2359 if (locinput == PL_bostr)
b8c5462f 2360 break;
d6a28714
JH
2361 sayNO;
2362 case GPOS:
2363 if (locinput == PL_reg_ganch)
2364 break;
2365 sayNO;
2366 case EOL:
2367 if (PL_multiline)
2368 goto meol;
2369 else
2370 goto seol;
2371 case MEOL:
2372 meol:
2373 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2374 sayNO;
b8c5462f 2375 break;
d6a28714
JH
2376 case SEOL:
2377 seol:
2378 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2379 sayNO;
d6a28714 2380 if (PL_regeol - locinput > 1)
b8c5462f 2381 sayNO;
b8c5462f 2382 break;
d6a28714
JH
2383 case EOS:
2384 if (PL_regeol != locinput)
b8c5462f 2385 sayNO;
d6a28714 2386 break;
ffc61ed2 2387 case SANY:
d6a28714 2388 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2389 sayNO;
f33976b4
DB
2390 if (do_utf8) {
2391 locinput += PL_utf8skip[nextchr];
2392 if (locinput > PL_regeol)
2393 sayNO;
2394 nextchr = UCHARAT(locinput);
2395 }
2396 else
2397 nextchr = UCHARAT(++locinput);
2398 break;
2399 case CANY:
2400 if (!nextchr && locinput >= PL_regeol)
2401 sayNO;
b8c5462f 2402 nextchr = UCHARAT(++locinput);
a0d0e21e 2403 break;
ffc61ed2 2404 case REG_ANY:
1aa99e6b
IH
2405 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2406 sayNO;
2407 if (do_utf8) {
b8c5462f 2408 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2409 if (locinput > PL_regeol)
2410 sayNO;
a0ed51b3 2411 nextchr = UCHARAT(locinput);
a0ed51b3 2412 }
1aa99e6b
IH
2413 else
2414 nextchr = UCHARAT(++locinput);
a0ed51b3 2415 break;
d6a28714 2416 case EXACT:
cd439c50
IZ
2417 s = STRING(scan);
2418 ln = STR_LEN(scan);
eb160463 2419 if (do_utf8 != UTF) {
bc517b45 2420 /* The target and the pattern have differing utf8ness. */
1aa99e6b
IH
2421 char *l = locinput;
2422 char *e = s + ln;
bc517b45 2423 STRLEN ulen;
a72c7584 2424
5ff6fc6d
JH
2425 if (do_utf8) {
2426 /* The target is utf8, the pattern is not utf8. */
1aa99e6b
IH
2427 while (s < e) {
2428 if (l >= PL_regeol)
5ff6fc6d
JH
2429 sayNO;
2430 if (NATIVE_TO_UNI(*(U8*)s) !=
872c91ae
JH
2431 utf8n_to_uvuni((U8*)l, UTF8_MAXLEN, &ulen,
2432 ckWARN(WARN_UTF8) ?
2433 0 : UTF8_ALLOW_ANY))
5ff6fc6d 2434 sayNO;
bc517b45 2435 l += ulen;
5ff6fc6d 2436 s ++;
1aa99e6b 2437 }
5ff6fc6d
JH
2438 }
2439 else {
2440 /* The target is not utf8, the pattern is utf8. */
1aa99e6b
IH
2441 while (s < e) {
2442 if (l >= PL_regeol)
2443 sayNO;
5ff6fc6d 2444 if (NATIVE_TO_UNI(*((U8*)l)) !=
872c91ae
JH
2445 utf8n_to_uvuni((U8*)s, UTF8_MAXLEN, &ulen,
2446 ckWARN(WARN_UTF8) ?
2447 0 : UTF8_ALLOW_ANY))
1aa99e6b 2448 sayNO;
bc517b45 2449 s += ulen;
a72c7584 2450 l ++;
1aa99e6b 2451 }
5ff6fc6d 2452 }
1aa99e6b
IH
2453 locinput = l;
2454 nextchr = UCHARAT(locinput);
2455 break;
2456 }
bc517b45 2457 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2458 /* Inline the first character, for speed. */
2459 if (UCHARAT(s) != nextchr)
2460 sayNO;
2461 if (PL_regeol - locinput < ln)
2462 sayNO;
2463 if (ln > 1 && memNE(s, locinput, ln))
2464 sayNO;
2465 locinput += ln;
2466 nextchr = UCHARAT(locinput);
2467 break;
2468 case EXACTFL:
b8c5462f
JH
2469 PL_reg_flags |= RF_tainted;
2470 /* FALL THROUGH */
d6a28714 2471 case EXACTF:
cd439c50
IZ
2472 s = STRING(scan);
2473 ln = STR_LEN(scan);
d6a28714 2474
d07ddd77
JH
2475 if (do_utf8 || UTF) {
2476 /* Either target or the pattern are utf8. */
d6a28714 2477 char *l = locinput;
d07ddd77 2478 char *e = PL_regeol;
bc517b45 2479
eb160463 2480 if (ibcmp_utf8(s, 0, ln, (bool)UTF,
1feea2c7 2481 l, &e, 0, do_utf8)) {
5486206c
JH
2482 /* One more case for the sharp s:
2483 * pack("U0U*", 0xDF) =~ /ss/i,
2484 * the 0xC3 0x9F are the UTF-8
2485 * byte sequence for the U+00DF. */
2486 if (!(do_utf8 &&
2487 toLOWER(s[0]) == 's' &&
2488 ln >= 2 &&
2489 toLOWER(s[1]) == 's' &&
2490 (U8)l[0] == 0xC3 &&
2491 e - l >= 2 &&
2492 (U8)l[1] == 0x9F))
2493 sayNO;
2494 }
d07ddd77
JH
2495 locinput = e;
2496 nextchr = UCHARAT(locinput);
2497 break;
a0ed51b3 2498 }
d6a28714 2499
bc517b45
JH
2500 /* Neither the target and the pattern are utf8. */
2501
d6a28714
JH
2502 /* Inline the first character, for speed. */
2503 if (UCHARAT(s) != nextchr &&
2504 UCHARAT(s) != ((OP(scan) == EXACTF)
2505 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2506 sayNO;
d6a28714 2507 if (PL_regeol - locinput < ln)
b8c5462f 2508 sayNO;
d6a28714
JH
2509 if (ln > 1 && (OP(scan) == EXACTF
2510 ? ibcmp(s, locinput, ln)
2511 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2512 sayNO;
d6a28714
JH
2513 locinput += ln;
2514 nextchr = UCHARAT(locinput);
a0d0e21e 2515 break;
d6a28714 2516 case ANYOF:
ffc61ed2 2517 if (do_utf8) {
9e55ce06
JH
2518 STRLEN inclasslen = PL_regeol - locinput;
2519
ba7b4546 2520 if (!reginclass(scan, (U8*)locinput, &inclasslen, do_utf8))
e0f9d4a8 2521 sayNO_ANYOF;
ffc61ed2
JH
2522 if (locinput >= PL_regeol)
2523 sayNO;
0f0076b4 2524 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
b8c5462f 2525 nextchr = UCHARAT(locinput);
e0f9d4a8 2526 break;
ffc61ed2
JH
2527 }
2528 else {
2529 if (nextchr < 0)
2530 nextchr = UCHARAT(locinput);
7d3e948e 2531 if (!REGINCLASS(scan, (U8*)locinput))
e0f9d4a8 2532 sayNO_ANYOF;
ffc61ed2
JH
2533 if (!nextchr && locinput >= PL_regeol)
2534 sayNO;
2535 nextchr = UCHARAT(++locinput);
e0f9d4a8
JH
2536 break;
2537 }
2538 no_anyof:
2539 /* If we might have the case of the German sharp s
2540 * in a casefolding Unicode character class. */
2541
ebc501f0
JH
2542 if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
2543 locinput += SHARP_S_SKIP;
e0f9d4a8 2544 nextchr = UCHARAT(locinput);
ffc61ed2 2545 }
e0f9d4a8
JH
2546 else
2547 sayNO;
b8c5462f 2548 break;
d6a28714 2549 case ALNUML:
b8c5462f
JH
2550 PL_reg_flags |= RF_tainted;
2551 /* FALL THROUGH */
d6a28714 2552 case ALNUM:
b8c5462f 2553 if (!nextchr)
4633a7c4 2554 sayNO;
ffc61ed2 2555 if (do_utf8) {
ad24be35 2556 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2557 if (!(OP(scan) == ALNUM
3568d838 2558 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2559 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2560 {
2561 sayNO;
a0ed51b3 2562 }
b8c5462f 2563 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2564 nextchr = UCHARAT(locinput);
2565 break;
2566 }
ffc61ed2 2567 if (!(OP(scan) == ALNUM
d6a28714 2568 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2569 sayNO;
b8c5462f 2570 nextchr = UCHARAT(++locinput);
a0d0e21e 2571 break;
d6a28714 2572 case NALNUML:
b8c5462f
JH
2573 PL_reg_flags |= RF_tainted;
2574 /* FALL THROUGH */
d6a28714
JH
2575 case NALNUM:
2576 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2577 sayNO;
ffc61ed2 2578 if (do_utf8) {
8269fa76 2579 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2580 if (OP(scan) == NALNUM
3568d838 2581 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2582 : isALNUM_LC_utf8((U8*)locinput))
2583 {
b8c5462f 2584 sayNO;
d6a28714 2585 }
b8c5462f
JH
2586 locinput += PL_utf8skip[nextchr];
2587 nextchr = UCHARAT(locinput);
2588 break;
2589 }
ffc61ed2 2590 if (OP(scan) == NALNUM
d6a28714 2591 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2592 sayNO;
76e3520e 2593 nextchr = UCHARAT(++locinput);
a0d0e21e 2594 break;
d6a28714
JH
2595 case BOUNDL:
2596 case NBOUNDL:
3280af22 2597 PL_reg_flags |= RF_tainted;
bbce6d69 2598 /* FALL THROUGH */
d6a28714
JH
2599 case BOUND:
2600 case NBOUND:
2601 /* was last char in word? */
ffc61ed2 2602 if (do_utf8) {
12d33761
HS
2603 if (locinput == PL_bostr)
2604 ln = '\n';
ffc61ed2 2605 else {
b4f7163a 2606 U8 *r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
9041c2e3 2607
b4f7163a 2608 ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, 0);
ffc61ed2
JH
2609 }
2610 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2611 ln = isALNUM_uni(ln);
8269fa76 2612 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2613 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2614 }
2615 else {
9041c2e3 2616 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2617 n = isALNUM_LC_utf8((U8*)locinput);
2618 }
a0ed51b3 2619 }
d6a28714 2620 else {
12d33761
HS
2621 ln = (locinput != PL_bostr) ?
2622 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2623 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2624 ln = isALNUM(ln);
2625 n = isALNUM(nextchr);
2626 }
2627 else {
2628 ln = isALNUM_LC(ln);
2629 n = isALNUM_LC(nextchr);
2630 }
d6a28714 2631 }
ffc61ed2
JH
2632 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2633 OP(scan) == BOUNDL))
2634 sayNO;
a0ed51b3 2635 break;
d6a28714 2636 case SPACEL:
3280af22 2637 PL_reg_flags |= RF_tainted;
bbce6d69 2638 /* FALL THROUGH */
d6a28714 2639 case SPACE:
9442cb0e 2640 if (!nextchr)
4633a7c4 2641 sayNO;
1aa99e6b 2642 if (do_utf8) {
fd400ab9 2643 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2644 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2645 if (!(OP(scan) == SPACE
3568d838 2646 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2647 : isSPACE_LC_utf8((U8*)locinput)))
2648 {
2649 sayNO;
2650 }
2651 locinput += PL_utf8skip[nextchr];
2652 nextchr = UCHARAT(locinput);
2653 break;
d6a28714 2654 }
ffc61ed2
JH
2655 if (!(OP(scan) == SPACE
2656 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2657 sayNO;
2658 nextchr = UCHARAT(++locinput);
2659 }
2660 else {
2661 if (!(OP(scan) == SPACE
2662 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2663 sayNO;
2664 nextchr = UCHARAT(++locinput);
a0ed51b3 2665 }
a0ed51b3 2666 break;
d6a28714 2667 case NSPACEL:
3280af22 2668 PL_reg_flags |= RF_tainted;
bbce6d69 2669 /* FALL THROUGH */
d6a28714 2670 case NSPACE:
9442cb0e 2671 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2672 sayNO;
1aa99e6b 2673 if (do_utf8) {
8269fa76 2674 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2675 if (OP(scan) == NSPACE
3568d838 2676 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2677 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2678 {
2679 sayNO;
2680 }
2681 locinput += PL_utf8skip[nextchr];
2682 nextchr = UCHARAT(locinput);
2683 break;
a0ed51b3 2684 }
ffc61ed2 2685 if (OP(scan) == NSPACE
d6a28714 2686 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2687 sayNO;
76e3520e 2688 nextchr = UCHARAT(++locinput);
a0d0e21e 2689 break;
d6a28714 2690 case DIGITL:
a0ed51b3
LW
2691 PL_reg_flags |= RF_tainted;
2692 /* FALL THROUGH */
d6a28714 2693 case DIGIT:
9442cb0e 2694 if (!nextchr)
a0ed51b3 2695 sayNO;
1aa99e6b 2696 if (do_utf8) {
8269fa76 2697 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2698 if (!(OP(scan) == DIGIT
3568d838 2699 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2700 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2701 {
a0ed51b3 2702 sayNO;
dfe13c55 2703 }
6f06b55f 2704 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2705 nextchr = UCHARAT(locinput);
2706 break;
2707 }
ffc61ed2 2708 if (!(OP(scan) == DIGIT
9442cb0e 2709 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2710 sayNO;
2711 nextchr = UCHARAT(++locinput);
2712 break;
d6a28714 2713 case NDIGITL:
b8c5462f
JH
2714 PL_reg_flags |= RF_tainted;
2715 /* FALL THROUGH */
d6a28714 2716 case NDIGIT:
9442cb0e 2717 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2718 sayNO;
1aa99e6b 2719 if (do_utf8) {
8269fa76 2720 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2721 if (OP(scan) == NDIGIT
3568d838 2722 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2723 : isDIGIT_LC_utf8((U8*)locinput))
2724 {
a0ed51b3 2725 sayNO;
9442cb0e 2726 }
6f06b55f 2727 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2728 nextchr = UCHARAT(locinput);
2729 break;
2730 }
ffc61ed2 2731 if (OP(scan) == NDIGIT
9442cb0e 2732 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2733 sayNO;
2734 nextchr = UCHARAT(++locinput);
2735 break;
2736 case CLUMP:
b7c83a7e 2737 if (locinput >= PL_regeol)
a0ed51b3 2738 sayNO;
b7c83a7e
JH
2739 if (do_utf8) {
2740 LOAD_UTF8_CHARCLASS(mark,"~");
2741 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2742 sayNO;
2743 locinput += PL_utf8skip[nextchr];
2744 while (locinput < PL_regeol &&
2745 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2746 locinput += UTF8SKIP(locinput);
2747 if (locinput > PL_regeol)
2748 sayNO;
eb08e2da
JH
2749 }
2750 else
2751 locinput++;
a0ed51b3
LW
2752 nextchr = UCHARAT(locinput);
2753 break;
c8756f30 2754 case REFFL:
3280af22 2755 PL_reg_flags |= RF_tainted;
c8756f30 2756 /* FALL THROUGH */
c277df42 2757 case REF:
c8756f30 2758 case REFF:
c277df42 2759 n = ARG(scan); /* which paren pair */
cf93c79d 2760 ln = PL_regstartp[n];
2c2d71f5 2761 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
eb160463 2762 if ((I32)*PL_reglastparen < n || ln == -1)
af3f8c16 2763 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2764 if (ln == PL_regendp[n])
a0d0e21e 2765 break;
a0ed51b3 2766
cf93c79d 2767 s = PL_bostr + ln;
1aa99e6b 2768 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2769 char *l = locinput;
cf93c79d 2770 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2771 /*
2772 * Note that we can't do the "other character" lookup trick as
2773 * in the 8-bit case (no pun intended) because in Unicode we
2774 * have to map both upper and title case to lower case.
2775 */
2776 if (OP(scan) == REFF) {
a2a2844f 2777 STRLEN ulen1, ulen2;
e7ae6809
JH
2778 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
2779 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a0ed51b3
LW
2780 while (s < e) {
2781 if (l >= PL_regeol)
2782 sayNO;
a2a2844f
JH
2783 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2784 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 2785 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 2786 sayNO;
a2a2844f
JH
2787 s += ulen1;
2788 l += ulen2;
a0ed51b3
LW
2789 }
2790 }
2791 locinput = l;
2792 nextchr = UCHARAT(locinput);
2793 break;
2794 }
2795
a0d0e21e 2796 /* Inline the first character, for speed. */
76e3520e 2797 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2798 (OP(scan) == REF ||
2799 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2800 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2801 sayNO;
cf93c79d 2802 ln = PL_regendp[n] - ln;
3280af22 2803 if (locinput + ln > PL_regeol)
4633a7c4 2804 sayNO;
c8756f30
AK
2805 if (ln > 1 && (OP(scan) == REF
2806 ? memNE(s, locinput, ln)
2807 : (OP(scan) == REFF
2808 ? ibcmp(s, locinput, ln)
2809 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2810 sayNO;
a0d0e21e 2811 locinput += ln;
76e3520e 2812 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2813 break;
2814
2815 case NOTHING:
c277df42 2816 case TAIL:
a0d0e21e
LW
2817 break;
2818 case BACK:
2819 break;
c277df42
IZ
2820 case EVAL:
2821 {
2822 dSP;
533c011a 2823 OP_4tree *oop = PL_op;
3280af22 2824 COP *ocurcop = PL_curcop;
f3548bdc 2825 PAD *old_comppad;
c277df42 2826 SV *ret;
9041c2e3 2827
c277df42 2828 n = ARG(scan);
533c011a 2829 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2830 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
f3548bdc 2831 PAD_SAVE_LOCAL(old_comppad, (PAD*)PL_regdata->data[n + 2]);
cf93c79d 2832 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2833
8e5e9ebe
RGS
2834 {
2835 SV **before = SP;
2836 CALLRUNOPS(aTHX); /* Scalar context. */
2837 SPAGAIN;
2838 if (SP == before)
075aa684 2839 ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
8e5e9ebe
RGS
2840 else {
2841 ret = POPs;
2842 PUTBACK;
2843 }
2844 }
2845
0f5d15d6 2846 PL_op = oop;
f3548bdc 2847 PAD_RESTORE_LOCAL(old_comppad);
0f5d15d6 2848 PL_curcop = ocurcop;
c277df42 2849 if (logical) {
0f5d15d6
IZ
2850 if (logical == 2) { /* Postponed subexpression. */
2851 regexp *re;
22c35a8c 2852 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2853 re_cc_state state;
0f5d15d6 2854 CHECKPOINT cp, lastcp;
cb50f42d 2855 int toggleutf;
0f5d15d6
IZ
2856
2857 if(SvROK(ret) || SvRMAGICAL(ret)) {
2858 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2859
2860 if(SvMAGICAL(sv))
14befaf4 2861 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2862 }
2863 if (mg) {
2864 re = (regexp *)mg->mg_obj;
df0003d4 2865 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2866 }
2867 else {
2868 STRLEN len;
2869 char *t = SvPV(ret, len);
2870 PMOP pm;
2871 char *oprecomp = PL_regprecomp;
2872 I32 osize = PL_regsize;
2873 I32 onpar = PL_regnpar;
2874
5fcd1c1b 2875 Zero(&pm, 1, PMOP);
cb50f42d 2876 if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
cea2e8a9 2877 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2878 if (!(SvFLAGS(ret)
0f5d15d6 2879 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2880 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2881 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2882 PL_regprecomp = oprecomp;
2883 PL_regsize = osize;
2884 PL_regnpar = onpar;
2885 }
2886 DEBUG_r(
9041c2e3 2887 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2888 "Entering embedded `%s%.60s%s%s'\n",
2889 PL_colors[0],
2890 re->precomp,
2891 PL_colors[1],
2892 (strlen(re->precomp) > 60 ? "..." : ""))
2893 );
2894 state.node = next;
2895 state.prev = PL_reg_call_cc;
2896 state.cc = PL_regcc;
2897 state.re = PL_reg_re;
2898
2ab05381 2899 PL_regcc = 0;
9041c2e3 2900
0f5d15d6 2901 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2902 REGCP_SET(lastcp);
0f5d15d6
IZ
2903 cache_re(re);
2904 state.ss = PL_savestack_ix;
2905 *PL_reglastparen = 0;
a01268b5 2906 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2907 PL_reg_call_cc = &state;
2908 PL_reginput = locinput;
cb50f42d
YST
2909 toggleutf = ((PL_reg_flags & RF_utf8) != 0) ^
2910 ((re->reganch & ROPT_UTF8) != 0);
2911 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c2d71f5
JH
2912
2913 /* XXXX This is too dramatic a measure... */
2914 PL_reg_maxiter = 0;
2915
0f5d15d6 2916 if (regmatch(re->program + 1)) {
2c914db6
IZ
2917 /* Even though we succeeded, we need to restore
2918 global variables, since we may be wrapped inside
2919 SUSPEND, thus the match may be not finished yet. */
2920
2921 /* XXXX Do this only if SUSPENDed? */
2922 PL_reg_call_cc = state.prev;
2923 PL_regcc = state.cc;
2924 PL_reg_re = state.re;
2925 cache_re(PL_reg_re);
cb50f42d 2926 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c914db6
IZ
2927
2928 /* XXXX This is too dramatic a measure... */
2929 PL_reg_maxiter = 0;
2930
2931 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2932 ReREFCNT_dec(re);
2933 regcpblow(cp);
2934 sayYES;
2935 }
0f5d15d6 2936 ReREFCNT_dec(re);
02db2b7b 2937 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2938 regcppop();
2939 PL_reg_call_cc = state.prev;
2940 PL_regcc = state.cc;
2941 PL_reg_re = state.re;
d3790889 2942 cache_re(PL_reg_re);
cb50f42d 2943 if (toggleutf) PL_reg_flags ^= RF_utf8;
2c2d71f5
JH
2944
2945 /* XXXX This is too dramatic a measure... */
2946 PL_reg_maxiter = 0;
2947
8e514ae6 2948 logical = 0;
0f5d15d6
IZ
2949 sayNO;
2950 }
c277df42 2951 sw = SvTRUE(ret);
0f5d15d6 2952 logical = 0;
a0ed51b3
LW
2953 }
2954 else
3280af22 2955 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2956 break;
2957 }
a0d0e21e 2958 case OPEN:
c277df42 2959 n = ARG(scan); /* which paren pair */
3280af22
NIS
2960 PL_reg_start_tmp[n] = locinput;
2961 if (n > PL_regsize)
2962 PL_regsize = n;
a0d0e21e
LW
2963 break;
2964 case CLOSE:
c277df42 2965 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2966 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2967 PL_regendp[n] = locinput - PL_bostr;
eb160463 2968 if (n > (I32)*PL_reglastparen)
3280af22 2969 *PL_reglastparen = n;
a01268b5 2970 *PL_reglastcloseparen = n;
a0d0e21e 2971 break;
c277df42
IZ
2972 case GROUPP:
2973 n = ARG(scan); /* which paren pair */
eb160463 2974 sw = ((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2975 break;
2976 case IFTHEN:
2c2d71f5 2977 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2978 if (sw)
2979 next = NEXTOPER(NEXTOPER(scan));
2980 else {
2981 next = scan + ARG(scan);
2982 if (OP(next) == IFTHEN) /* Fake one. */
2983 next = NEXTOPER(NEXTOPER(next));
2984 }
2985 break;
2986 case LOGICAL:
0f5d15d6 2987 logical = scan->flags;
c277df42 2988 break;
2ab05381
IZ
2989/*******************************************************************
2990 PL_regcc contains infoblock about the innermost (...)* loop, and
2991 a pointer to the next outer infoblock.
2992
2993 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2994
2995 1) After matching X, regnode for CURLYX is processed;
2996
9041c2e3 2997 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2998 regmatch() recursively with the starting point at WHILEM node;
2999
3000 3) Each hit of WHILEM node tries to match A and Z (in the order
3001 depending on the current iteration, min/max of {min,max} and
3002 greediness). The information about where are nodes for "A"
3003 and "Z" is read from the infoblock, as is info on how many times "A"
3004 was already matched, and greediness.
3005
3006 4) After A matches, the same WHILEM node is hit again.
3007
3008 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
3009 of the same pair. Thus when WHILEM tries to match Z, it temporarily
3010 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
3011 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
3012 of the external loop.
3013
3014 Currently present infoblocks form a tree with a stem formed by PL_curcc
3015 and whatever it mentions via ->next, and additional attached trees
3016 corresponding to temporarily unset infoblocks as in "5" above.
3017
9041c2e3 3018 In the following picture infoblocks for outer loop of
2ab05381
IZ
3019 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
3020 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
3021 infoblocks are drawn below the "reset" infoblock.
3022
3023 In fact in the picture below we do not show failed matches for Z and T
3024 by WHILEM blocks. [We illustrate minimal matches, since for them it is
3025 more obvious *why* one needs to *temporary* unset infoblocks.]
3026
3027 Matched REx position InfoBlocks Comment
3028 (Y(A)*?Z)*?T x
3029 Y(A)*?Z)*?T x <- O
3030 Y (A)*?Z)*?T x <- O
3031 Y A)*?Z)*?T x <- O <- I
3032 YA )*?Z)*?T x <- O <- I
3033 YA A)*?Z)*?T x <- O <- I
3034 YAA )*?Z)*?T x <- O <- I
3035 YAA Z)*?T x <- O # Temporary unset I
3036 I
3037
3038 YAAZ Y(A)*?Z)*?T x <- O
3039 I
3040
3041 YAAZY (A)*?Z)*?T x <- O
3042 I
3043
3044 YAAZY A)*?Z)*?T x <- O <- I
3045 I
3046
3047 YAAZYA )*?Z)*?T x <- O <- I
3048 I
3049
3050 YAAZYA Z)*?T x <- O # Temporary unset I
3051 I,I
3052
3053 YAAZYAZ )*?T x <- O
3054 I,I
3055
3056 YAAZYAZ T x # Temporary unset O
3057 O
3058 I,I
3059
3060 YAAZYAZT x
3061 O
3062 I,I
3063 *******************************************************************/
a0d0e21e
LW
3064 case CURLYX: {
3065 CURCUR cc;
3280af22 3066 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
3067 /* No need to save/restore up to this paren */
3068 I32 parenfloor = scan->flags;
c277df42
IZ
3069
3070 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
3071 next += ARG(next);
3280af22
NIS
3072 cc.oldcc = PL_regcc;
3073 PL_regcc = &cc;
cb434fcc
IZ
3074 /* XXXX Probably it is better to teach regpush to support
3075 parenfloor > PL_regsize... */
eb160463 3076 if (parenfloor > (I32)*PL_reglastparen)
cb434fcc
IZ
3077 parenfloor = *PL_reglastparen; /* Pessimization... */
3078 cc.parenfloor = parenfloor;
a0d0e21e
LW
3079 cc.cur = -1;
3080 cc.min = ARG1(scan);
3081 cc.max = ARG2(scan);
c277df42 3082 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
3083 cc.next = next;
3084 cc.minmod = minmod;
3085 cc.lastloc = 0;
3280af22 3086 PL_reginput = locinput;
a0d0e21e
LW
3087 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
3088 regcpblow(cp);
3280af22 3089 PL_regcc = cc.oldcc;
4633a7c4 3090 saySAME(n);
a0d0e21e
LW
3091 }
3092 /* NOT REACHED */
3093 case WHILEM: {
3094 /*
3095 * This is really hard to understand, because after we match
3096 * what we're trying to match, we must make sure the rest of
2c2d71f5 3097 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
3098 * to go back UP the parse tree by recursing ever deeper. And
3099 * if it fails, we have to reset our parent's current state
3100 * that we can try again after backing off.
3101 */
3102
c277df42 3103 CHECKPOINT cp, lastcp;
3280af22 3104 CURCUR* cc = PL_regcc;
c277df42
IZ
3105 char *lastloc = cc->lastloc; /* Detection of 0-len. */
3106
4633a7c4 3107 n = cc->cur + 1; /* how many we know we matched */
3280af22 3108 PL_reginput = locinput;
a0d0e21e 3109
c277df42 3110 DEBUG_r(
9041c2e3 3111 PerlIO_printf(Perl_debug_log,
91f3b821 3112 "%*s %ld out of %ld..%ld cc=%"UVxf"\n",
3280af22 3113 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 3114 (long)n, (long)cc->min,
2797576d 3115 (long)cc->max, PTR2UV(cc))
c277df42 3116 );
4633a7c4 3117
a0d0e21e
LW
3118 /* If degenerate scan matches "", assume scan done. */
3119
579cf2c3 3120 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 3121 PL_regcc = cc->oldcc;
2ab05381
IZ
3122 if (PL_regcc)
3123 ln = PL_regcc->cur;
c277df42 3124 DEBUG_r(
c3464db5
DD
3125 PerlIO_printf(Perl_debug_log,
3126 "%*s empty match detected, try continuation...\n",
3280af22 3127 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3128 );
a0d0e21e 3129 if (regmatch(cc->next))
4633a7c4 3130 sayYES;
2ab05381
IZ
3131 if (PL_regcc)
3132 PL_regcc->cur = ln;
3280af22 3133 PL_regcc = cc;
4633a7c4 3134 sayNO;
a0d0e21e
LW
3135 }
3136
3137 /* First just match a string of min scans. */
3138
3139 if (n < cc->min) {
3140 cc->cur = n;
3141 cc->lastloc = locinput;
4633a7c4
LW
3142 if (regmatch(cc->scan))
3143 sayYES;
3144 cc->cur = n - 1;
c277df42 3145 cc->lastloc = lastloc;
4633a7c4 3146 sayNO;
a0d0e21e
LW
3147 }
3148
2c2d71f5
JH
3149 if (scan->flags) {
3150 /* Check whether we already were at this position.
3151 Postpone detection until we know the match is not
3152 *that* much linear. */
3153 if (!PL_reg_maxiter) {
3154 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3155 PL_reg_leftiter = PL_reg_maxiter;
3156 }
3157 if (PL_reg_leftiter-- == 0) {
3158 I32 size = (PL_reg_maxiter + 7)/8;
3159 if (PL_reg_poscache) {
eb160463 3160 if ((I32)PL_reg_poscache_size < size) {
2c2d71f5
JH
3161 Renew(PL_reg_poscache, size, char);
3162 PL_reg_poscache_size = size;
3163 }
3164 Zero(PL_reg_poscache, size, char);
3165 }
3166 else {
3167 PL_reg_poscache_size = size;
3168 Newz(29, PL_reg_poscache, size, char);
3169 }
3170 DEBUG_r(
3171 PerlIO_printf(Perl_debug_log,
3172 "%sDetected a super-linear match, switching on caching%s...\n",
3173 PL_colors[4], PL_colors[5])
3174 );
3175 }
3176 if (PL_reg_leftiter < 0) {
3177 I32 o = locinput - PL_bostr, b;
3178
3179 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
3180 b = o % 8;
3181 o /= 8;
3182 if (PL_reg_poscache[o] & (1<<b)) {
3183 DEBUG_r(
3184 PerlIO_printf(Perl_debug_log,
3185 "%*s already tried at this position...\n",
3186 REPORT_CODE_OFF+PL_regindent*2, "")
3187 );
7821416a 3188 sayNO_SILENT;
2c2d71f5
JH
3189 }
3190 PL_reg_poscache[o] |= (1<<b);
3191 }
3192 }
3193
a0d0e21e
LW
3194 /* Prefer next over scan for minimal matching. */
3195
3196 if (cc->minmod) {
3280af22 3197 PL_regcc = cc->oldcc;
2ab05381
IZ
3198 if (PL_regcc)
3199 ln = PL_regcc->cur;
5f05dabc 3200 cp = regcppush(cc->parenfloor);
02db2b7b 3201 REGCP_SET(lastcp);
5f05dabc 3202 if (regmatch(cc->next)) {
c277df42 3203 regcpblow(cp);
4633a7c4 3204 sayYES; /* All done. */
5f05dabc 3205 }
02db2b7b 3206 REGCP_UNWIND(lastcp);
5f05dabc 3207 regcppop();
2ab05381
IZ
3208 if (PL_regcc)
3209 PL_regcc->cur = ln;
3280af22 3210 PL_regcc = cc;
a0d0e21e 3211
c277df42 3212 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 3213 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3214 && !(PL_reg_flags & RF_warned)) {
3215 PL_reg_flags |= RF_warned;
9014280d 3216 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
2f3ca594
GS
3217 "Complex regular subexpression recursion",
3218 REG_INFTY - 1);
c277df42 3219 }
4633a7c4 3220 sayNO;
c277df42 3221 }
a687059c 3222
c277df42 3223 DEBUG_r(
c3464db5
DD
3224 PerlIO_printf(Perl_debug_log,
3225 "%*s trying longer...\n",
3280af22 3226 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3227 );
a0d0e21e 3228 /* Try scanning more and see if it helps. */
3280af22 3229 PL_reginput = locinput;
a0d0e21e
LW
3230 cc->cur = n;
3231 cc->lastloc = locinput;
5f05dabc 3232 cp = regcppush(cc->parenfloor);
02db2b7b 3233 REGCP_SET(lastcp);
5f05dabc 3234 if (regmatch(cc->scan)) {
c277df42 3235 regcpblow(cp);
4633a7c4 3236 sayYES;
5f05dabc 3237 }
02db2b7b 3238 REGCP_UNWIND(lastcp);
5f05dabc 3239 regcppop();
4633a7c4 3240 cc->cur = n - 1;
c277df42 3241 cc->lastloc = lastloc;
4633a7c4 3242 sayNO;
a0d0e21e
LW
3243 }
3244
3245 /* Prefer scan over next for maximal matching. */
3246
3247 if (n < cc->max) { /* More greed allowed? */
5f05dabc 3248 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
3249 cc->cur = n;
3250 cc->lastloc = locinput;
02db2b7b 3251 REGCP_SET(lastcp);
5f05dabc 3252 if (regmatch(cc->scan)) {
c277df42 3253 regcpblow(cp);
4633a7c4 3254 sayYES;
5f05dabc 3255 }
02db2b7b 3256 REGCP_UNWIND(lastcp);
a0d0e21e 3257 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3258 PL_reginput = locinput;
c277df42 3259 DEBUG_r(
c3464db5
DD
3260 PerlIO_printf(Perl_debug_log,
3261 "%*s failed, try continuation...\n",
3280af22 3262 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3263 );
3264 }
9041c2e3 3265 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3266 && !(PL_reg_flags & RF_warned)) {
3280af22 3267 PL_reg_flags |= RF_warned;
9014280d 3268 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
cb5d145d
GS
3269 "Complex regular subexpression recursion",
3270 REG_INFTY - 1);
a0d0e21e
LW
3271 }
3272
3273 /* Failed deeper matches of scan, so see if this one works. */
3280af22 3274 PL_regcc = cc->oldcc;
2ab05381
IZ
3275 if (PL_regcc)
3276 ln = PL_regcc->cur;
a0d0e21e 3277 if (regmatch(cc->next))
4633a7c4 3278 sayYES;
2ab05381
IZ
3279 if (PL_regcc)
3280 PL_regcc->cur = ln;
3280af22 3281 PL_regcc = cc;
4633a7c4 3282 cc->cur = n - 1;
c277df42 3283 cc->lastloc = lastloc;
4633a7c4 3284 sayNO;
a0d0e21e
LW
3285 }
3286 /* NOT REACHED */
9041c2e3 3287 case BRANCHJ:
c277df42
IZ
3288 next = scan + ARG(scan);
3289 if (next == scan)
3290 next = NULL;
3291 inner = NEXTOPER(NEXTOPER(scan));
3292 goto do_branch;
9041c2e3 3293 case BRANCH:
c277df42
IZ
3294 inner = NEXTOPER(scan);
3295 do_branch:
3296 {
c277df42
IZ
3297 c1 = OP(scan);
3298 if (OP(next) != c1) /* No choice. */
3299 next = inner; /* Avoid recursion. */
a0d0e21e 3300 else {
02db2b7b
IZ
3301 I32 lastparen = *PL_reglastparen;
3302 I32 unwind1;
3303 re_unwind_branch_t *uw;
3304
3305 /* Put unwinding data on stack */
3306 unwind1 = SSNEWt(1,re_unwind_branch_t);
3307 uw = SSPTRt(unwind1,re_unwind_branch_t);
3308 uw->prev = unwind;
3309 unwind = unwind1;
3310 uw->type = ((c1 == BRANCH)
3311 ? RE_UNWIND_BRANCH
3312 : RE_UNWIND_BRANCHJ);
3313 uw->lastparen = lastparen;
3314 uw->next = next;
3315 uw->locinput = locinput;
3316 uw->nextchr = nextchr;
3317#ifdef DEBUGGING
3318 uw->regindent = ++PL_regindent;
3319#endif
c277df42 3320
02db2b7b
IZ
3321 REGCP_SET(uw->lastcp);
3322
3323 /* Now go into the first branch */
3324 next = inner;
a687059c 3325 }
a0d0e21e
LW
3326 }
3327 break;
3328 case MINMOD:
3329 minmod = 1;
3330 break;
c277df42
IZ
3331 case CURLYM:
3332 {
00db4c45 3333 I32 l = 0;
c277df42 3334 CHECKPOINT lastcp;
9041c2e3 3335
c277df42
IZ
3336 /* We suppose that the next guy does not need
3337 backtracking: in particular, it is of constant length,
3338 and has no parenths to influence future backrefs. */
3339 ln = ARG1(scan); /* min to match */
3340 n = ARG2(scan); /* max to match */
c277df42
IZ
3341 paren = scan->flags;
3342 if (paren) {
3280af22
NIS
3343 if (paren > PL_regsize)
3344 PL_regsize = paren;
eb160463 3345 if (paren > (I32)*PL_reglastparen)
3280af22 3346 *PL_reglastparen = paren;
c277df42 3347 }
dc45a647 3348 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3349 if (paren)
3350 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3351 PL_reginput = locinput;
c277df42
IZ
3352 if (minmod) {
3353 minmod = 0;
3354 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3355 sayNO;
f31a99c8
HS
3356 /* if we matched something zero-length we don't need to
3357 backtrack - capturing parens are already defined, so
3358 the caveat in the maximal case doesn't apply
3359
3360 XXXX if ln == 0, we can redo this check first time
3361 through the following loop
3362 */
3363 if (ln && l == 0)
3364 n = ln; /* don't backtrack */
3280af22 3365 locinput = PL_reginput;
cca55fe3 3366 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3367 regnode *text_node = next;
3368
cca55fe3 3369 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3370
cca55fe3 3371 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3372 else {
cca55fe3
JP
3373 if (PL_regkind[(U8)OP(text_node)] == REF) {
3374 I32 n, ln;
3375 n = ARG(text_node); /* which paren pair */
3376 ln = PL_regstartp[n];
3377 /* assume yes if we haven't seen CLOSEn */
3378 if (
eb160463 3379 (I32)*PL_reglastparen < n ||
cca55fe3
JP
3380 ln == -1 ||
3381 ln == PL_regendp[n]
3382 ) {
3383 c1 = c2 = -1000;
3384 goto assume_ok_MM;
3385 }
3386 c1 = *(PL_bostr + ln);
3387 }
3388 else { c1 = (U8)*STRING(text_node); }
af5decee 3389 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3390 c2 = PL_fold[c1];
af5decee 3391 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3392 c2 = PL_fold_locale[c1];
3393 else
3394 c2 = c1;
3395 }
a0ed51b3
LW
3396 }
3397 else
c277df42 3398 c1 = c2 = -1000;
cca55fe3 3399 assume_ok_MM:
02db2b7b 3400 REGCP_SET(lastcp);
5f4b28b2 3401 /* This may be improved if l == 0. */
c277df42
IZ
3402 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3403 /* If it could work, try it. */
3404 if (c1 == -1000 ||
3280af22
NIS
3405 UCHARAT(PL_reginput) == c1 ||
3406 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3407 {
3408 if (paren) {
f31a99c8 3409 if (ln) {
cf93c79d
IZ
3410 PL_regstartp[paren] =
3411 HOPc(PL_reginput, -l) - PL_bostr;
3412 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3413 }
3414 else
cf93c79d 3415 PL_regendp[paren] = -1;
c277df42
IZ
3416 }
3417 if (regmatch(next))
3418 sayYES;
02db2b7b 3419 REGCP_UNWIND(lastcp);
c277df42
IZ
3420 }
3421 /* Couldn't or didn't -- move forward. */
3280af22 3422 PL_reginput = locinput;
c277df42
IZ
3423 if (regrepeat_hard(scan, 1, &l)) {
3424 ln++;
3280af22 3425 locinput = PL_reginput;
c277df42
IZ
3426 }
3427 else
3428 sayNO;
3429 }
a0ed51b3
LW
3430 }
3431 else {
c277df42 3432 n = regrepeat_hard(scan, n, &l);
f31a99c8
HS
3433 /* if we matched something zero-length we don't need to
3434 backtrack, unless the minimum count is zero and we
3435 are capturing the result - in that case the capture
3436 being defined or not may affect later execution
3437 */
3438 if (n != 0 && l == 0 && !(paren && ln == 0))
3439 ln = n; /* don't backtrack */
3280af22 3440 locinput = PL_reginput;
c277df42 3441 DEBUG_r(
5c0ca799 3442 PerlIO_printf(Perl_debug_log,
faccc32b 3443 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3444 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3445 (IV) n, (IV)l)
c277df42
IZ
3446 );
3447 if (n >= ln) {
cca55fe3 3448 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3449 regnode *text_node = next;
3450
cca55fe3 3451 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3452
cca55fe3 3453 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3454 else {
cca55fe3
JP
3455 if (PL_regkind[(U8)OP(text_node)] == REF) {
3456 I32 n, ln;
3457 n = ARG(text_node); /* which paren pair */
3458 ln = PL_regstartp[n];
3459 /* assume yes if we haven't seen CLOSEn */
3460 if (
eb160463 3461 (I32)*PL_reglastparen < n ||
cca55fe3
JP
3462 ln == -1 ||
3463 ln == PL_regendp[n]
3464 ) {
3465 c1 = c2 = -1000;
3466 goto assume_ok_REG;
3467 }
3468 c1 = *(PL_bostr + ln);
3469 }
3470 else { c1 = (U8)*STRING(text_node); }
3471
af5decee 3472 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3473 c2 = PL_fold[c1];
af5decee 3474 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3475 c2 = PL_fold_locale[c1];
3476 else
3477 c2 = c1;
3478 }
a0ed51b3
LW
3479 }
3480 else
c277df42
IZ
3481 c1 = c2 = -1000;
3482 }
cca55fe3 3483 assume_ok_REG:
02db2b7b 3484 REGCP_SET(lastcp);
c277df42
IZ
3485 while (n >= ln) {
3486 /* If it could work, try it. */
3487 if (c1 == -1000 ||
3280af22
NIS
3488 UCHARAT(PL_reginput) == c1 ||
3489 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3490 {
3491 DEBUG_r(
c3464db5 3492 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3493 "%*s trying tail with n=%"IVdf"...\n",
3494 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3495 );
3496 if (paren) {
3497 if (n) {
cf93c79d
IZ
3498 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3499 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3500 }
a0ed51b3 3501 else
cf93c79d 3502 PL_regendp[paren] = -1;
c277df42 3503 }
a0ed51b3
LW
3504 if (regmatch(next))
3505 sayYES;
02db2b7b 3506 REGCP_UNWIND(lastcp);
a0ed51b3 3507 }
c277df42
IZ
3508 /* Couldn't or didn't -- back up. */
3509 n--;
dfe13c55 3510 locinput = HOPc(locinput, -l);
3280af22 3511 PL_reginput = locinput;
c277df42
IZ
3512 }
3513 }
3514 sayNO;
3515 break;
3516 }
3517 case CURLYN:
3518 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3519 if (paren > PL_regsize)
3520 PL_regsize = paren;
eb160463 3521 if (paren > (I32)*PL_reglastparen)
3280af22 3522 *PL_reglastparen = paren;
c277df42
IZ
3523 ln = ARG1(scan); /* min to match */
3524 n = ARG2(scan); /* max to match */
dc45a647 3525 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3526 goto repeat;
a0d0e21e 3527 case CURLY:
c277df42 3528 paren = 0;
a0d0e21e
LW
3529 ln = ARG1(scan); /* min to match */
3530 n = ARG2(scan); /* max to match */
dc45a647 3531 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3532 goto repeat;
3533 case STAR:
3534 ln = 0;
c277df42 3535 n = REG_INFTY;
a0d0e21e 3536 scan = NEXTOPER(scan);
c277df42 3537 paren = 0;
a0d0e21e
LW
3538 goto repeat;
3539 case PLUS:
c277df42
IZ
3540 ln = 1;
3541 n = REG_INFTY;
3542 scan = NEXTOPER(scan);
3543 paren = 0;
3544 repeat:
a0d0e21e
LW
3545 /*
3546 * Lookahead to avoid useless match attempts
3547 * when we know what character comes next.
3548 */
5f80c4cf
JP
3549