This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Fix encoding pragma.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
a687059c
LW
83#include "regcomp.h"
84
c277df42
IZ
85#define RF_tainted 1 /* tainted information used? */
86#define RF_warned 2 /* warned about big count? */
ce862d02 87#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
88#define RF_utf8 8 /* String contains multibyte chars? */
89
90#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
91
92#define RS_init 1 /* eval environment created */
93#define RS_set 2 /* replsv value is set */
c277df42 94
a687059c
LW
95#ifndef STATIC
96#define STATIC static
97#endif
98
c277df42
IZ
99/*
100 * Forwards.
101 */
102
a0ed51b3 103#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 104#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 105
dfe13c55
GS
106#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
108#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
109#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
110#define HOPc(pos,off) ((char*)HOP(pos,off))
111#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 112
efb30f32 113#define HOPBACK(pos, off) ( \
53c4c00c 114 (UTF && PL_reg_match_utf8) \
efb30f32
HS
115 ? reghopmaybe((U8*)pos, -off) \
116 : (pos - off >= PL_bostr) \
117 ? (U8*)(pos - off) \
118 : (U8*)NULL \
119)
120#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
121
1aa99e6b
IH
122#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
123#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
124#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
125#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
126#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
127#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
128
8269fa76 129#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 130
5f80c4cf 131/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
132#define JUMPABLE(rn) ( \
133 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
134 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
135 OP(rn) == PLUS || OP(rn) == MINMOD || \
136 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
137)
138
cca55fe3
JP
139#define HAS_TEXT(rn) ( \
140 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
141)
e2d8ce26 142
cca55fe3 143#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 144 while (JUMPABLE(rn)) \
cca55fe3
JP
145 if (OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
146 PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 147 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
148 else if (OP(rn) == PLUS) \
149 rn = NEXTOPER(rn); \
e2d8ce26 150 else rn += NEXT_OFF(rn); \
5f80c4cf 151} STMT_END
74750237 152
acfe0abc 153static void restore_pos(pTHX_ void *arg);
51371543 154
76e3520e 155STATIC CHECKPOINT
cea2e8a9 156S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 157{
3280af22 158 int retval = PL_savestack_ix;
b1ce53c5
JH
159#define REGCP_PAREN_ELEMS 4
160 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
161 int p;
162
e49a9654
IH
163 if (paren_elems_to_push < 0)
164 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
165
a01268b5 166#define REGCP_OTHER_ELEMS 6
b1ce53c5 167 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 168 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 169/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
170 SSPUSHINT(PL_regendp[p]);
171 SSPUSHINT(PL_regstartp[p]);
3280af22 172 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
173 SSPUSHINT(p);
174 }
b1ce53c5 175/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
176 SSPUSHINT(PL_regsize);
177 SSPUSHINT(*PL_reglastparen);
a01268b5 178 SSPUSHINT(*PL_reglastcloseparen);
3280af22 179 SSPUSHPTR(PL_reginput);
41123dfd
JH
180#define REGCP_FRAME_ELEMS 2
181/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
182 * are needed for the regexp context stack bookkeeping. */
183 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 184 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 185
a0d0e21e
LW
186 return retval;
187}
188
c277df42 189/* These are needed since we do not localize EVAL nodes: */
02db2b7b 190# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 191 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 192 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 193
02db2b7b 194# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 195 PerlIO_printf(Perl_debug_log, \
faccc32b 196 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 197 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 198
76e3520e 199STATIC char *
cea2e8a9 200S_regcppop(pTHX)
a0d0e21e 201{
b1ce53c5 202 I32 i;
a0d0e21e
LW
203 U32 paren = 0;
204 char *input;
cf93c79d 205 I32 tmps;
b1ce53c5
JH
206
207 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 208 i = SSPOPINT;
b1ce53c5
JH
209 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
210 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 211 input = (char *) SSPOPPTR;
a01268b5 212 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
213 *PL_reglastparen = SSPOPINT;
214 PL_regsize = SSPOPINT;
b1ce53c5
JH
215
216 /* Now restore the parentheses context. */
41123dfd
JH
217 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
218 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 219 paren = (U32)SSPOPINT;
3280af22 220 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
221 PL_regstartp[paren] = SSPOPINT;
222 tmps = SSPOPINT;
3280af22
NIS
223 if (paren <= *PL_reglastparen)
224 PL_regendp[paren] = tmps;
c277df42 225 DEBUG_r(
c3464db5 226 PerlIO_printf(Perl_debug_log,
b900a521 227 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 228 (UV)paren, (IV)PL_regstartp[paren],
b900a521 229 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 230 (IV)PL_regendp[paren],
3280af22 231 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 232 );
a0d0e21e 233 }
c277df42 234 DEBUG_r(
3280af22 235 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 236 PerlIO_printf(Perl_debug_log,
faccc32b
JH
237 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
238 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
239 }
240 );
daf18116 241#if 1
dafc8851
JH
242 /* It would seem that the similar code in regtry()
243 * already takes care of this, and in fact it is in
244 * a better location to since this code can #if 0-ed out
245 * but the code in regtry() is needed or otherwise tests
246 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
247 * (as of patchlevel 7877) will fail. Then again,
248 * this code seems to be necessary or otherwise
249 * building DynaLoader will fail:
250 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
251 * --jhi */
3280af22
NIS
252 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
253 if (paren > PL_regsize)
cf93c79d
IZ
254 PL_regstartp[paren] = -1;
255 PL_regendp[paren] = -1;
a0d0e21e 256 }
dafc8851 257#endif
a0d0e21e
LW
258 return input;
259}
260
0f5d15d6 261STATIC char *
cea2e8a9 262S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
263{
264 I32 tmp = PL_savestack_ix;
265
266 PL_savestack_ix = ss;
267 regcppop();
268 PL_savestack_ix = tmp;
942e002e 269 return Nullch;
0f5d15d6
IZ
270}
271
272typedef struct re_cc_state
273{
274 I32 ss;
275 regnode *node;
276 struct re_cc_state *prev;
277 CURCUR *cc;
278 regexp *re;
279} re_cc_state;
280
02db2b7b 281#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 282
29d1e993
HS
283#define TRYPAREN(paren, n, input) { \
284 if (paren) { \
285 if (n) { \
286 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
287 PL_regendp[paren] = input - PL_bostr; \
288 } \
289 else \
290 PL_regendp[paren] = -1; \
291 } \
292 if (regmatch(next)) \
293 sayYES; \
294 if (paren && n) \
295 PL_regendp[paren] = -1; \
296}
297
298
a687059c 299/*
e50aee73 300 * pregexec and friends
a687059c
LW
301 */
302
303/*
c277df42 304 - pregexec - match a regexp against a string
a687059c 305 */
c277df42 306I32
864dbfa3 307Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 308 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
309/* strend: pointer to null at end of string */
310/* strbeg: real beginning of string */
311/* minend: end of match must be >=minend after stringarg. */
312/* nosave: For optimizations. */
313{
314 return
9041c2e3 315 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
316 nosave ? 0 : REXEC_COPY_STR);
317}
0f5d15d6
IZ
318
319STATIC void
cea2e8a9 320S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
321{
322 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
323#ifdef DEBUGGING
324 PL_regprogram = prog->program;
325#endif
326 PL_regnpar = prog->nparens;
9041c2e3
NIS
327 PL_regdata = prog->data;
328 PL_reg_re = prog;
0f5d15d6 329}
22e551b9 330
9041c2e3 331/*
cad2e5aa
JH
332 * Need to implement the following flags for reg_anch:
333 *
334 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
335 * USE_INTUIT_ML
336 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
337 * INTUIT_AUTORITATIVE_ML
338 * INTUIT_ONCE_NOML - Intuit can match in one location only.
339 * INTUIT_ONCE_ML
340 *
341 * Another flag for this function: SECOND_TIME (so that float substrs
342 * with giant delta may be not rechecked).
343 */
344
345/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
346
2c2d71f5 347/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
348 Otherwise, only SvCUR(sv) is used to get strbeg. */
349
350/* XXXX We assume that strpos is strbeg unless sv. */
351
6eb5f6b9
JH
352/* XXXX Some places assume that there is a fixed substring.
353 An update may be needed if optimizer marks as "INTUITable"
354 RExen without fixed substrings. Similarly, it is assumed that
355 lengths of all the strings are no more than minlen, thus they
356 cannot come from lookahead.
357 (Or minlen should take into account lookahead.) */
358
2c2d71f5
JH
359/* A failure to find a constant substring means that there is no need to make
360 an expensive call to REx engine, thus we celebrate a failure. Similarly,
361 finding a substring too deep into the string means that less calls to
30944b6d
IZ
362 regtry() should be needed.
363
364 REx compiler's optimizer found 4 possible hints:
365 a) Anchored substring;
366 b) Fixed substring;
367 c) Whether we are anchored (beginning-of-line or \G);
368 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 369 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
370 string which does not contradict any of them.
371 */
2c2d71f5 372
6eb5f6b9
JH
373/* Most of decisions we do here should have been done at compile time.
374 The nodes of the REx which we used for the search should have been
375 deleted from the finite automaton. */
376
cad2e5aa
JH
377char *
378Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
379 char *strend, U32 flags, re_scream_pos_data *data)
380{
b7953727 381 register I32 start_shift = 0;
cad2e5aa 382 /* Should be nonnegative! */
b7953727 383 register I32 end_shift = 0;
2c2d71f5
JH
384 register char *s;
385 register SV *check;
a1933d95 386 char *strbeg;
cad2e5aa
JH
387 char *t;
388 I32 ml_anch;
6eb5f6b9 389 register char *other_last = Nullch; /* other substr checked before this */
b7953727 390 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
391#ifdef DEBUGGING
392 char *i_strpos = strpos;
ce333219 393 SV *dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 394#endif
cad2e5aa 395
b8d68ded
JH
396 if (prog->reganch & ROPT_UTF8) {
397 DEBUG_r(PerlIO_printf(Perl_debug_log,
398 "UTF-8 regex...\n"));
399 PL_reg_flags |= RF_utf8;
400 }
401
2a782b5b 402 DEBUG_r({
b8d68ded
JH
403 char *s = PL_reg_match_utf8 ?
404 sv_uni_display(dsv, sv, 60, 0) : strpos;
405 int len = PL_reg_match_utf8 ?
406 strlen(s) : strend - strpos;
2a782b5b
JH
407 if (!PL_colorset)
408 reginitcolors();
b8d68ded
JH
409 if (PL_reg_match_utf8)
410 DEBUG_r(PerlIO_printf(Perl_debug_log,
411 "UTF-8 target...\n"));
2a782b5b
JH
412 PerlIO_printf(Perl_debug_log,
413 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
414 PL_colors[4],PL_colors[5],PL_colors[0],
415 prog->precomp,
416 PL_colors[1],
417 (strlen(prog->precomp) > 60 ? "..." : ""),
418 PL_colors[0],
419 (int)(len > 60 ? 60 : len),
420 s, PL_colors[1],
421 (len > 60 ? "..." : "")
422 );
423 });
cad2e5aa 424
1aa99e6b 425 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
a72c7584
JH
426 DEBUG_r(PerlIO_printf(Perl_debug_log,
427 "String too short... [re_intuit_start]\n"));
cad2e5aa 428 goto fail;
2c2d71f5 429 }
a1933d95 430 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 431 PL_regeol = strend;
653099ff 432 check = prog->check_substr;
2c2d71f5 433 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
434 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
435 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 436 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 437
7e25d62c
JH
438 if (!ml_anch) {
439 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
440 | ROPT_IMPLICIT)) /* not a real BOL */
441 /* SvCUR is not set on references: SvRV and SvPVX overlap */
442 && sv && !SvROK(sv)
443 && (strpos != strbeg)) {
444 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
445 goto fail;
446 }
447 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 448 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 449 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
450 I32 slen;
451
1aa99e6b 452 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
453 if (SvTAIL(check)) {
454 slen = SvCUR(check); /* >= 1 */
cad2e5aa 455
9041c2e3 456 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
457 || (strend - s == slen && strend[-1] != '\n')) {
458 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
459 goto fail_finish;
cad2e5aa
JH
460 }
461 /* Now should match s[0..slen-2] */
462 slen--;
653099ff 463 if (slen && (*SvPVX(check) != *s
cad2e5aa 464 || (slen > 1
653099ff 465 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
466 report_neq:
467 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
468 goto fail_finish;
469 }
cad2e5aa 470 }
653099ff
GS
471 else if (*SvPVX(check) != *s
472 || ((slen = SvCUR(check)) > 1
473 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
474 goto report_neq;
475 goto success_at_start;
7e25d62c 476 }
cad2e5aa 477 }
2c2d71f5 478 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 479 s = strpos;
2c2d71f5 480 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 481 end_shift = prog->minlen - start_shift -
653099ff 482 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 483 if (!ml_anch) {
653099ff
GS
484 I32 end = prog->check_offset_max + CHR_SVLEN(check)
485 - (SvTAIL(check) != 0);
1aa99e6b 486 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
487
488 if (end_shift < eshift)
489 end_shift = eshift;
490 }
cad2e5aa 491 }
2c2d71f5 492 else { /* Can match at random position */
cad2e5aa
JH
493 ml_anch = 0;
494 s = strpos;
2c2d71f5
JH
495 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
496 /* Should be nonnegative! */
497 end_shift = prog->minlen - start_shift -
653099ff 498 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
499 }
500
2c2d71f5 501#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 502 if (end_shift < 0)
6bbae5e6 503 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
504#endif
505
2c2d71f5
JH
506 restart:
507 /* Find a possible match in the region s..strend by looking for
508 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 509 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
510 I32 p = -1; /* Internal iterator of scream. */
511 I32 *pp = data ? data->scream_pos : &p;
512
2c2d71f5
JH
513 if (PL_screamfirst[BmRARE(check)] >= 0
514 || ( BmRARE(check) == '\n'
515 && (BmPREVIOUS(check) == SvCUR(check) - 1)
516 && SvTAIL(check) ))
9041c2e3 517 s = screaminstr(sv, check,
2c2d71f5 518 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 519 else
2c2d71f5 520 goto fail_finish;
cad2e5aa
JH
521 if (data)
522 *data->scream_olds = s;
523 }
f33976b4 524 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
525 s = fbm_instr((U8*)(s + start_shift),
526 (U8*)(strend - end_shift),
527 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 528 else
1aa99e6b
IH
529 s = fbm_instr(HOP3(s, start_shift, strend),
530 HOP3(strend, -end_shift, strbeg),
2c2d71f5 531 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
532
533 /* Update the count-of-usability, remove useless subpatterns,
534 unshift s. */
2c2d71f5
JH
535
536 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
537 (s ? "Found" : "Did not find"),
538 ((check == prog->anchored_substr) ? "anchored" : "floating"),
539 PL_colors[0],
7b0972df
JH
540 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
541 SvPVX(check),
2c2d71f5
JH
542 PL_colors[1], (SvTAIL(check) ? "$" : ""),
543 (s ? " at offset " : "...\n") ) );
544
545 if (!s)
546 goto fail_finish;
547
6eb5f6b9
JH
548 check_at = s;
549
2c2d71f5 550 /* Finish the diagnostic message */
30944b6d 551 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
552
553 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
554 Start with the other substr.
555 XXXX no SCREAM optimization yet - and a very coarse implementation
556 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
557 *always* match. Probably should be marked during compile...
558 Probably it is right to do no SCREAM here...
559 */
560
561 if (prog->float_substr && prog->anchored_substr) {
30944b6d 562 /* Take into account the "other" substring. */
2c2d71f5
JH
563 /* XXXX May be hopelessly wrong for UTF... */
564 if (!other_last)
6eb5f6b9 565 other_last = strpos;
2c2d71f5 566 if (check == prog->float_substr) {
30944b6d
IZ
567 do_other_anchored:
568 {
1aa99e6b 569 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
570 char *s1 = s;
571
2c2d71f5
JH
572 t = s - prog->check_offset_max;
573 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
574 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 575 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 576 && t > strpos)))
30944b6d 577 /* EMPTY */;
2c2d71f5
JH
578 else
579 t = strpos;
1aa99e6b 580 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
581 if (t < other_last) /* These positions already checked */
582 t = other_last;
1aa99e6b 583 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
584 if (last < last1)
585 last1 = last;
586 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
587 /* On end-of-str: see comment below. */
588 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
589 HOP3(HOP3(last1, prog->anchored_offset, strend)
590 + SvCUR(prog->anchored_substr),
591 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
592 prog->anchored_substr,
593 PL_multiline ? FBMrf_MULTILINE : 0);
594 DEBUG_r(PerlIO_printf(Perl_debug_log,
595 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
596 (s ? "Found" : "Contradicts"),
597 PL_colors[0],
7b0972df
JH
598 (int)(SvCUR(prog->anchored_substr)
599 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
600 SvPVX(prog->anchored_substr),
601 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
602 if (!s) {
603 if (last1 >= last2) {
604 DEBUG_r(PerlIO_printf(Perl_debug_log,
605 ", giving up...\n"));
606 goto fail_finish;
607 }
608 DEBUG_r(PerlIO_printf(Perl_debug_log,
609 ", trying floating at offset %ld...\n",
1aa99e6b
IH
610 (long)(HOP3c(s1, 1, strend) - i_strpos)));
611 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
612 s = HOP3c(last, 1, strend);
2c2d71f5
JH
613 goto restart;
614 }
615 else {
616 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 617 (long)(s - i_strpos)));
1aa99e6b
IH
618 t = HOP3c(s, -prog->anchored_offset, strbeg);
619 other_last = HOP3c(s, 1, strend);
30944b6d 620 s = s1;
2c2d71f5
JH
621 if (t == strpos)
622 goto try_at_start;
2c2d71f5
JH
623 goto try_at_offset;
624 }
30944b6d 625 }
2c2d71f5
JH
626 }
627 else { /* Take into account the floating substring. */
628 char *last, *last1;
629 char *s1 = s;
630
1aa99e6b
IH
631 t = HOP3c(s, -start_shift, strbeg);
632 last1 = last =
633 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
634 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
635 last = HOP3c(t, prog->float_max_offset, strend);
636 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
637 if (s < other_last)
638 s = other_last;
2c2d71f5
JH
639 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
640 /* fbm_instr() takes into account exact value of end-of-str
641 if the check is SvTAIL(ed). Since false positives are OK,
642 and end-of-str is not later than strend we are OK. */
643 s = fbm_instr((unsigned char*)s,
644 (unsigned char*)last + SvCUR(prog->float_substr)
645 - (SvTAIL(prog->float_substr)!=0),
646 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
647 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
648 (s ? "Found" : "Contradicts"),
649 PL_colors[0],
7b0972df
JH
650 (int)(SvCUR(prog->float_substr)
651 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
652 SvPVX(prog->float_substr),
653 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
654 if (!s) {
655 if (last1 == last) {
656 DEBUG_r(PerlIO_printf(Perl_debug_log,
657 ", giving up...\n"));
658 goto fail_finish;
659 }
660 DEBUG_r(PerlIO_printf(Perl_debug_log,
661 ", trying anchored starting at offset %ld...\n",
30944b6d 662 (long)(s1 + 1 - i_strpos)));
803ff556 663 other_last = last;
1aa99e6b 664 s = HOP3c(t, 1, strend);
2c2d71f5
JH
665 goto restart;
666 }
667 else {
668 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 669 (long)(s - i_strpos)));
803ff556 670 other_last = s; /* Fix this later. --Hugo */
30944b6d 671 s = s1;
2c2d71f5
JH
672 if (t == strpos)
673 goto try_at_start;
2c2d71f5
JH
674 goto try_at_offset;
675 }
676 }
cad2e5aa 677 }
2c2d71f5
JH
678
679 t = s - prog->check_offset_max;
2c2d71f5
JH
680 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
681 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
682 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
683 && t > strpos))) {
2c2d71f5
JH
684 /* Fixed substring is found far enough so that the match
685 cannot start at strpos. */
686 try_at_offset:
cad2e5aa 687 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
688 /* Eventually fbm_*() should handle this, but often
689 anchored_offset is not 0, so this check will not be wasted. */
690 /* XXXX In the code below we prefer to look for "^" even in
691 presence of anchored substrings. And we search even
692 beyond the found float position. These pessimizations
693 are historical artefacts only. */
694 find_anchor:
2c2d71f5 695 while (t < strend - prog->minlen) {
cad2e5aa 696 if (*t == '\n') {
4ee3650e 697 if (t < check_at - prog->check_offset_min) {
30944b6d 698 if (prog->anchored_substr) {
4ee3650e
GS
699 /* Since we moved from the found position,
700 we definitely contradict the found anchored
30944b6d
IZ
701 substr. Due to the above check we do not
702 contradict "check" substr.
703 Thus we can arrive here only if check substr
704 is float. Redo checking for "other"=="fixed".
705 */
9041c2e3 706 strpos = t + 1;
30944b6d
IZ
707 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
708 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
709 goto do_other_anchored;
710 }
4ee3650e
GS
711 /* We don't contradict the found floating substring. */
712 /* XXXX Why not check for STCLASS? */
cad2e5aa 713 s = t + 1;
2c2d71f5 714 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 715 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
716 goto set_useful;
717 }
4ee3650e
GS
718 /* Position contradicts check-string */
719 /* XXXX probably better to look for check-string
720 than for "\n", so one should lower the limit for t? */
721 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 722 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 723 other_last = strpos = s = t + 1;
cad2e5aa
JH
724 goto restart;
725 }
726 t++;
727 }
2c2d71f5
JH
728 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
729 PL_colors[0],PL_colors[1]));
730 goto fail_finish;
cad2e5aa 731 }
f5952150
GS
732 else {
733 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
734 PL_colors[0],PL_colors[1]));
735 }
cad2e5aa
JH
736 s = t;
737 set_useful:
2c2d71f5 738 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
739 }
740 else {
f5952150 741 /* The found string does not prohibit matching at strpos,
2c2d71f5 742 - no optimization of calling REx engine can be performed,
f5952150
GS
743 unless it was an MBOL and we are not after MBOL,
744 or a future STCLASS check will fail this. */
2c2d71f5
JH
745 try_at_start:
746 /* Even in this situation we may use MBOL flag if strpos is offset
747 wrt the start of the string. */
05b4157f 748 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 749 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
750 /* May be due to an implicit anchor of m{.*foo} */
751 && !(prog->reganch & ROPT_IMPLICIT))
752 {
cad2e5aa
JH
753 t = strpos;
754 goto find_anchor;
755 }
30944b6d 756 DEBUG_r( if (ml_anch)
f5952150
GS
757 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
758 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 759 );
2c2d71f5 760 success_at_start:
30944b6d 761 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 762 && prog->check_substr /* Could be deleted already */
cad2e5aa 763 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
764 && prog->check_substr == prog->float_substr)
765 {
cad2e5aa 766 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 767 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
768 SvREFCNT_dec(prog->check_substr);
769 prog->check_substr = Nullsv; /* disable */
770 prog->float_substr = Nullsv; /* clear */
5e39e1e5 771 check = Nullsv; /* abort */
cad2e5aa 772 s = strpos;
3cf5c195
IZ
773 /* XXXX This is a remnant of the old implementation. It
774 looks wasteful, since now INTUIT can use many
6eb5f6b9 775 other heuristics. */
cad2e5aa
JH
776 prog->reganch &= ~RE_USE_INTUIT;
777 }
778 else
779 s = strpos;
780 }
781
6eb5f6b9
JH
782 /* Last resort... */
783 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
784 if (prog->regstclass) {
785 /* minlen == 0 is possible if regstclass is \b or \B,
786 and the fixed substr is ''$.
787 Since minlen is already taken into account, s+1 is before strend;
788 accidentally, minlen >= 1 guaranties no false positives at s + 1
789 even for \b or \B. But (minlen? 1 : 0) below assumes that
790 regstclass does not come from lookahead... */
791 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
792 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 793 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 794 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 795 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 796 : 1);
6eb5f6b9 797 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
798 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
799 : (prog->float_substr
800 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
801 cl_l, strend)
802 : strend);
a1933d95 803 char *startpos = strbeg;
6eb5f6b9
JH
804
805 t = s;
76384e4a 806 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 807 PL_regdata = prog->data;
76384e4a
GS
808 PL_bostr = startpos;
809 }
f33976b4 810 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
6eb5f6b9
JH
811 if (!s) {
812#ifdef DEBUGGING
b7953727 813 char *what = 0;
6eb5f6b9
JH
814#endif
815 if (endpos == strend) {
816 DEBUG_r( PerlIO_printf(Perl_debug_log,
817 "Could not match STCLASS...\n") );
818 goto fail;
819 }
66e933ab
GS
820 DEBUG_r( PerlIO_printf(Perl_debug_log,
821 "This position contradicts STCLASS...\n") );
653099ff
GS
822 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
823 goto fail;
6eb5f6b9
JH
824 /* Contradict one of substrings */
825 if (prog->anchored_substr) {
6eb5f6b9
JH
826 if (prog->anchored_substr == check) {
827 DEBUG_r( what = "anchored" );
828 hop_and_restart:
1aa99e6b 829 s = HOP3c(t, 1, strend);
66e933ab
GS
830 if (s + start_shift + end_shift > strend) {
831 /* XXXX Should be taken into account earlier? */
832 DEBUG_r( PerlIO_printf(Perl_debug_log,
833 "Could not match STCLASS...\n") );
834 goto fail;
835 }
5e39e1e5
HS
836 if (!check)
837 goto giveup;
6eb5f6b9 838 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 839 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
840 what, (long)(s + start_shift - i_strpos)) );
841 goto restart;
842 }
66e933ab 843 /* Have both, check_string is floating */
6eb5f6b9
JH
844 if (t + start_shift >= check_at) /* Contradicts floating=check */
845 goto retry_floating_check;
846 /* Recheck anchored substring, but not floating... */
9041c2e3 847 s = check_at;
5e39e1e5
HS
848 if (!check)
849 goto giveup;
6eb5f6b9 850 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 851 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
852 (long)(other_last - i_strpos)) );
853 goto do_other_anchored;
854 }
60e71179
GS
855 /* Another way we could have checked stclass at the
856 current position only: */
857 if (ml_anch) {
858 s = t = t + 1;
5e39e1e5
HS
859 if (!check)
860 goto giveup;
60e71179 861 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
862 "Looking for /%s^%s/m starting at offset %ld...\n",
863 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 864 goto try_at_offset;
66e933ab 865 }
60e71179
GS
866 if (!prog->float_substr) /* Could have been deleted */
867 goto fail;
6eb5f6b9
JH
868 /* Check is floating subtring. */
869 retry_floating_check:
870 t = check_at - start_shift;
871 DEBUG_r( what = "floating" );
872 goto hop_and_restart;
873 }
b7953727
JH
874 if (t != s) {
875 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 876 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
877 (long)(t - i_strpos), (long)(s - i_strpos))
878 );
879 }
880 else {
881 DEBUG_r(PerlIO_printf(Perl_debug_log,
882 "Does not contradict STCLASS...\n");
883 );
884 }
6eb5f6b9 885 }
5e39e1e5
HS
886 giveup:
887 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
888 PL_colors[4], (check ? "Guessed" : "Giving up"),
889 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 890 return s;
2c2d71f5
JH
891
892 fail_finish: /* Substring not found */
66e933ab
GS
893 if (prog->check_substr) /* could be removed already */
894 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 895 fail:
2c2d71f5 896 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
897 PL_colors[4],PL_colors[5]));
898 return Nullch;
899}
9661b544 900
6eb5f6b9 901/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
902STATIC char *
903S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 904{
6eb5f6b9
JH
905 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
906 char *m;
d8093b23
G
907 STRLEN ln;
908 unsigned int c1;
909 unsigned int c2;
6eb5f6b9
JH
910 char *e;
911 register I32 tmp = 1; /* Scratch variable? */
53c4c00c 912 register bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 913
6eb5f6b9
JH
914 /* We know what class it must start with. */
915 switch (OP(c)) {
6eb5f6b9 916 case ANYOF:
a0ed51b3 917 while (s < strend) {
ffc61ed2 918 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
919 if (tmp && (norun || regtry(prog, s)))
920 goto got_it;
921 else
922 tmp = doevery;
a0ed51b3 923 }
6eb5f6b9
JH
924 else
925 tmp = 1;
ffc61ed2 926 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 927 }
6eb5f6b9 928 break;
f33976b4
DB
929 case CANY:
930 while (s < strend) {
931 if (tmp && (norun || regtry(prog, s)))
932 goto got_it;
933 else
934 tmp = doevery;
935 s++;
936 }
937 break;
6eb5f6b9
JH
938 case EXACTF:
939 m = STRING(c);
940 ln = STR_LEN(c);
1aa99e6b 941 if (UTF) {
a2a2844f 942 STRLEN ulen1, ulen2;
e7ae6809
JH
943 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
944 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
945
946 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
947 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
948
949 c1 = utf8_to_uvuni(tmpbuf1, 0);
950 c2 = utf8_to_uvuni(tmpbuf2, 0);
1aa99e6b
IH
951 }
952 else {
953 c1 = *(U8*)m;
954 c2 = PL_fold[c1];
955 }
6eb5f6b9
JH
956 goto do_exactf;
957 case EXACTFL:
958 m = STRING(c);
959 ln = STR_LEN(c);
d8093b23 960 c1 = *(U8*)m;
6eb5f6b9
JH
961 c2 = PL_fold_locale[c1];
962 do_exactf:
963 e = strend - ln;
b3c9acc1 964
6eb5f6b9
JH
965 if (norun && e < s)
966 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
967
968 if (do_utf8) {
969 STRLEN len;
cadb39a9
JH
970 /* The ibcmp_utf8() uses to_uni_fold() which is more
971 * correct folding for Unicode than using lowercase.
972 * However, it doesn't work quite fully since the folding
973 * is a one-to-many mapping and the regex optimizer is
974 * unaware of this, so it may throw out good matches.
975 * Fortunately, not getting this right is allowed
976 * for Unicode Regular Expression Support level 1,
977 * only one-to-one matching is required. --jhi */
1aa99e6b
IH
978 if (c1 == c2)
979 while (s <= e) {
9041c2e3 980 if ( utf8_to_uvchr((U8*)s, &len) == c1
bc517b45 981 && (ln == len ||
332ddc25 982 ibcmp_utf8(s, do_utf8, strend - s,
55da9344
JH
983 m, UTF, ln))
984 && (norun || regtry(prog, s)) )
1aa99e6b
IH
985 goto got_it;
986 s += len;
987 }
988 else
989 while (s <= e) {
9041c2e3 990 UV c = utf8_to_uvchr((U8*)s, &len);
701a277b 991 if ( (c == c1 || c == c2)
bc517b45 992 && (ln == len ||
332ddc25 993 ibcmp_utf8(s, do_utf8, strend - s,
55da9344
JH
994 m, UTF, ln))
995 && (norun || regtry(prog, s)) )
1aa99e6b
IH
996 goto got_it;
997 s += len;
998 }
999 }
1000 else {
1001 if (c1 == c2)
1002 while (s <= e) {
1003 if ( *(U8*)s == c1
1004 && (ln == 1 || !(OP(c) == EXACTF
1005 ? ibcmp(s, m, ln)
1006 : ibcmp_locale(s, m, ln)))
1007 && (norun || regtry(prog, s)) )
1008 goto got_it;
1009 s++;
1010 }
1011 else
1012 while (s <= e) {
1013 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1014 && (ln == 1 || !(OP(c) == EXACTF
1015 ? ibcmp(s, m, ln)
1016 : ibcmp_locale(s, m, ln)))
1017 && (norun || regtry(prog, s)) )
1018 goto got_it;
1019 s++;
1020 }
b3c9acc1
IZ
1021 }
1022 break;
bbce6d69 1023 case BOUNDL:
3280af22 1024 PL_reg_flags |= RF_tainted;
bbce6d69 1025 /* FALL THROUGH */
a0d0e21e 1026 case BOUND:
ffc61ed2 1027 if (do_utf8) {
12d33761 1028 if (s == PL_bostr)
ffc61ed2
JH
1029 tmp = '\n';
1030 else {
1aa99e6b 1031 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1032
0064a8a9
JH
1033 if (s > (char*)r)
1034 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1035 }
1036 tmp = ((OP(c) == BOUND ?
9041c2e3 1037 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1038 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1039 while (s < strend) {
1040 if (tmp == !(OP(c) == BOUND ?
3568d838 1041 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1042 isALNUM_LC_utf8((U8*)s)))
1043 {
1044 tmp = !tmp;
1045 if ((norun || regtry(prog, s)))
1046 goto got_it;
1047 }
1048 s += UTF8SKIP(s);
a687059c 1049 }
a0d0e21e 1050 }
667bb95a 1051 else {
12d33761 1052 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1053 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1054 while (s < strend) {
1055 if (tmp ==
1056 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1057 tmp = !tmp;
1058 if ((norun || regtry(prog, s)))
1059 goto got_it;
1060 }
1061 s++;
a0ed51b3 1062 }
a0ed51b3 1063 }
6eb5f6b9 1064 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1065 goto got_it;
1066 break;
bbce6d69 1067 case NBOUNDL:
3280af22 1068 PL_reg_flags |= RF_tainted;
bbce6d69 1069 /* FALL THROUGH */
a0d0e21e 1070 case NBOUND:
ffc61ed2 1071 if (do_utf8) {
12d33761 1072 if (s == PL_bostr)
ffc61ed2
JH
1073 tmp = '\n';
1074 else {
1aa99e6b 1075 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1076
0064a8a9
JH
1077 if (s > (char*)r)
1078 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1079 }
1080 tmp = ((OP(c) == NBOUND ?
9041c2e3 1081 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1082 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1083 while (s < strend) {
1084 if (tmp == !(OP(c) == NBOUND ?
3568d838 1085 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1086 isALNUM_LC_utf8((U8*)s)))
1087 tmp = !tmp;
1088 else if ((norun || regtry(prog, s)))
1089 goto got_it;
1090 s += UTF8SKIP(s);
1091 }
a0d0e21e 1092 }
667bb95a 1093 else {
12d33761 1094 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1095 tmp = ((OP(c) == NBOUND ?
1096 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1097 while (s < strend) {
1098 if (tmp ==
1099 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1100 tmp = !tmp;
1101 else if ((norun || regtry(prog, s)))
1102 goto got_it;
1103 s++;
1104 }
a0ed51b3 1105 }
6eb5f6b9 1106 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1107 goto got_it;
1108 break;
a0d0e21e 1109 case ALNUM:
ffc61ed2 1110 if (do_utf8) {
8269fa76 1111 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1112 while (s < strend) {
3568d838 1113 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1114 if (tmp && (norun || regtry(prog, s)))
1115 goto got_it;
1116 else
1117 tmp = doevery;
1118 }
bbce6d69 1119 else
ffc61ed2
JH
1120 tmp = 1;
1121 s += UTF8SKIP(s);
bbce6d69 1122 }
bbce6d69 1123 }
ffc61ed2
JH
1124 else {
1125 while (s < strend) {
1126 if (isALNUM(*s)) {
1127 if (tmp && (norun || regtry(prog, s)))
1128 goto got_it;
1129 else
1130 tmp = doevery;
1131 }
a0ed51b3 1132 else
ffc61ed2
JH
1133 tmp = 1;
1134 s++;
a0ed51b3 1135 }
a0ed51b3
LW
1136 }
1137 break;
bbce6d69 1138 case ALNUML:
3280af22 1139 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1140 if (do_utf8) {
1141 while (s < strend) {
1142 if (isALNUM_LC_utf8((U8*)s)) {
1143 if (tmp && (norun || regtry(prog, s)))
1144 goto got_it;
1145 else
1146 tmp = doevery;
1147 }
a687059c 1148 else
ffc61ed2
JH
1149 tmp = 1;
1150 s += UTF8SKIP(s);
a0d0e21e 1151 }
a0d0e21e 1152 }
ffc61ed2
JH
1153 else {
1154 while (s < strend) {
1155 if (isALNUM_LC(*s)) {
1156 if (tmp && (norun || regtry(prog, s)))
1157 goto got_it;
1158 else
1159 tmp = doevery;
1160 }
a0ed51b3 1161 else
ffc61ed2
JH
1162 tmp = 1;
1163 s++;
a0ed51b3 1164 }
a0ed51b3
LW
1165 }
1166 break;
a0d0e21e 1167 case NALNUM:
ffc61ed2 1168 if (do_utf8) {
8269fa76 1169 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1170 while (s < strend) {
3568d838 1171 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1172 if (tmp && (norun || regtry(prog, s)))
1173 goto got_it;
1174 else
1175 tmp = doevery;
1176 }
bbce6d69 1177 else
ffc61ed2
JH
1178 tmp = 1;
1179 s += UTF8SKIP(s);
bbce6d69 1180 }
bbce6d69 1181 }
ffc61ed2
JH
1182 else {
1183 while (s < strend) {
1184 if (!isALNUM(*s)) {
1185 if (tmp && (norun || regtry(prog, s)))
1186 goto got_it;
1187 else
1188 tmp = doevery;
1189 }
a0ed51b3 1190 else
ffc61ed2
JH
1191 tmp = 1;
1192 s++;
a0ed51b3 1193 }
a0ed51b3
LW
1194 }
1195 break;
bbce6d69 1196 case NALNUML:
3280af22 1197 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1198 if (do_utf8) {
1199 while (s < strend) {
1200 if (!isALNUM_LC_utf8((U8*)s)) {
1201 if (tmp && (norun || regtry(prog, s)))
1202 goto got_it;
1203 else
1204 tmp = doevery;
1205 }
a687059c 1206 else
ffc61ed2
JH
1207 tmp = 1;
1208 s += UTF8SKIP(s);
a687059c 1209 }
a0d0e21e 1210 }
ffc61ed2
JH
1211 else {
1212 while (s < strend) {
1213 if (!isALNUM_LC(*s)) {
1214 if (tmp && (norun || regtry(prog, s)))
1215 goto got_it;
1216 else
1217 tmp = doevery;
1218 }
a0ed51b3 1219 else
ffc61ed2
JH
1220 tmp = 1;
1221 s++;
a0ed51b3 1222 }
a0ed51b3
LW
1223 }
1224 break;
a0d0e21e 1225 case SPACE:
ffc61ed2 1226 if (do_utf8) {
8269fa76 1227 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1228 while (s < strend) {
3568d838 1229 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1230 if (tmp && (norun || regtry(prog, s)))
1231 goto got_it;
1232 else
1233 tmp = doevery;
1234 }
a0d0e21e 1235 else
ffc61ed2
JH
1236 tmp = 1;
1237 s += UTF8SKIP(s);
2304df62 1238 }
a0d0e21e 1239 }
ffc61ed2
JH
1240 else {
1241 while (s < strend) {
1242 if (isSPACE(*s)) {
1243 if (tmp && (norun || regtry(prog, s)))
1244 goto got_it;
1245 else
1246 tmp = doevery;
1247 }
a0ed51b3 1248 else
ffc61ed2
JH
1249 tmp = 1;
1250 s++;
a0ed51b3 1251 }
a0ed51b3
LW
1252 }
1253 break;
bbce6d69 1254 case SPACEL:
3280af22 1255 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1256 if (do_utf8) {
1257 while (s < strend) {
1258 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1259 if (tmp && (norun || regtry(prog, s)))
1260 goto got_it;
1261 else
1262 tmp = doevery;
1263 }
bbce6d69 1264 else
ffc61ed2
JH
1265 tmp = 1;
1266 s += UTF8SKIP(s);
bbce6d69 1267 }
bbce6d69 1268 }
ffc61ed2
JH
1269 else {
1270 while (s < strend) {
1271 if (isSPACE_LC(*s)) {
1272 if (tmp && (norun || regtry(prog, s)))
1273 goto got_it;
1274 else
1275 tmp = doevery;
1276 }
a0ed51b3 1277 else
ffc61ed2
JH
1278 tmp = 1;
1279 s++;
a0ed51b3 1280 }
a0ed51b3
LW
1281 }
1282 break;
a0d0e21e 1283 case NSPACE:
ffc61ed2 1284 if (do_utf8) {
8269fa76 1285 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1286 while (s < strend) {
3568d838 1287 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1288 if (tmp && (norun || regtry(prog, s)))
1289 goto got_it;
1290 else
1291 tmp = doevery;
1292 }
a0d0e21e 1293 else
ffc61ed2
JH
1294 tmp = 1;
1295 s += UTF8SKIP(s);
a687059c 1296 }
a0d0e21e 1297 }
ffc61ed2
JH
1298 else {
1299 while (s < strend) {
1300 if (!isSPACE(*s)) {
1301 if (tmp && (norun || regtry(prog, s)))
1302 goto got_it;
1303 else
1304 tmp = doevery;
1305 }
a0ed51b3 1306 else
ffc61ed2
JH
1307 tmp = 1;
1308 s++;
a0ed51b3 1309 }
a0ed51b3
LW
1310 }
1311 break;
bbce6d69 1312 case NSPACEL:
3280af22 1313 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1314 if (do_utf8) {
1315 while (s < strend) {
1316 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1317 if (tmp && (norun || regtry(prog, s)))
1318 goto got_it;
1319 else
1320 tmp = doevery;
1321 }
bbce6d69 1322 else
ffc61ed2
JH
1323 tmp = 1;
1324 s += UTF8SKIP(s);
bbce6d69 1325 }
bbce6d69 1326 }
ffc61ed2
JH
1327 else {
1328 while (s < strend) {
1329 if (!isSPACE_LC(*s)) {
1330 if (tmp && (norun || regtry(prog, s)))
1331 goto got_it;
1332 else
1333 tmp = doevery;
1334 }
a0ed51b3 1335 else
ffc61ed2
JH
1336 tmp = 1;
1337 s++;
a0ed51b3 1338 }
a0ed51b3
LW
1339 }
1340 break;
a0d0e21e 1341 case DIGIT:
ffc61ed2 1342 if (do_utf8) {
8269fa76 1343 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1344 while (s < strend) {
3568d838 1345 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1346 if (tmp && (norun || regtry(prog, s)))
1347 goto got_it;
1348 else
1349 tmp = doevery;
1350 }
a0d0e21e 1351 else
ffc61ed2
JH
1352 tmp = 1;
1353 s += UTF8SKIP(s);
2b69d0c2 1354 }
a0d0e21e 1355 }
ffc61ed2
JH
1356 else {
1357 while (s < strend) {
1358 if (isDIGIT(*s)) {
1359 if (tmp && (norun || regtry(prog, s)))
1360 goto got_it;
1361 else
1362 tmp = doevery;
1363 }
a0ed51b3 1364 else
ffc61ed2
JH
1365 tmp = 1;
1366 s++;
a0ed51b3 1367 }
a0ed51b3
LW
1368 }
1369 break;
b8c5462f
JH
1370 case DIGITL:
1371 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1372 if (do_utf8) {
1373 while (s < strend) {
1374 if (isDIGIT_LC_utf8((U8*)s)) {
1375 if (tmp && (norun || regtry(prog, s)))
1376 goto got_it;
1377 else
1378 tmp = doevery;
1379 }
b8c5462f 1380 else
ffc61ed2
JH
1381 tmp = 1;
1382 s += UTF8SKIP(s);
b8c5462f 1383 }
b8c5462f 1384 }
ffc61ed2
JH
1385 else {
1386 while (s < strend) {
1387 if (isDIGIT_LC(*s)) {
1388 if (tmp && (norun || regtry(prog, s)))
1389 goto got_it;
1390 else
1391 tmp = doevery;
1392 }
b8c5462f 1393 else
ffc61ed2
JH
1394 tmp = 1;
1395 s++;
b8c5462f 1396 }
b8c5462f
JH
1397 }
1398 break;
a0d0e21e 1399 case NDIGIT:
ffc61ed2 1400 if (do_utf8) {
8269fa76 1401 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1402 while (s < strend) {
3568d838 1403 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1404 if (tmp && (norun || regtry(prog, s)))
1405 goto got_it;
1406 else
1407 tmp = doevery;
1408 }
a0d0e21e 1409 else
ffc61ed2
JH
1410 tmp = 1;
1411 s += UTF8SKIP(s);
a687059c 1412 }
a0d0e21e 1413 }
ffc61ed2
JH
1414 else {
1415 while (s < strend) {
1416 if (!isDIGIT(*s)) {
1417 if (tmp && (norun || regtry(prog, s)))
1418 goto got_it;
1419 else
1420 tmp = doevery;
1421 }
a0ed51b3 1422 else
ffc61ed2
JH
1423 tmp = 1;
1424 s++;
a0ed51b3 1425 }
a0ed51b3
LW
1426 }
1427 break;
b8c5462f
JH
1428 case NDIGITL:
1429 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1430 if (do_utf8) {
1431 while (s < strend) {
1432 if (!isDIGIT_LC_utf8((U8*)s)) {
1433 if (tmp && (norun || regtry(prog, s)))
1434 goto got_it;
1435 else
1436 tmp = doevery;
1437 }
b8c5462f 1438 else
ffc61ed2
JH
1439 tmp = 1;
1440 s += UTF8SKIP(s);
b8c5462f 1441 }
a0ed51b3 1442 }
ffc61ed2
JH
1443 else {
1444 while (s < strend) {
1445 if (!isDIGIT_LC(*s)) {
1446 if (tmp && (norun || regtry(prog, s)))
1447 goto got_it;
1448 else
1449 tmp = doevery;
1450 }
cf93c79d 1451 else
ffc61ed2
JH
1452 tmp = 1;
1453 s++;
b8c5462f 1454 }
b8c5462f
JH
1455 }
1456 break;
b3c9acc1 1457 default:
3c3eec57
GS
1458 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1459 break;
d6a28714 1460 }
6eb5f6b9
JH
1461 return 0;
1462 got_it:
1463 return s;
1464}
1465
1466/*
1467 - regexec_flags - match a regexp against a string
1468 */
1469I32
1470Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1471 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1472/* strend: pointer to null at end of string */
1473/* strbeg: real beginning of string */
1474/* minend: end of match must be >=minend after stringarg. */
1475/* data: May be used for some additional optimizations. */
1476/* nosave: For optimizations. */
1477{
6eb5f6b9
JH
1478 register char *s;
1479 register regnode *c;
1480 register char *startpos = stringarg;
6eb5f6b9
JH
1481 I32 minlen; /* must match at least this many chars */
1482 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1483 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1484 constant substr. */ /* CC */
1485 I32 end_shift = 0; /* Same for the end. */ /* CC */
1486 I32 scream_pos = -1; /* Internal iterator of scream. */
1487 char *scream_olds;
1488 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1489 bool do_utf8 = DO_UTF8(sv);
2a782b5b 1490#ifdef DEBUGGING
ce333219 1491 SV *dsv = PERL_DEBUG_PAD_ZERO(0);
2a782b5b 1492#endif
6eb5f6b9
JH
1493
1494 PL_regcc = 0;
1495
1496 cache_re(prog);
1497#ifdef DEBUGGING
aea4f609 1498 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1499#endif
1500
1501 /* Be paranoid... */
1502 if (prog == NULL || startpos == NULL) {
1503 Perl_croak(aTHX_ "NULL regexp parameter");
1504 return 0;
1505 }
1506
1507 minlen = prog->minlen;
a72c7584
JH
1508 if (strend - startpos < minlen) {
1509 DEBUG_r(PerlIO_printf(Perl_debug_log,
1510 "String too short [regexec_flags]...\n"));
1511 goto phooey;
1aa99e6b 1512 }
6eb5f6b9 1513
6eb5f6b9
JH
1514 /* Check validity of program. */
1515 if (UCHARAT(prog->program) != REG_MAGIC) {
1516 Perl_croak(aTHX_ "corrupted regexp program");
1517 }
1518
1519 PL_reg_flags = 0;
1520 PL_reg_eval_set = 0;
1521 PL_reg_maxiter = 0;
1522
1523 if (prog->reganch & ROPT_UTF8)
1524 PL_reg_flags |= RF_utf8;
1525
1526 /* Mark beginning of line for ^ and lookbehind. */
1527 PL_regbol = startpos;
1528 PL_bostr = strbeg;
1529 PL_reg_sv = sv;
1530
1531 /* Mark end of line for $ (and such) */
1532 PL_regeol = strend;
1533
1534 /* see how far we have to get to not match where we matched before */
1535 PL_regtill = startpos+minend;
1536
1537 /* We start without call_cc context. */
1538 PL_reg_call_cc = 0;
1539
1540 /* If there is a "must appear" string, look for it. */
1541 s = startpos;
1542
1543 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1544 MAGIC *mg;
1545
1546 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1547 PL_reg_ganch = startpos;
1548 else if (sv && SvTYPE(sv) >= SVt_PVMG
1549 && SvMAGIC(sv)
14befaf4
DM
1550 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1551 && mg->mg_len >= 0) {
6eb5f6b9
JH
1552 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1553 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1554 if (s > PL_reg_ganch)
6eb5f6b9
JH
1555 goto phooey;
1556 s = PL_reg_ganch;
1557 }
1558 }
1559 else /* pos() not defined */
1560 PL_reg_ganch = strbeg;
1561 }
1562
699c3c34
JH
1563 if (do_utf8 == (UTF!=0) &&
1564 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1565 re_scream_pos_data d;
1566
1567 d.scream_olds = &scream_olds;
1568 d.scream_pos = &scream_pos;
1569 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7
JH
1570 if (!s) {
1571 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1572 goto phooey; /* not present */
3fa9c3d7 1573 }
6eb5f6b9
JH
1574 }
1575
2a782b5b 1576 DEBUG_r({
df1ffd02
JH
1577 char *s = do_utf8 ? sv_uni_display(dsv, sv, 60, 0) : startpos;
1578 int len = do_utf8 ? strlen(s) : strend - startpos;
2a782b5b
JH
1579 if (!PL_colorset)
1580 reginitcolors();
1581 PerlIO_printf(Perl_debug_log,
1582 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1583 PL_colors[4],PL_colors[5],PL_colors[0],
1584 prog->precomp,
1585 PL_colors[1],
1586 (strlen(prog->precomp) > 60 ? "..." : ""),
1587 PL_colors[0],
1588 (int)(len > 60 ? 60 : len),
1589 s, PL_colors[1],
1590 (len > 60 ? "..." : "")
1591 );
1592 });
6eb5f6b9
JH
1593
1594 /* Simplest case: anchored match need be tried only once. */
1595 /* [unless only anchor is BOL and multiline is set] */
1596 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1597 if (s == startpos && regtry(prog, startpos))
1598 goto got_it;
1599 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1600 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1601 {
1602 char *end;
1603
1604 if (minlen)
1605 dontbother = minlen - 1;
1aa99e6b 1606 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1607 /* for multiline we only have to try after newlines */
1608 if (prog->check_substr) {
1609 if (s == startpos)
1610 goto after_try;
1611 while (1) {
1612 if (regtry(prog, s))
1613 goto got_it;
1614 after_try:
1615 if (s >= end)
1616 goto phooey;
1617 if (prog->reganch & RE_USE_INTUIT) {
1618 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1619 if (!s)
1620 goto phooey;
1621 }
1622 else
1623 s++;
1624 }
1625 } else {
1626 if (s > startpos)
1627 s--;
1628 while (s < end) {
1629 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1630 if (regtry(prog, s))
1631 goto got_it;
1632 }
1633 }
1634 }
1635 }
1636 goto phooey;
1637 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1638 if (regtry(prog, PL_reg_ganch))
1639 goto got_it;
1640 goto phooey;
1641 }
1642
1643 /* Messy cases: unanchored match. */
9041c2e3 1644 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1645 /* we have /x+whatever/ */
1646 /* it must be a one character string (XXXX Except UTF?) */
1647 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1648#ifdef DEBUGGING
1649 int did_match = 0;
1650#endif
1651
1aa99e6b 1652 if (do_utf8) {
6eb5f6b9
JH
1653 while (s < strend) {
1654 if (*s == ch) {
bf93d4cc 1655 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1656 if (regtry(prog, s)) goto got_it;
1657 s += UTF8SKIP(s);
1658 while (s < strend && *s == ch)
1659 s += UTF8SKIP(s);
1660 }
1661 s += UTF8SKIP(s);
1662 }
1663 }
1664 else {
1665 while (s < strend) {
1666 if (*s == ch) {
bf93d4cc 1667 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1668 if (regtry(prog, s)) goto got_it;
1669 s++;
1670 while (s < strend && *s == ch)
1671 s++;
1672 }
1673 s++;
1674 }
1675 }
b7953727 1676 DEBUG_r(if (!did_match)
bf93d4cc 1677 PerlIO_printf(Perl_debug_log,
b7953727
JH
1678 "Did not find anchored character...\n")
1679 );
6eb5f6b9
JH
1680 }
1681 /*SUPPRESS 560*/
1aa99e6b
IH
1682 else if (do_utf8 == (UTF!=0) &&
1683 (prog->anchored_substr != Nullsv
9041c2e3 1684 || (prog->float_substr != Nullsv
1aa99e6b 1685 && prog->float_max_offset < strend - s))) {
9041c2e3 1686 SV *must = prog->anchored_substr
6eb5f6b9 1687 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1688 I32 back_max =
6eb5f6b9 1689 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1690 I32 back_min =
6eb5f6b9 1691 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1692 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1693 -(I32)(CHR_SVLEN(must)
1aa99e6b 1694 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1695 char *last1; /* Last position checked before */
bf93d4cc
GS
1696#ifdef DEBUGGING
1697 int did_match = 0;
1698#endif
6eb5f6b9
JH
1699
1700 if (s > PL_bostr)
1701 last1 = HOPc(s, -1);
1702 else
1703 last1 = s - 1; /* bogus */
1704
1705 /* XXXX check_substr already used to find `s', can optimize if
1706 check_substr==must. */
1707 scream_pos = -1;
1708 dontbother = end_shift;
1709 strend = HOPc(strend, -dontbother);
1710 while ( (s <= last) &&
9041c2e3 1711 ((flags & REXEC_SCREAM)
1aa99e6b 1712 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1713 end_shift, &scream_pos, 0))
1aa99e6b 1714 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1715 (unsigned char*)strend, must,
6eb5f6b9 1716 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1717 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1718 if (HOPc(s, -back_max) > last1) {
1719 last1 = HOPc(s, -back_min);
1720 s = HOPc(s, -back_max);
1721 }
1722 else {
1723 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1724
1725 last1 = HOPc(s, -back_min);
1726 s = t;
1727 }
1aa99e6b 1728 if (do_utf8) {
6eb5f6b9
JH
1729 while (s <= last1) {
1730 if (regtry(prog, s))
1731 goto got_it;
1732 s += UTF8SKIP(s);
1733 }
1734 }
1735 else {
1736 while (s <= last1) {
1737 if (regtry(prog, s))
1738 goto got_it;
1739 s++;
1740 }
1741 }
1742 }
b7953727
JH
1743 DEBUG_r(if (!did_match)
1744 PerlIO_printf(Perl_debug_log,
1745 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1746 ((must == prog->anchored_substr)
1747 ? "anchored" : "floating"),
1748 PL_colors[0],
1749 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1750 SvPVX(must),
b7953727
JH
1751 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1752 );
6eb5f6b9
JH
1753 goto phooey;
1754 }
155aba94 1755 else if ((c = prog->regstclass)) {
66e933ab
GS
1756 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1757 /* don't bother with what can't match */
6eb5f6b9 1758 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1759 DEBUG_r({
1760 SV *prop = sv_newmortal();
1761 regprop(prop, c);
2a782b5b 1762 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), UTF ? sv_uni_display(dsv, sv, 60, 0) : s);
ffc61ed2 1763 });
6eb5f6b9
JH
1764 if (find_byclass(prog, c, s, strend, startpos, 0))
1765 goto got_it;
bf93d4cc 1766 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1767 }
1768 else {
1769 dontbother = 0;
1770 if (prog->float_substr != Nullsv) { /* Trim the end. */
1771 char *last;
d6a28714
JH
1772
1773 if (flags & REXEC_SCREAM) {
1774 last = screaminstr(sv, prog->float_substr, s - strbeg,
1775 end_shift, &scream_pos, 1); /* last one */
1776 if (!last)
ffc61ed2 1777 last = scream_olds; /* Only one occurrence. */
b8c5462f 1778 }
d6a28714
JH
1779 else {
1780 STRLEN len;
1781 char *little = SvPV(prog->float_substr, len);
1782
1783 if (SvTAIL(prog->float_substr)) {
1784 if (memEQ(strend - len + 1, little, len - 1))
1785 last = strend - len + 1;
1786 else if (!PL_multiline)
9041c2e3 1787 last = memEQ(strend - len, little, len)
d6a28714 1788 ? strend - len : Nullch;
b8c5462f 1789 else
d6a28714
JH
1790 goto find_last;
1791 } else {
1792 find_last:
9041c2e3 1793 if (len)
d6a28714 1794 last = rninstr(s, strend, little, little + len);
b8c5462f 1795 else
d6a28714 1796 last = strend; /* matching `$' */
b8c5462f 1797 }
b8c5462f 1798 }
bf93d4cc
GS
1799 if (last == NULL) {
1800 DEBUG_r(PerlIO_printf(Perl_debug_log,
1801 "%sCan't trim the tail, match fails (should not happen)%s\n",
1802 PL_colors[4],PL_colors[5]));
1803 goto phooey; /* Should not happen! */
1804 }
d6a28714
JH
1805 dontbother = strend - last + prog->float_min_offset;
1806 }
1807 if (minlen && (dontbother < minlen))
1808 dontbother = minlen - 1;
1809 strend -= dontbother; /* this one's always in bytes! */
1810 /* We don't know much -- general case. */
1aa99e6b 1811 if (do_utf8) {
d6a28714
JH
1812 for (;;) {
1813 if (regtry(prog, s))
1814 goto got_it;
1815 if (s >= strend)
1816 break;
b8c5462f 1817 s += UTF8SKIP(s);
d6a28714
JH
1818 };
1819 }
1820 else {
1821 do {
1822 if (regtry(prog, s))
1823 goto got_it;
1824 } while (s++ < strend);
1825 }
1826 }
1827
1828 /* Failure. */
1829 goto phooey;
1830
1831got_it:
1832 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1833
1834 if (PL_reg_eval_set) {
1835 /* Preserve the current value of $^R */
1836 if (oreplsv != GvSV(PL_replgv))
1837 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1838 restored, the value remains
1839 the same. */
acfe0abc 1840 restore_pos(aTHX_ 0);
d6a28714
JH
1841 }
1842
1843 /* make sure $`, $&, $', and $digit will work later */
1844 if ( !(flags & REXEC_NOT_FIRST) ) {
1845 if (RX_MATCH_COPIED(prog)) {
1846 Safefree(prog->subbeg);
1847 RX_MATCH_COPIED_off(prog);
1848 }
1849 if (flags & REXEC_COPY_STR) {
1850 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1851
1852 s = savepvn(strbeg, i);
1853 prog->subbeg = s;
1854 prog->sublen = i;
1855 RX_MATCH_COPIED_on(prog);
1856 }
1857 else {
1858 prog->subbeg = strbeg;
1859 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1860 }
1861 }
9041c2e3 1862
d6a28714
JH
1863 return 1;
1864
1865phooey:
bf93d4cc
GS
1866 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1867 PL_colors[4],PL_colors[5]));
d6a28714 1868 if (PL_reg_eval_set)
acfe0abc 1869 restore_pos(aTHX_ 0);
d6a28714
JH
1870 return 0;
1871}
1872
1873/*
1874 - regtry - try match at specific point
1875 */
1876STATIC I32 /* 0 failure, 1 success */
1877S_regtry(pTHX_ regexp *prog, char *startpos)
1878{
d6a28714
JH
1879 register I32 i;
1880 register I32 *sp;
1881 register I32 *ep;
1882 CHECKPOINT lastcp;
1883
02db2b7b
IZ
1884#ifdef DEBUGGING
1885 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1886#endif
d6a28714
JH
1887 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1888 MAGIC *mg;
1889
1890 PL_reg_eval_set = RS_init;
1891 DEBUG_r(DEBUG_s(
b900a521
JH
1892 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1893 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1894 ));
e8347627 1895 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1896 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1897 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1898 SAVETMPS;
1899 /* Apparently this is not needed, judging by wantarray. */
e8347627 1900 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1901 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1902
1903 if (PL_reg_sv) {
1904 /* Make $_ available to executed code. */
1905 if (PL_reg_sv != DEFSV) {
4d1ff10f 1906 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
d6a28714
JH
1907 SAVESPTR(DEFSV);
1908 DEFSV = PL_reg_sv;
b8c5462f 1909 }
d6a28714 1910
9041c2e3 1911 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1912 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1913 /* prepare for quick setting of pos */
14befaf4
DM
1914 sv_magic(PL_reg_sv, (SV*)0,
1915 PERL_MAGIC_regex_global, Nullch, 0);
1916 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1917 mg->mg_len = -1;
b8c5462f 1918 }
d6a28714
JH
1919 PL_reg_magic = mg;
1920 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1921 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 1922 }
09687e5a 1923 if (!PL_reg_curpm) {
0f79a09d 1924 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
1925#ifdef USE_ITHREADS
1926 {
1927 SV* repointer = newSViv(0);
577e12cc 1928 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 1929 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
1930 av_push(PL_regex_padav,repointer);
1931 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
1932 PL_regex_pad = AvARRAY(PL_regex_padav);
1933 }
1934#endif
1935 }
aaa362c4 1936 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
1937 PL_reg_oldcurpm = PL_curpm;
1938 PL_curpm = PL_reg_curpm;
1939 if (RX_MATCH_COPIED(prog)) {
1940 /* Here is a serious problem: we cannot rewrite subbeg,
1941 since it may be needed if this match fails. Thus
1942 $` inside (?{}) could fail... */
1943 PL_reg_oldsaved = prog->subbeg;
1944 PL_reg_oldsavedlen = prog->sublen;
1945 RX_MATCH_COPIED_off(prog);
1946 }
1947 else
1948 PL_reg_oldsaved = Nullch;
1949 prog->subbeg = PL_bostr;
1950 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1951 }
1952 prog->startp[0] = startpos - PL_bostr;
1953 PL_reginput = startpos;
1954 PL_regstartp = prog->startp;
1955 PL_regendp = prog->endp;
1956 PL_reglastparen = &prog->lastparen;
a01268b5 1957 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
1958 prog->lastparen = 0;
1959 PL_regsize = 0;
1960 DEBUG_r(PL_reg_starttry = startpos);
1961 if (PL_reg_start_tmpl <= prog->nparens) {
1962 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1963 if(PL_reg_start_tmp)
1964 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1965 else
1966 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1967 }
1968
128e8167
JH
1969#ifdef DEBUGGING
1970 sv_setpvn(PERL_DEBUG_PAD(0), "", 0);
1971 sv_setpvn(PERL_DEBUG_PAD(1), "", 0);
1972 sv_setpvn(PERL_DEBUG_PAD(2), "", 0);
1973#endif
1974
d6a28714
JH
1975 /* XXXX What this code is doing here?!!! There should be no need
1976 to do this again and again, PL_reglastparen should take care of
3dd2943c 1977 this! --ilya*/
dafc8851
JH
1978
1979 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1980 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1981 * PL_reglastparen), is not needed at all by the test suite
1982 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1983 * enough, for building DynaLoader, or otherwise this
1984 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1985 * will happen. Meanwhile, this code *is* needed for the
1986 * above-mentioned test suite tests to succeed. The common theme
1987 * on those tests seems to be returning null fields from matches.
1988 * --jhi */
dafc8851 1989#if 1
d6a28714
JH
1990 sp = prog->startp;
1991 ep = prog->endp;
1992 if (prog->nparens) {
09e8ae3b 1993 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1994 *++sp = -1;
1995 *++ep = -1;
1996 }
1997 }
dafc8851 1998#endif
02db2b7b 1999 REGCP_SET(lastcp);
d6a28714
JH
2000 if (regmatch(prog->program + 1)) {
2001 prog->endp[0] = PL_reginput - PL_bostr;
2002 return 1;
2003 }
02db2b7b 2004 REGCP_UNWIND(lastcp);
d6a28714
JH
2005 return 0;
2006}
2007
02db2b7b
IZ
2008#define RE_UNWIND_BRANCH 1
2009#define RE_UNWIND_BRANCHJ 2
2010
2011union re_unwind_t;
2012
2013typedef struct { /* XX: makes sense to enlarge it... */
2014 I32 type;
2015 I32 prev;
2016 CHECKPOINT lastcp;
2017} re_unwind_generic_t;
2018
2019typedef struct {
2020 I32 type;
2021 I32 prev;
2022 CHECKPOINT lastcp;
2023 I32 lastparen;
2024 regnode *next;
2025 char *locinput;
2026 I32 nextchr;
2027#ifdef DEBUGGING
2028 int regindent;
2029#endif
2030} re_unwind_branch_t;
2031
2032typedef union re_unwind_t {
2033 I32 type;
2034 re_unwind_generic_t generic;
2035 re_unwind_branch_t branch;
2036} re_unwind_t;
2037
8ba1375e
MJD
2038#define sayYES goto yes
2039#define sayNO goto no
2040#define sayYES_FINAL goto yes_final
2041#define sayYES_LOUD goto yes_loud
2042#define sayNO_FINAL goto no_final
2043#define sayNO_SILENT goto do_no
2044#define saySAME(x) if (x) goto yes; else goto no
2045
2046#define REPORT_CODE_OFF 24
2047
d6a28714
JH
2048/*
2049 - regmatch - main matching routine
2050 *
2051 * Conceptually the strategy is simple: check to see whether the current
2052 * node matches, call self recursively to see whether the rest matches,
2053 * and then act accordingly. In practice we make some effort to avoid
2054 * recursion, in particular by going through "ordinary" nodes (that don't
2055 * need to know whether the rest of the match failed) by a loop instead of
2056 * by recursion.
2057 */
2058/* [lwall] I've hoisted the register declarations to the outer block in order to
2059 * maybe save a little bit of pushing and popping on the stack. It also takes
2060 * advantage of machines that use a register save mask on subroutine entry.
2061 */
2062STATIC I32 /* 0 failure, 1 success */
2063S_regmatch(pTHX_ regnode *prog)
2064{
d6a28714
JH
2065 register regnode *scan; /* Current node. */
2066 regnode *next; /* Next node. */
2067 regnode *inner; /* Next node in internal branch. */
2068 register I32 nextchr; /* renamed nextchr - nextchar colides with
2069 function of same name */
2070 register I32 n; /* no or next */
b7953727
JH
2071 register I32 ln = 0; /* len or last */
2072 register char *s = Nullch; /* operand or save */
d6a28714 2073 register char *locinput = PL_reginput;
b7953727 2074 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2075 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2076 I32 unwind = 0;
b7953727 2077#if 0
02db2b7b 2078 I32 firstcp = PL_savestack_ix;
b7953727 2079#endif
53c4c00c 2080 register bool do_utf8 = PL_reg_match_utf8;
2a782b5b 2081#ifdef DEBUGGING
ce333219
JH
2082 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2083 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2084 SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2a782b5b 2085#endif
02db2b7b 2086
d6a28714
JH
2087#ifdef DEBUGGING
2088 PL_regindent++;
2089#endif
2090
2091 /* Note that nextchr is a byte even in UTF */
2092 nextchr = UCHARAT(locinput);
2093 scan = prog;
2094 while (scan != NULL) {
8ba1375e 2095
2a782b5b 2096 DEBUG_r( {
d6a28714
JH
2097 SV *prop = sv_newmortal();
2098 int docolor = *PL_colors[0];
2099 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2100 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2101 /* The part of the string before starttry has one color
2102 (pref0_len chars), between starttry and current
2103 position another one (pref_len - pref0_len chars),
2104 after the current position the third one.
2105 We assume that pref0_len <= pref_len, otherwise we
2106 decrease pref0_len. */
9041c2e3 2107 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2108 ? (5 + taill) - l : locinput - PL_bostr;
2109 int pref0_len;
d6a28714 2110
df1ffd02 2111 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2112 pref_len++;
2113 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2114 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2115 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2116 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2117 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2118 l--;
d6a28714
JH
2119 if (pref0_len < 0)
2120 pref0_len = 0;
2121 if (pref0_len > pref_len)
2122 pref0_len = pref_len;
2123 regprop(prop, scan);
2a782b5b
JH
2124 {
2125 char *s0 =
df1ffd02 2126 do_utf8 ?
2a782b5b
JH
2127 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
2128 pref0_len, 60, 0) :
2129 locinput - pref_len;
df1ffd02
JH
2130 int len0 = do_utf8 ? strlen(s0) : pref0_len;
2131 char *s1 = do_utf8 ?
2a782b5b
JH
2132 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
2133 pref_len - pref0_len, 60, 0) :
2134 locinput - pref_len + pref0_len;
df1ffd02
JH
2135 int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2136 char *s2 = do_utf8 ?
2a782b5b
JH
2137 pv_uni_display(dsv2, (U8*)locinput,
2138 PL_regeol - locinput, 60, 0) :
2139 locinput;
df1ffd02 2140 int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2141 PerlIO_printf(Perl_debug_log,
2142 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2143 (IV)(locinput - PL_bostr),
2144 PL_colors[4],
2145 len0, s0,
2146 PL_colors[5],
2147 PL_colors[2],
2148 len1, s1,
2149 PL_colors[3],
2150 (docolor ? "" : "> <"),
2151 PL_colors[0],
2152 len2, s2,
2153 PL_colors[1],
2154 15 - l - pref_len + 1,
2155 "",
2156 (IV)(scan - PL_regprogram), PL_regindent*2, "",
2157 SvPVX(prop));
2158 }
2159 });
d6a28714
JH
2160
2161 next = scan + NEXT_OFF(scan);
2162 if (next == scan)
2163 next = NULL;
2164
2165 switch (OP(scan)) {
2166 case BOL:
12d33761
HS
2167 if (locinput == PL_bostr || (PL_multiline &&
2168 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2169 {
2170 /* regtill = regbol; */
b8c5462f
JH
2171 break;
2172 }
d6a28714
JH
2173 sayNO;
2174 case MBOL:
12d33761
HS
2175 if (locinput == PL_bostr ||
2176 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2177 {
b8c5462f
JH
2178 break;
2179 }
d6a28714
JH
2180 sayNO;
2181 case SBOL:
c2a73568 2182 if (locinput == PL_bostr)
b8c5462f 2183 break;
d6a28714
JH
2184 sayNO;
2185 case GPOS:
2186 if (locinput == PL_reg_ganch)
2187 break;
2188 sayNO;
2189 case EOL:
2190 if (PL_multiline)
2191 goto meol;
2192 else
2193 goto seol;
2194 case MEOL:
2195 meol:
2196 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2197 sayNO;
b8c5462f 2198 break;
d6a28714
JH
2199 case SEOL:
2200 seol:
2201 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2202 sayNO;
d6a28714 2203 if (PL_regeol - locinput > 1)
b8c5462f 2204 sayNO;
b8c5462f 2205 break;
d6a28714
JH
2206 case EOS:
2207 if (PL_regeol != locinput)
b8c5462f 2208 sayNO;
d6a28714 2209 break;
ffc61ed2 2210 case SANY:
d6a28714 2211 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2212 sayNO;
f33976b4
DB
2213 if (do_utf8) {
2214 locinput += PL_utf8skip[nextchr];
2215 if (locinput > PL_regeol)
2216 sayNO;
2217 nextchr = UCHARAT(locinput);
2218 }
2219 else
2220 nextchr = UCHARAT(++locinput);
2221 break;
2222 case CANY:
2223 if (!nextchr && locinput >= PL_regeol)
2224 sayNO;
b8c5462f 2225 nextchr = UCHARAT(++locinput);
a0d0e21e 2226 break;
ffc61ed2 2227 case REG_ANY:
1aa99e6b
IH
2228 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2229 sayNO;
2230 if (do_utf8) {
b8c5462f 2231 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2232 if (locinput > PL_regeol)
2233 sayNO;
a0ed51b3 2234 nextchr = UCHARAT(locinput);
a0ed51b3 2235 }
1aa99e6b
IH
2236 else
2237 nextchr = UCHARAT(++locinput);
a0ed51b3 2238 break;
d6a28714 2239 case EXACT:
cd439c50
IZ
2240 s = STRING(scan);
2241 ln = STR_LEN(scan);
1aa99e6b 2242 if (do_utf8 != (UTF!=0)) {
bc517b45 2243 /* The target and the pattern have differing utf8ness. */
1aa99e6b
IH
2244 char *l = locinput;
2245 char *e = s + ln;
bc517b45 2246 STRLEN ulen;
a72c7584 2247
5ff6fc6d
JH
2248 if (do_utf8) {
2249 /* The target is utf8, the pattern is not utf8. */
1aa99e6b
IH
2250 while (s < e) {
2251 if (l >= PL_regeol)
5ff6fc6d
JH
2252 sayNO;
2253 if (NATIVE_TO_UNI(*(U8*)s) !=
bc517b45 2254 utf8_to_uvchr((U8*)l, &ulen))
5ff6fc6d 2255 sayNO;
bc517b45 2256 l += ulen;
5ff6fc6d 2257 s ++;
1aa99e6b 2258 }
5ff6fc6d
JH
2259 }
2260 else {
2261 /* The target is not utf8, the pattern is utf8. */
1aa99e6b
IH
2262 while (s < e) {
2263 if (l >= PL_regeol)
2264 sayNO;
5ff6fc6d 2265 if (NATIVE_TO_UNI(*((U8*)l)) !=
bc517b45 2266 utf8_to_uvchr((U8*)s, &ulen))
1aa99e6b 2267 sayNO;
bc517b45 2268 s += ulen;
a72c7584 2269 l ++;
1aa99e6b 2270 }
5ff6fc6d 2271 }
1aa99e6b
IH
2272 locinput = l;
2273 nextchr = UCHARAT(locinput);
2274 break;
2275 }
bc517b45 2276 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2277 /* Inline the first character, for speed. */
2278 if (UCHARAT(s) != nextchr)
2279 sayNO;
2280 if (PL_regeol - locinput < ln)
2281 sayNO;
2282 if (ln > 1 && memNE(s, locinput, ln))
2283 sayNO;
2284 locinput += ln;
2285 nextchr = UCHARAT(locinput);
2286 break;
2287 case EXACTFL:
b8c5462f
JH
2288 PL_reg_flags |= RF_tainted;
2289 /* FALL THROUGH */
d6a28714 2290 case EXACTF:
cd439c50
IZ
2291 s = STRING(scan);
2292 ln = STR_LEN(scan);
d6a28714 2293
bc517b45 2294 {
d6a28714 2295 char *l = locinput;
bc517b45
JH
2296 char *e = s + ln;
2297 U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
2298
2299 if (do_utf8 != (UTF!=0)) {
2300 /* The target and the pattern have differing utf8ness. */
2301 STRLEN ulen1, ulen2;
2302 UV cs, cl;
2303
2304 if (do_utf8) {
2305 /* The target is utf8, the pattern is not utf8. */
2306 while (s < e) {
2307 if (l >= PL_regeol)
2308 sayNO;
2309
2310 cs = to_uni_fold(NATIVE_TO_UNI(*(U8*)s),
2311 (U8*)s, &ulen1);
2312 cl = utf8_to_uvchr((U8*)l, &ulen2);
2313
2314 if (cs != cl) {
2315 cl = to_uni_fold(cl, (U8*)l, &ulen2);
2316 if (ulen1 != ulen2 || cs != cl)
2317 sayNO;
2318 }
2319 l += ulen1;
2320 s ++;
2321 }
2322 }
2323 else {
2324 /* The target is not utf8, the pattern is utf8. */
2325 while (s < e) {
2326 if (l >= PL_regeol)
2327 sayNO;
2328
2329 cs = utf8_to_uvchr((U8*)s, &ulen1);
2330
2331 cl = to_uni_fold(NATIVE_TO_UNI(*(U8*)l),
2332 (U8*)l, &ulen2);
2333
2334 if (cs != cl) {
2335 cs = to_uni_fold(cs, (U8*)s, &ulen1);
2336 if (ulen1 != ulen2 || cs != cl)
2337 sayNO;
2338 }
2339 l ++;
2340 s += ulen1;
2341 }
2342 }
2343 locinput = l;
2344 nextchr = UCHARAT(locinput);
2345 break;
2346 }
2347
2348 if (do_utf8 && UTF) {
2349 /* Both the target and the pattern are utf8. */
2350 STRLEN ulen;
2351
2352 while (s < e) {
2353 if (l >= PL_regeol)
2354 sayNO;
2355 if (UTF8SKIP(s) != UTF8SKIP(l) ||
2356 memNE(s, (char*)l, UTF8SKIP(s))) {
2357 to_utf8_fold((U8*)l, tmpbuf, &ulen);
2358 if (UTF8SKIP(s) != ulen ||
2359 memNE(s, (char*)tmpbuf, ulen))
2360 sayNO;
2361 }
2362 l += UTF8SKIP(l);
2363 s += UTF8SKIP(s);
2364 }
2365 locinput = l;
2366 nextchr = UCHARAT(locinput);
2367 break;
b8c5462f 2368 }
a0ed51b3 2369 }
d6a28714 2370
bc517b45
JH
2371 /* Neither the target and the pattern are utf8. */
2372
d6a28714
JH
2373 /* Inline the first character, for speed. */
2374 if (UCHARAT(s) != nextchr &&
2375 UCHARAT(s) != ((OP(scan) == EXACTF)
2376 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2377 sayNO;
d6a28714 2378 if (PL_regeol - locinput < ln)
b8c5462f 2379 sayNO;
d6a28714
JH
2380 if (ln > 1 && (OP(scan) == EXACTF
2381 ? ibcmp(s, locinput, ln)
2382 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2383 sayNO;
d6a28714
JH
2384 locinput += ln;
2385 nextchr = UCHARAT(locinput);
a0d0e21e 2386 break;
d6a28714 2387 case ANYOF:
ffc61ed2
JH
2388 if (do_utf8) {
2389 if (!reginclass(scan, (U8*)locinput, do_utf8))
2390 sayNO;
2391 if (locinput >= PL_regeol)
2392 sayNO;
2393 locinput += PL_utf8skip[nextchr];
b8c5462f 2394 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2395 }
2396 else {
2397 if (nextchr < 0)
2398 nextchr = UCHARAT(locinput);
2399 if (!reginclass(scan, (U8*)locinput, do_utf8))
2400 sayNO;
2401 if (!nextchr && locinput >= PL_regeol)
2402 sayNO;
2403 nextchr = UCHARAT(++locinput);
2404 }
b8c5462f 2405 break;
d6a28714 2406 case ALNUML:
b8c5462f
JH
2407 PL_reg_flags |= RF_tainted;
2408 /* FALL THROUGH */
d6a28714 2409 case ALNUM:
b8c5462f 2410 if (!nextchr)
4633a7c4 2411 sayNO;
ffc61ed2 2412 if (do_utf8) {
ad24be35 2413 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2414 if (!(OP(scan) == ALNUM
3568d838 2415 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2416 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2417 {
2418 sayNO;
a0ed51b3 2419 }
b8c5462f 2420 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2421 nextchr = UCHARAT(locinput);
2422 break;
2423 }
ffc61ed2 2424 if (!(OP(scan) == ALNUM
d6a28714 2425 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2426 sayNO;
b8c5462f 2427 nextchr = UCHARAT(++locinput);
a0d0e21e 2428 break;
d6a28714 2429 case NALNUML:
b8c5462f
JH
2430 PL_reg_flags |= RF_tainted;
2431 /* FALL THROUGH */
d6a28714
JH
2432 case NALNUM:
2433 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2434 sayNO;
ffc61ed2 2435 if (do_utf8) {
8269fa76 2436 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2437 if (OP(scan) == NALNUM
3568d838 2438 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2439 : isALNUM_LC_utf8((U8*)locinput))
2440 {
b8c5462f 2441 sayNO;
d6a28714 2442 }
b8c5462f
JH
2443 locinput += PL_utf8skip[nextchr];
2444 nextchr = UCHARAT(locinput);
2445 break;
2446 }
ffc61ed2 2447 if (OP(scan) == NALNUM
d6a28714 2448 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2449 sayNO;
76e3520e 2450 nextchr = UCHARAT(++locinput);
a0d0e21e 2451 break;
d6a28714
JH
2452 case BOUNDL:
2453 case NBOUNDL:
3280af22 2454 PL_reg_flags |= RF_tainted;
bbce6d69 2455 /* FALL THROUGH */
d6a28714
JH
2456 case BOUND:
2457 case NBOUND:
2458 /* was last char in word? */
ffc61ed2 2459 if (do_utf8) {
12d33761
HS
2460 if (locinput == PL_bostr)
2461 ln = '\n';
ffc61ed2
JH
2462 else {
2463 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2464
2b9d42f0 2465 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2466 }
2467 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2468 ln = isALNUM_uni(ln);
8269fa76 2469 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2470 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2471 }
2472 else {
9041c2e3 2473 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2474 n = isALNUM_LC_utf8((U8*)locinput);
2475 }
a0ed51b3 2476 }
d6a28714 2477 else {
12d33761
HS
2478 ln = (locinput != PL_bostr) ?
2479 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2480 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2481 ln = isALNUM(ln);
2482 n = isALNUM(nextchr);
2483 }
2484 else {
2485 ln = isALNUM_LC(ln);
2486 n = isALNUM_LC(nextchr);
2487 }
d6a28714 2488 }
ffc61ed2
JH
2489 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2490 OP(scan) == BOUNDL))
2491 sayNO;
a0ed51b3 2492 break;
d6a28714 2493 case SPACEL:
3280af22 2494 PL_reg_flags |= RF_tainted;
bbce6d69 2495 /* FALL THROUGH */
d6a28714 2496 case SPACE:
9442cb0e 2497 if (!nextchr)
4633a7c4 2498 sayNO;
1aa99e6b 2499 if (do_utf8) {
fd400ab9 2500 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2501 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2502 if (!(OP(scan) == SPACE
3568d838 2503 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2504 : isSPACE_LC_utf8((U8*)locinput)))
2505 {
2506 sayNO;
2507 }
2508 locinput += PL_utf8skip[nextchr];
2509 nextchr = UCHARAT(locinput);
2510 break;
d6a28714 2511 }
ffc61ed2
JH
2512 if (!(OP(scan) == SPACE
2513 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2514 sayNO;
2515 nextchr = UCHARAT(++locinput);
2516 }
2517 else {
2518 if (!(OP(scan) == SPACE
2519 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2520 sayNO;
2521 nextchr = UCHARAT(++locinput);
a0ed51b3 2522 }
a0ed51b3 2523 break;
d6a28714 2524 case NSPACEL:
3280af22 2525 PL_reg_flags |= RF_tainted;
bbce6d69 2526 /* FALL THROUGH */
d6a28714 2527 case NSPACE:
9442cb0e 2528 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2529 sayNO;
1aa99e6b 2530 if (do_utf8) {
8269fa76 2531 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2532 if (OP(scan) == NSPACE
3568d838 2533 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2534 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2535 {
2536 sayNO;
2537 }
2538 locinput += PL_utf8skip[nextchr];
2539 nextchr = UCHARAT(locinput);
2540 break;
a0ed51b3 2541 }
ffc61ed2 2542 if (OP(scan) == NSPACE
d6a28714 2543 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2544 sayNO;
76e3520e 2545 nextchr = UCHARAT(++locinput);
a0d0e21e 2546 break;
d6a28714 2547 case DIGITL:
a0ed51b3
LW
2548 PL_reg_flags |= RF_tainted;
2549 /* FALL THROUGH */
d6a28714 2550 case DIGIT:
9442cb0e 2551 if (!nextchr)
a0ed51b3 2552 sayNO;
1aa99e6b 2553 if (do_utf8) {
8269fa76 2554 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2555 if (!(OP(scan) == DIGIT
3568d838 2556 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2557 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2558 {
a0ed51b3 2559 sayNO;
dfe13c55 2560 }
6f06b55f 2561 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2562 nextchr = UCHARAT(locinput);
2563 break;
2564 }
ffc61ed2 2565 if (!(OP(scan) == DIGIT
9442cb0e 2566 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2567 sayNO;
2568 nextchr = UCHARAT(++locinput);
2569 break;
d6a28714 2570 case NDIGITL:
b8c5462f
JH
2571 PL_reg_flags |= RF_tainted;
2572 /* FALL THROUGH */
d6a28714 2573 case NDIGIT:
9442cb0e 2574 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2575 sayNO;
1aa99e6b 2576 if (do_utf8) {
8269fa76 2577 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2578 if (OP(scan) == NDIGIT
3568d838 2579 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2580 : isDIGIT_LC_utf8((U8*)locinput))
2581 {
a0ed51b3 2582 sayNO;
9442cb0e 2583 }
6f06b55f 2584 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2585 nextchr = UCHARAT(locinput);
2586 break;
2587 }
ffc61ed2 2588 if (OP(scan) == NDIGIT
9442cb0e 2589 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2590 sayNO;
2591 nextchr = UCHARAT(++locinput);
2592 break;
2593 case CLUMP:
8269fa76 2594 LOAD_UTF8_CHARCLASS(mark,"~");
3568d838
JH
2595 if (locinput >= PL_regeol ||
2596 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3 2597 sayNO;
6f06b55f 2598 locinput += PL_utf8skip[nextchr];
3568d838
JH
2599 while (locinput < PL_regeol &&
2600 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3
LW
2601 locinput += UTF8SKIP(locinput);
2602 if (locinput > PL_regeol)
2603 sayNO;
2604 nextchr = UCHARAT(locinput);
2605 break;
c8756f30 2606 case REFFL:
3280af22 2607 PL_reg_flags |= RF_tainted;
c8756f30 2608 /* FALL THROUGH */
c277df42 2609 case REF:
c8756f30 2610 case REFF:
c277df42 2611 n = ARG(scan); /* which paren pair */
cf93c79d 2612 ln = PL_regstartp[n];
2c2d71f5 2613 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2614 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2615 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2616 if (ln == PL_regendp[n])
a0d0e21e 2617 break;
a0ed51b3 2618
cf93c79d 2619 s = PL_bostr + ln;
1aa99e6b 2620 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2621 char *l = locinput;
cf93c79d 2622 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2623 /*
2624 * Note that we can't do the "other character" lookup trick as
2625 * in the 8-bit case (no pun intended) because in Unicode we
2626 * have to map both upper and title case to lower case.
2627 */
2628 if (OP(scan) == REFF) {
a2a2844f 2629 STRLEN ulen1, ulen2;
e7ae6809
JH
2630 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
2631 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a0ed51b3
LW
2632 while (s < e) {
2633 if (l >= PL_regeol)
2634 sayNO;
a2a2844f
JH
2635 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2636 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 2637 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 2638 sayNO;
a2a2844f
JH
2639 s += ulen1;
2640 l += ulen2;
a0ed51b3
LW
2641 }
2642 }
2643 locinput = l;
2644 nextchr = UCHARAT(locinput);
2645 break;
2646 }
2647
a0d0e21e 2648 /* Inline the first character, for speed. */
76e3520e 2649 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2650 (OP(scan) == REF ||
2651 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2652 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2653 sayNO;
cf93c79d 2654 ln = PL_regendp[n] - ln;
3280af22 2655 if (locinput + ln > PL_regeol)
4633a7c4 2656 sayNO;
c8756f30
AK
2657 if (ln > 1 && (OP(scan) == REF
2658 ? memNE(s, locinput, ln)
2659 : (OP(scan) == REFF
2660 ? ibcmp(s, locinput, ln)
2661 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2662 sayNO;
a0d0e21e 2663 locinput += ln;
76e3520e 2664 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2665 break;
2666
2667 case NOTHING:
c277df42 2668 case TAIL:
a0d0e21e
LW
2669 break;
2670 case BACK:
2671 break;
c277df42
IZ
2672 case EVAL:
2673 {
2674 dSP;
533c011a 2675 OP_4tree *oop = PL_op;
3280af22
NIS
2676 COP *ocurcop = PL_curcop;
2677 SV **ocurpad = PL_curpad;
c277df42 2678 SV *ret;
9041c2e3 2679
c277df42 2680 n = ARG(scan);
533c011a 2681 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2682 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2683 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2684 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2685
8e5e9ebe
RGS
2686 {
2687 SV **before = SP;
2688 CALLRUNOPS(aTHX); /* Scalar context. */
2689 SPAGAIN;
2690 if (SP == before)
2691 ret = Nullsv; /* protect against empty (?{}) blocks. */
2692 else {
2693 ret = POPs;
2694 PUTBACK;
2695 }
2696 }
2697
0f5d15d6
IZ
2698 PL_op = oop;
2699 PL_curpad = ocurpad;
2700 PL_curcop = ocurcop;
c277df42 2701 if (logical) {
0f5d15d6
IZ
2702 if (logical == 2) { /* Postponed subexpression. */
2703 regexp *re;
22c35a8c 2704 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2705 re_cc_state state;
0f5d15d6
IZ
2706 CHECKPOINT cp, lastcp;
2707
2708 if(SvROK(ret) || SvRMAGICAL(ret)) {
2709 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2710
2711 if(SvMAGICAL(sv))
14befaf4 2712 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2713 }
2714 if (mg) {
2715 re = (regexp *)mg->mg_obj;
df0003d4 2716 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2717 }
2718 else {
2719 STRLEN len;
2720 char *t = SvPV(ret, len);
2721 PMOP pm;
2722 char *oprecomp = PL_regprecomp;
2723 I32 osize = PL_regsize;
2724 I32 onpar = PL_regnpar;
2725
5fcd1c1b 2726 Zero(&pm, 1, PMOP);
cea2e8a9 2727 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2728 if (!(SvFLAGS(ret)
0f5d15d6 2729 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2730 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2731 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2732 PL_regprecomp = oprecomp;
2733 PL_regsize = osize;
2734 PL_regnpar = onpar;
2735 }
2736 DEBUG_r(
9041c2e3 2737 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2738 "Entering embedded `%s%.60s%s%s'\n",
2739 PL_colors[0],
2740 re->precomp,
2741 PL_colors[1],
2742 (strlen(re->precomp) > 60 ? "..." : ""))
2743 );
2744 state.node = next;
2745 state.prev = PL_reg_call_cc;
2746 state.cc = PL_regcc;
2747 state.re = PL_reg_re;
2748
2ab05381 2749 PL_regcc = 0;
9041c2e3 2750
0f5d15d6 2751 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2752 REGCP_SET(lastcp);
0f5d15d6
IZ
2753 cache_re(re);
2754 state.ss = PL_savestack_ix;
2755 *PL_reglastparen = 0;
a01268b5 2756 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2757 PL_reg_call_cc = &state;
2758 PL_reginput = locinput;
2c2d71f5
JH
2759
2760 /* XXXX This is too dramatic a measure... */
2761 PL_reg_maxiter = 0;
2762
0f5d15d6 2763 if (regmatch(re->program + 1)) {
2c914db6
IZ
2764 /* Even though we succeeded, we need to restore
2765 global variables, since we may be wrapped inside
2766 SUSPEND, thus the match may be not finished yet. */
2767
2768 /* XXXX Do this only if SUSPENDed? */
2769 PL_reg_call_cc = state.prev;
2770 PL_regcc = state.cc;
2771 PL_reg_re = state.re;
2772 cache_re(PL_reg_re);
2773
2774 /* XXXX This is too dramatic a measure... */
2775 PL_reg_maxiter = 0;
2776
2777 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2778 ReREFCNT_dec(re);
2779 regcpblow(cp);
2780 sayYES;
2781 }
0f5d15d6 2782 ReREFCNT_dec(re);
02db2b7b 2783 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2784 regcppop();
2785 PL_reg_call_cc = state.prev;
2786 PL_regcc = state.cc;
2787 PL_reg_re = state.re;
d3790889 2788 cache_re(PL_reg_re);
2c2d71f5
JH
2789
2790 /* XXXX This is too dramatic a measure... */
2791 PL_reg_maxiter = 0;
2792
8e514ae6 2793 logical = 0;
0f5d15d6
IZ
2794 sayNO;
2795 }
c277df42 2796 sw = SvTRUE(ret);
0f5d15d6 2797 logical = 0;
a0ed51b3
LW
2798 }
2799 else
3280af22 2800 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2801 break;
2802 }
a0d0e21e 2803 case OPEN:
c277df42 2804 n = ARG(scan); /* which paren pair */
3280af22
NIS
2805 PL_reg_start_tmp[n] = locinput;
2806 if (n > PL_regsize)
2807 PL_regsize = n;
a0d0e21e
LW
2808 break;
2809 case CLOSE:
c277df42 2810 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2811 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2812 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2813 if (n > *PL_reglastparen)
2814 *PL_reglastparen = n;
a01268b5 2815 *PL_reglastcloseparen = n;
a0d0e21e 2816 break;
c277df42
IZ
2817 case GROUPP:
2818 n = ARG(scan); /* which paren pair */
cf93c79d 2819 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2820 break;
2821 case IFTHEN:
2c2d71f5 2822 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2823 if (sw)
2824 next = NEXTOPER(NEXTOPER(scan));
2825 else {
2826 next = scan + ARG(scan);
2827 if (OP(next) == IFTHEN) /* Fake one. */
2828 next = NEXTOPER(NEXTOPER(next));
2829 }
2830 break;
2831 case LOGICAL:
0f5d15d6 2832 logical = scan->flags;
c277df42 2833 break;
2ab05381
IZ
2834/*******************************************************************
2835 PL_regcc contains infoblock about the innermost (...)* loop, and
2836 a pointer to the next outer infoblock.
2837
2838 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2839
2840 1) After matching X, regnode for CURLYX is processed;
2841
9041c2e3 2842 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2843 regmatch() recursively with the starting point at WHILEM node;
2844
2845 3) Each hit of WHILEM node tries to match A and Z (in the order
2846 depending on the current iteration, min/max of {min,max} and
2847 greediness). The information about where are nodes for "A"
2848 and "Z" is read from the infoblock, as is info on how many times "A"
2849 was already matched, and greediness.
2850
2851 4) After A matches, the same WHILEM node is hit again.
2852
2853 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2854 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2855 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2856 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2857 of the external loop.
2858
2859 Currently present infoblocks form a tree with a stem formed by PL_curcc
2860 and whatever it mentions via ->next, and additional attached trees
2861 corresponding to temporarily unset infoblocks as in "5" above.
2862
9041c2e3 2863 In the following picture infoblocks for outer loop of
2ab05381
IZ
2864 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2865 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2866 infoblocks are drawn below the "reset" infoblock.
2867
2868 In fact in the picture below we do not show failed matches for Z and T
2869 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2870 more obvious *why* one needs to *temporary* unset infoblocks.]
2871
2872 Matched REx position InfoBlocks Comment
2873 (Y(A)*?Z)*?T x
2874 Y(A)*?Z)*?T x <- O
2875 Y (A)*?Z)*?T x <- O
2876 Y A)*?Z)*?T x <- O <- I
2877 YA )*?Z)*?T x <- O <- I
2878 YA A)*?Z)*?T x <- O <- I
2879 YAA )*?Z)*?T x <- O <- I
2880 YAA Z)*?T x <- O # Temporary unset I
2881 I
2882
2883 YAAZ Y(A)*?Z)*?T x <- O
2884 I
2885
2886 YAAZY (A)*?Z)*?T x <- O
2887 I
2888
2889 YAAZY A)*?Z)*?T x <- O <- I
2890 I
2891
2892 YAAZYA )*?Z)*?T x <- O <- I
2893 I
2894
2895 YAAZYA Z)*?T x <- O # Temporary unset I
2896 I,I
2897
2898 YAAZYAZ )*?T x <- O
2899 I,I
2900
2901 YAAZYAZ T x # Temporary unset O
2902 O
2903 I,I
2904
2905 YAAZYAZT x
2906 O
2907 I,I
2908 *******************************************************************/
a0d0e21e
LW
2909 case CURLYX: {
2910 CURCUR cc;
3280af22 2911 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2912 /* No need to save/restore up to this paren */
2913 I32 parenfloor = scan->flags;
c277df42
IZ
2914
2915 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2916 next += ARG(next);
3280af22
NIS
2917 cc.oldcc = PL_regcc;
2918 PL_regcc = &cc;
cb434fcc
IZ
2919 /* XXXX Probably it is better to teach regpush to support
2920 parenfloor > PL_regsize... */
2921 if (parenfloor > *PL_reglastparen)
2922 parenfloor = *PL_reglastparen; /* Pessimization... */
2923 cc.parenfloor = parenfloor;
a0d0e21e
LW
2924 cc.cur = -1;
2925 cc.min = ARG1(scan);
2926 cc.max = ARG2(scan);
c277df42 2927 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2928 cc.next = next;
2929 cc.minmod = minmod;
2930 cc.lastloc = 0;
3280af22 2931 PL_reginput = locinput;
a0d0e21e
LW
2932 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2933 regcpblow(cp);
3280af22 2934 PL_regcc = cc.oldcc;
4633a7c4 2935 saySAME(n);
a0d0e21e
LW
2936 }
2937 /* NOT REACHED */
2938 case WHILEM: {
2939 /*
2940 * This is really hard to understand, because after we match
2941 * what we're trying to match, we must make sure the rest of
2c2d71f5 2942 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2943 * to go back UP the parse tree by recursing ever deeper. And
2944 * if it fails, we have to reset our parent's current state
2945 * that we can try again after backing off.
2946 */
2947
c277df42 2948 CHECKPOINT cp, lastcp;
3280af22 2949 CURCUR* cc = PL_regcc;
c277df42
IZ
2950 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2951
4633a7c4 2952 n = cc->cur + 1; /* how many we know we matched */
3280af22 2953 PL_reginput = locinput;
a0d0e21e 2954
c277df42 2955 DEBUG_r(
9041c2e3
NIS
2956 PerlIO_printf(Perl_debug_log,
2957 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2958 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2959 (long)n, (long)cc->min,
c277df42
IZ
2960 (long)cc->max, (long)cc)
2961 );
4633a7c4 2962
a0d0e21e
LW
2963 /* If degenerate scan matches "", assume scan done. */
2964
579cf2c3 2965 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2966 PL_regcc = cc->oldcc;
2ab05381
IZ
2967 if (PL_regcc)
2968 ln = PL_regcc->cur;
c277df42 2969 DEBUG_r(
c3464db5
DD
2970 PerlIO_printf(Perl_debug_log,
2971 "%*s empty match detected, try continuation...\n",
3280af22 2972 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2973 );
a0d0e21e 2974 if (regmatch(cc->next))
4633a7c4 2975 sayYES;
2ab05381
IZ
2976 if (PL_regcc)
2977 PL_regcc->cur = ln;
3280af22 2978 PL_regcc = cc;
4633a7c4 2979 sayNO;
a0d0e21e
LW
2980 }
2981
2982 /* First just match a string of min scans. */
2983
2984 if (n < cc->min) {
2985 cc->cur = n;
2986 cc->lastloc = locinput;
4633a7c4
LW
2987 if (regmatch(cc->scan))
2988 sayYES;
2989 cc->cur = n - 1;
c277df42 2990 cc->lastloc = lastloc;
4633a7c4 2991 sayNO;
a0d0e21e
LW
2992 }
2993
2c2d71f5
JH
2994 if (scan->flags) {
2995 /* Check whether we already were at this position.
2996 Postpone detection until we know the match is not
2997 *that* much linear. */
2998 if (!PL_reg_maxiter) {
2999 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3000 PL_reg_leftiter = PL_reg_maxiter;
3001 }
3002 if (PL_reg_leftiter-- == 0) {
3003 I32 size = (PL_reg_maxiter + 7)/8;
3004 if (PL_reg_poscache) {
3005 if (PL_reg_poscache_size < size) {
3006 Renew(PL_reg_poscache, size, char);
3007 PL_reg_poscache_size = size;
3008 }
3009 Zero(PL_reg_poscache, size, char);
3010 }
3011 else {
3012 PL_reg_poscache_size = size;
3013 Newz(29, PL_reg_poscache, size, char);
3014 }
3015 DEBUG_r(
3016 PerlIO_printf(Perl_debug_log,
3017 "%sDetected a super-linear match, switching on caching%s...\n",
3018 PL_colors[4], PL_colors[5])
3019 );
3020 }
3021 if (PL_reg_leftiter < 0) {
3022 I32 o = locinput - PL_bostr, b;
3023
3024 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
3025 b = o % 8;
3026 o /= 8;
3027 if (PL_reg_poscache[o] & (1<<b)) {
3028 DEBUG_r(
3029 PerlIO_printf(Perl_debug_log,
3030 "%*s already tried at this position...\n",
3031 REPORT_CODE_OFF+PL_regindent*2, "")
3032 );
7821416a 3033 sayNO_SILENT;
2c2d71f5
JH
3034 }
3035 PL_reg_poscache[o] |= (1<<b);
3036 }
3037 }
3038
a0d0e21e
LW
3039 /* Prefer next over scan for minimal matching. */
3040
3041 if (cc->minmod) {
3280af22 3042 PL_regcc = cc->oldcc;
2ab05381
IZ
3043 if (PL_regcc)
3044 ln = PL_regcc->cur;
5f05dabc 3045 cp = regcppush(cc->parenfloor);
02db2b7b 3046 REGCP_SET(lastcp);
5f05dabc 3047 if (regmatch(cc->next)) {
c277df42 3048 regcpblow(cp);
4633a7c4 3049 sayYES; /* All done. */
5f05dabc 3050 }
02db2b7b 3051 REGCP_UNWIND(lastcp);
5f05dabc 3052 regcppop();
2ab05381
IZ
3053 if (PL_regcc)
3054 PL_regcc->cur = ln;
3280af22 3055 PL_regcc = cc;
a0d0e21e 3056
c277df42 3057 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 3058 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3059 && !(PL_reg_flags & RF_warned)) {
3060 PL_reg_flags |= RF_warned;
e476b1b5 3061 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
3062 "Complex regular subexpression recursion",
3063 REG_INFTY - 1);
c277df42 3064 }
4633a7c4 3065 sayNO;
c277df42 3066 }
a687059c 3067
c277df42 3068 DEBUG_r(
c3464db5
DD
3069 PerlIO_printf(Perl_debug_log,
3070 "%*s trying longer...\n",
3280af22 3071 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3072 );
a0d0e21e 3073 /* Try scanning more and see if it helps. */
3280af22 3074 PL_reginput = locinput;
a0d0e21e
LW
3075 cc->cur = n;
3076 cc->lastloc = locinput;
5f05dabc 3077 cp = regcppush(cc->parenfloor);
02db2b7b 3078 REGCP_SET(lastcp);
5f05dabc 3079 if (regmatch(cc->scan)) {
c277df42 3080 regcpblow(cp);
4633a7c4 3081 sayYES;
5f05dabc 3082 }
02db2b7b 3083 REGCP_UNWIND(lastcp);
5f05dabc 3084 regcppop();
4633a7c4 3085 cc->cur = n - 1;
c277df42 3086 cc->lastloc = lastloc;
4633a7c4 3087 sayNO;
a0d0e21e
LW
3088 }
3089
3090 /* Prefer scan over next for maximal matching. */
3091
3092 if (n < cc->max) { /* More greed allowed? */
5f05dabc 3093 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
3094 cc->cur = n;
3095 cc->lastloc = locinput;
02db2b7b 3096 REGCP_SET(lastcp);
5f05dabc 3097 if (regmatch(cc->scan)) {
c277df42 3098 regcpblow(cp);
4633a7c4 3099 sayYES;
5f05dabc 3100 }
02db2b7b 3101 REGCP_UNWIND(lastcp);
a0d0e21e 3102 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3103 PL_reginput = locinput;
c277df42 3104 DEBUG_r(
c3464db5
DD
3105 PerlIO_printf(Perl_debug_log,
3106 "%*s failed, try continuation...\n",
3280af22 3107 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3108 );
3109 }
9041c2e3 3110 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3111 && !(PL_reg_flags & RF_warned)) {
3280af22 3112 PL_reg_flags |= RF_warned;
e476b1b5 3113 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
3114 "Complex regular subexpression recursion",
3115 REG_INFTY - 1);
a0d0e21e
LW
3116 }
3117
3118 /* Failed deeper matches of scan, so see if this one works. */
3280af22 3119 PL_regcc = cc->oldcc;
2ab05381
IZ
3120 if (PL_regcc)
3121 ln = PL_regcc->cur;
a0d0e21e 3122 if (regmatch(cc->next))
4633a7c4 3123 sayYES;
2ab05381
IZ
3124 if (PL_regcc)
3125 PL_regcc->cur = ln;
3280af22 3126 PL_regcc = cc;
4633a7c4 3127 cc->cur = n - 1;
c277df42 3128 cc->lastloc = lastloc;
4633a7c4 3129 sayNO;
a0d0e21e
LW
3130 }
3131 /* NOT REACHED */
9041c2e3 3132 case BRANCHJ:
c277df42
IZ
3133 next = scan + ARG(scan);
3134 if (next == scan)
3135 next = NULL;
3136 inner = NEXTOPER(NEXTOPER(scan));
3137 goto do_branch;
9041c2e3 3138 case BRANCH:
c277df42
IZ
3139 inner = NEXTOPER(scan);
3140 do_branch:
3141 {
c277df42
IZ
3142 c1 = OP(scan);
3143 if (OP(next) != c1) /* No choice. */
3144 next = inner; /* Avoid recursion. */
a0d0e21e 3145 else {
02db2b7b
IZ
3146 I32 lastparen = *PL_reglastparen;
3147 I32 unwind1;
3148 re_unwind_branch_t *uw;
3149
3150 /* Put unwinding data on stack */
3151 unwind1 = SSNEWt(1,re_unwind_branch_t);
3152 uw = SSPTRt(unwind1,re_unwind_branch_t);
3153 uw->prev = unwind;
3154 unwind = unwind1;
3155 uw->type = ((c1 == BRANCH)
3156 ? RE_UNWIND_BRANCH
3157 : RE_UNWIND_BRANCHJ);
3158 uw->lastparen = lastparen;
3159 uw->next = next;
3160 uw->locinput = locinput;
3161 uw->nextchr = nextchr;
3162#ifdef DEBUGGING
3163 uw->regindent = ++PL_regindent;
3164#endif
c277df42 3165
02db2b7b
IZ
3166 REGCP_SET(uw->lastcp);
3167
3168 /* Now go into the first branch */
3169 next = inner;
a687059c 3170 }
a0d0e21e
LW
3171 }
3172 break;
3173 case MINMOD:
3174 minmod = 1;
3175 break;
c277df42
IZ
3176 case CURLYM:
3177 {
00db4c45 3178 I32 l = 0;
c277df42 3179 CHECKPOINT lastcp;
9041c2e3 3180
c277df42
IZ
3181 /* We suppose that the next guy does not need
3182 backtracking: in particular, it is of constant length,
3183 and has no parenths to influence future backrefs. */
3184 ln = ARG1(scan); /* min to match */
3185 n = ARG2(scan); /* max to match */
c277df42
IZ
3186 paren = scan->flags;
3187 if (paren) {
3280af22
NIS
3188 if (paren > PL_regsize)
3189 PL_regsize = paren;
3190 if (paren > *PL_reglastparen)
3191 *PL_reglastparen = paren;
c277df42 3192 }
dc45a647 3193 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3194 if (paren)
3195 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3196 PL_reginput = locinput;
c277df42
IZ
3197 if (minmod) {
3198 minmod = 0;
3199 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3200 sayNO;
f31a99c8
HS
3201 /* if we matched something zero-length we don't need to
3202 backtrack - capturing parens are already defined, so
3203 the caveat in the maximal case doesn't apply
3204
3205 XXXX if ln == 0, we can redo this check first time
3206 through the following loop
3207 */
3208 if (ln && l == 0)
3209 n = ln; /* don't backtrack */
3280af22 3210 locinput = PL_reginput;
cca55fe3 3211 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3212 regnode *text_node = next;
3213
cca55fe3 3214 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3215
cca55fe3 3216 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3217 else {
cca55fe3
JP
3218 if (PL_regkind[(U8)OP(text_node)] == REF) {
3219 I32 n, ln;
3220 n = ARG(text_node); /* which paren pair */
3221 ln = PL_regstartp[n];
3222 /* assume yes if we haven't seen CLOSEn */
3223 if (
3224 *PL_reglastparen < n ||
3225 ln == -1 ||
3226 ln == PL_regendp[n]
3227 ) {
3228 c1 = c2 = -1000;
3229 goto assume_ok_MM;
3230 }
3231 c1 = *(PL_bostr + ln);
3232 }
3233 else { c1 = (U8)*STRING(text_node); }
af5decee 3234 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3235 c2 = PL_fold[c1];
af5decee 3236 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3237 c2 = PL_fold_locale[c1];
3238 else
3239 c2 = c1;
3240 }
a0ed51b3
LW
3241 }
3242 else
c277df42 3243 c1 = c2 = -1000;
cca55fe3 3244 assume_ok_MM:
02db2b7b 3245 REGCP_SET(lastcp);
5f4b28b2 3246 /* This may be improved if l == 0. */
c277df42
IZ
3247 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3248 /* If it could work, try it. */
3249 if (c1 == -1000 ||
3280af22
NIS
3250 UCHARAT(PL_reginput) == c1 ||
3251 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3252 {
3253 if (paren) {
f31a99c8 3254 if (ln) {
cf93c79d
IZ
3255 PL_regstartp[paren] =
3256 HOPc(PL_reginput, -l) - PL_bostr;
3257 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3258 }
3259 else
cf93c79d 3260 PL_regendp[paren] = -1;
c277df42
IZ
3261 }
3262 if (regmatch(next))
3263 sayYES;
02db2b7b 3264 REGCP_UNWIND(lastcp);
c277df42
IZ
3265 }
3266 /* Couldn't or didn't -- move forward. */
3280af22 3267 PL_reginput = locinput;
c277df42
IZ
3268 if (regrepeat_hard(scan, 1, &l)) {
3269 ln++;
3280af22 3270 locinput = PL_reginput;
c277df42
IZ
3271 }
3272 else
3273 sayNO;
3274 }
a0ed51b3
LW
3275 }
3276 else {
c277df42 3277 n = regrepeat_hard(scan, n, &l);
f31a99c8
HS
3278 /* if we matched something zero-length we don't need to
3279 backtrack, unless the minimum count is zero and we
3280 are capturing the result - in that case the capture
3281 being defined or not may affect later execution
3282 */
3283 if (n != 0 && l == 0 && !(paren && ln == 0))
3284 ln = n; /* don't backtrack */
3280af22 3285 locinput = PL_reginput;
c277df42 3286 DEBUG_r(
5c0ca799 3287 PerlIO_printf(Perl_debug_log,
faccc32b 3288 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3289 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3290 (IV) n, (IV)l)
c277df42
IZ
3291 );
3292 if (n >= ln) {
cca55fe3 3293 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3294 regnode *text_node = next;
3295
cca55fe3 3296 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3297
cca55fe3 3298 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3299 else {
cca55fe3
JP
3300 if (PL_regkind[(U8)OP(text_node)] == REF) {
3301 I32 n, ln;
3302 n = ARG(text_node); /* which paren pair */
3303 ln = PL_regstartp[n];
3304 /* assume yes if we haven't seen CLOSEn */
3305 if (
3306 *PL_reglastparen < n ||
3307 ln == -1 ||
3308 ln == PL_regendp[n]
3309 ) {
3310 c1 = c2 = -1000;
3311 goto assume_ok_REG;
3312 }
3313 c1 = *(PL_bostr + ln);
3314 }
3315 else { c1 = (U8)*STRING(text_node); }
3316
af5decee 3317 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3318 c2 = PL_fold[c1];
af5decee 3319 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3320 c2 = PL_fold_locale[c1];
3321 else
3322 c2 = c1;
3323 }
a0ed51b3
LW
3324 }
3325 else
c277df42
IZ
3326 c1 = c2 = -1000;
3327 }
cca55fe3 3328 assume_ok_REG:
02db2b7b 3329 REGCP_SET(lastcp);
c277df42
IZ
3330 while (n >= ln) {
3331 /* If it could work, try it. */
3332 if (c1 == -1000 ||
3280af22
NIS
3333 UCHARAT(PL_reginput) == c1 ||
3334 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3335 {
3336 DEBUG_r(
c3464db5 3337 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3338 "%*s trying tail with n=%"IVdf"...\n",
3339 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3340 );
3341 if (paren) {
3342 if (n) {
cf93c79d
IZ
3343 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3344 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3345 }
a0ed51b3 3346 else
cf93c79d 3347 PL_regendp[paren] = -1;
c277df42 3348 }
a0ed51b3
LW
3349 if (regmatch(next))
3350 sayYES;
02db2b7b 3351 REGCP_UNWIND(lastcp);
a0ed51b3 3352 }
c277df42
IZ
3353 /* Couldn't or didn't -- back up. */
3354 n--;
dfe13c55 3355 locinput = HOPc(locinput, -l);
3280af22 3356 PL_reginput = locinput;
c277df42
IZ
3357 }
3358 }
3359 sayNO;
3360 break;
3361 }
3362 case CURLYN:
3363 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3364 if (paren > PL_regsize)
3365 PL_regsize = paren;
3366 if (paren > *PL_reglastparen)
3367 *PL_reglastparen = paren;
c277df42
IZ
3368 ln = ARG1(scan); /* min to match */
3369 n = ARG2(scan); /* max to match */
dc45a647 3370 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3371 goto repeat;
a0d0e21e 3372 case CURLY:
c277df42 3373 paren = 0;
a0d0e21e
LW
3374 ln = ARG1(scan); /* min to match */
3375 n = ARG2(scan); /* max to match */
dc45a647 3376 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3377 goto repeat;
3378 case STAR:
3379 ln = 0;
c277df42 3380 n = REG_INFTY;
a0d0e21e 3381 scan = NEXTOPER(scan);
c277df42 3382 paren = 0;
a0d0e21e
LW
3383 goto repeat;
3384 case PLUS:
c277df42
IZ
3385 ln = 1;
3386 n = REG_INFTY;
3387 scan = NEXTOPER(scan);
3388 paren = 0;
3389 repeat:
a0d0e21e
LW
3390 /*
3391 * Lookahead to avoid useless match attempts
3392 * when we know what character comes next.
3393 */
5f80c4cf
JP
3394
3395 /*
3396 * Used to only do .*x and .*?x, but now it allows
3397 * for )'s, ('s and (?{ ... })'s to be in the way
3398 * of the quantifier and the EXACT-like node. -- japhy
3399 */
3400
cca55fe3 3401 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3402 U8 *s;
3403 regnode *text_node = next;
3404
cca55fe3 3405 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3406
cca55fe3 3407 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3408 else {
cca55fe3
JP
3409 if (PL_regkind[(U8)OP(text_node)] == REF) {
3410 I32 n, ln;
3411 n = ARG(text_node); /* which paren pair */
3412 ln = PL_regstartp[n];
3413 /* assume yes if we haven't seen CLOSEn */
3414 if (
3415 *PL_reglastparen < n ||
3416 ln == -1 ||
3417 ln == PL_regendp[n]
3418 ) {
3419 c1 = c2 = -1000;
3420 goto assume_ok_easy;
3421 }
9246c65e 3422 s = (U8*)PL_bostr + ln;
cca55fe3
JP
3423 }
3424 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
3425
3426 if (!UTF) {
3427 c2 = c1 = *s;
f65d3ee7 3428 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3429 c2 = PL_fold[c1];
f65d3ee7 3430 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf 3431 c2 = PL_fold_locale[c1];
1aa99e6b 3432 }
5f80c4cf 3433 else { /* UTF */
f65d3ee7 3434 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 3435 STRLEN ulen1, ulen2;
e7ae6809
JH
3436 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
3437 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
3438
3439 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3440 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3441
3442 c1 = utf8_to_uvuni(tmpbuf1, 0);
3443 c2 = utf8_to_uvuni(tmpbuf2, 0);
5f80c4cf
JP
3444 }
3445 else {
3446 c2 = c1 = utf8_to_uvchr(s, NULL);
3447 }
1aa99e6b
IH
3448 }
3449 }
bbce6d69 3450 }
a0d0e21e 3451 else
bbce6d69 3452 c1 = c2 = -1000;
cca55fe3 3453 assume_ok_easy:
3280af22 3454 PL_reginput = locinput;
a0d0e21e 3455 if (minmod) {
c277df42 3456 CHECKPOINT lastcp;
a0d0e21e
LW
3457 minmod = 0;
3458 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3459 sayNO;
a0ed51b3 3460 locinput = PL_reginput;
02db2b7b 3461 REGCP_SET(lastcp);
0fe9bf95 3462 if (c1 != -1000) {
1aa99e6b 3463 char *e; /* Should not check after this */
0fe9bf95
IZ
3464 char *old = locinput;
3465
1aa99e6b 3466 if (n == REG_INFTY) {
0fe9bf95 3467 e = PL_regeol - 1;
1aa99e6b
IH
3468 if (do_utf8)
3469 while (UTF8_IS_CONTINUATION(*(U8*)e))
3470 e--;
3471 }
3472 else if (do_utf8) {
3473 int m = n - ln;
3474 for (e = locinput;
3475 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3476 e += UTF8SKIP(e);
3477 }
3478 else {
3479 e = locinput + n - ln;
3480 if (e >= PL_regeol)
3481 e = PL_regeol - 1;
3482 }
0fe9bf95 3483 while (1) {
1aa99e6b 3484 int count;
0fe9bf95 3485 /* Find place 'next' could work */
1aa99e6b
IH
3486 if (!do_utf8) {
3487 if (c1 == c2) {
a8e8ab15
JH
3488 while (locinput <= e &&
3489 UCHARAT(locinput) != c1)
1aa99e6b
IH
3490 locinput++;
3491 } else {
9041c2e3 3492 while (locinput <= e
a8e8ab15
JH
3493 && UCHARAT(locinput) != c1
3494 && UCHARAT(locinput) != c2)
1aa99e6b
IH
3495 locinput++;
3496 }
3497 count = locinput - old;
3498 }
3499 else {
3500 STRLEN len;
3501 if (c1 == c2) {
3502 for (count = 0;
3503 locinput <= e &&
9041c2e3 3504 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3505 count++)
3506 locinput += len;
3507
3508 } else {
3509 for (count = 0; locinput <= e; count++) {
9041c2e3 3510 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3511 if (c == c1 || c == c2)
3512 break;
9041c2e3 3513 locinput += len;
1aa99e6b
IH
3514 }
3515 }
0fe9bf95 3516 }
9041c2e3 3517 if (locinput > e)
0fe9bf95
IZ
3518 sayNO;
3519 /* PL_reginput == old now */
3520 if (locinput != old) {
3521 ln = 1; /* Did some */
1aa99e6b 3522 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3523 sayNO;
3524 }
3525 /* PL_reginput == locinput now */
29d1e993 3526 TRYPAREN(paren, ln, locinput);
0fe9bf95 3527 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3528 REGCP_UNWIND(lastcp);
0fe9bf95 3529 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3530 old = locinput;
3531 if (do_utf8)
3532 locinput += UTF8SKIP(locinput);
3533 else
3534 locinput++;
0fe9bf95
IZ
3535 }
3536 }
3537 else
c277df42 3538 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3539 UV c;
3540 if (c1 != -1000) {
3541 if (do_utf8)
9041c2e3 3542 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3543 else
9041c2e3 3544 c = UCHARAT(PL_reginput);
2390ecbc
PP
3545 /* If it could work, try it. */
3546 if (c == c1 || c == c2)
3547 {
3548 TRYPAREN(paren, n, PL_reginput);
3549 REGCP_UNWIND(lastcp);
3550 }
1aa99e6b 3551 }
a0d0e21e 3552 /* If it could work, try it. */
2390ecbc 3553 else if (c1 == -1000)
bbce6d69 3554 {
29d1e993 3555 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3556 REGCP_UNWIND(lastcp);
bbce6d69 3557 }
c277df42 3558 /* Couldn't or didn't -- move forward. */
a0ed51b3 3559 PL_reginput = locinput;
a0d0e21e
LW
3560 if (regrepeat(scan, 1)) {
3561 ln++;
a0ed51b3
LW
3562 locinput = PL_reginput;
3563 }
3564 else
4633a7c4 3565 sayNO;
a0d0e21e
LW
3566 }
3567 }
3568 else {
c277df42 3569 CHECKPOINT lastcp;
a0d0e21e 3570 n = regrepeat(scan, n);
a0ed51b3 3571 locinput = PL_reginput;
22c35a8c 3572 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3573 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3574 ln = n; /* why back off? */
1aeab75a
GS
3575 /* ...because $ and \Z can match before *and* after
3576 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3577 We should back off by one in this case. */
3578 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3579 ln--;
3580 }
02db2b7b 3581 REGCP_SET(lastcp);
c277df42 3582 if (paren) {
8fa7f367 3583 UV c = 0;
c277df42 3584 while (n >= ln) {
1aa99e6b
IH
3585 if (c1 != -1000) {
3586 if (do_utf8)
9041c2e3 3587 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3588 else