This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Unicode casefolding continues.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
a687059c
LW
83#include "regcomp.h"
84
c277df42
IZ
85#define RF_tainted 1 /* tainted information used? */
86#define RF_warned 2 /* warned about big count? */
ce862d02 87#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
88#define RF_utf8 8 /* String contains multibyte chars? */
89
90#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
91
92#define RS_init 1 /* eval environment created */
93#define RS_set 2 /* replsv value is set */
c277df42 94
a687059c
LW
95#ifndef STATIC
96#define STATIC static
97#endif
98
c277df42
IZ
99/*
100 * Forwards.
101 */
102
a0ed51b3 103#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 104#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 105
dfe13c55
GS
106#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
108#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
109#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
110#define HOPc(pos,off) ((char*)HOP(pos,off))
111#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 112
efb30f32 113#define HOPBACK(pos, off) ( \
53c4c00c 114 (UTF && PL_reg_match_utf8) \
efb30f32
HS
115 ? reghopmaybe((U8*)pos, -off) \
116 : (pos - off >= PL_bostr) \
117 ? (U8*)(pos - off) \
118 : (U8*)NULL \
119)
120#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
121
1aa99e6b
IH
122#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
123#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
124#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
125#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
126#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
127#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
128
8269fa76 129#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 130
5f80c4cf 131/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
132#define JUMPABLE(rn) ( \
133 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
134 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
135 OP(rn) == PLUS || OP(rn) == MINMOD || \
136 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
137)
138
cca55fe3
JP
139#define HAS_TEXT(rn) ( \
140 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
141)
e2d8ce26 142
cca55fe3 143#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 144 while (JUMPABLE(rn)) \
cca55fe3
JP
145 if (OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
146 PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 147 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
148 else if (OP(rn) == PLUS) \
149 rn = NEXTOPER(rn); \
e2d8ce26 150 else rn += NEXT_OFF(rn); \
5f80c4cf 151} STMT_END
74750237 152
acfe0abc 153static void restore_pos(pTHX_ void *arg);
51371543 154
76e3520e 155STATIC CHECKPOINT
cea2e8a9 156S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 157{
3280af22 158 int retval = PL_savestack_ix;
b1ce53c5
JH
159#define REGCP_PAREN_ELEMS 4
160 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
161 int p;
162
e49a9654
IH
163 if (paren_elems_to_push < 0)
164 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
165
a01268b5 166#define REGCP_OTHER_ELEMS 6
b1ce53c5 167 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 168 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 169/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
170 SSPUSHINT(PL_regendp[p]);
171 SSPUSHINT(PL_regstartp[p]);
3280af22 172 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
173 SSPUSHINT(p);
174 }
b1ce53c5 175/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
176 SSPUSHINT(PL_regsize);
177 SSPUSHINT(*PL_reglastparen);
a01268b5 178 SSPUSHINT(*PL_reglastcloseparen);
3280af22 179 SSPUSHPTR(PL_reginput);
41123dfd
JH
180#define REGCP_FRAME_ELEMS 2
181/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
182 * are needed for the regexp context stack bookkeeping. */
183 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 184 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 185
a0d0e21e
LW
186 return retval;
187}
188
c277df42 189/* These are needed since we do not localize EVAL nodes: */
02db2b7b 190# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 191 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 192 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 193
02db2b7b 194# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 195 PerlIO_printf(Perl_debug_log, \
faccc32b 196 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 197 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 198
76e3520e 199STATIC char *
cea2e8a9 200S_regcppop(pTHX)
a0d0e21e 201{
b1ce53c5 202 I32 i;
a0d0e21e
LW
203 U32 paren = 0;
204 char *input;
cf93c79d 205 I32 tmps;
b1ce53c5
JH
206
207 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 208 i = SSPOPINT;
b1ce53c5
JH
209 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
210 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 211 input = (char *) SSPOPPTR;
a01268b5 212 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
213 *PL_reglastparen = SSPOPINT;
214 PL_regsize = SSPOPINT;
b1ce53c5
JH
215
216 /* Now restore the parentheses context. */
41123dfd
JH
217 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
218 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 219 paren = (U32)SSPOPINT;
3280af22 220 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
221 PL_regstartp[paren] = SSPOPINT;
222 tmps = SSPOPINT;
3280af22
NIS
223 if (paren <= *PL_reglastparen)
224 PL_regendp[paren] = tmps;
c277df42 225 DEBUG_r(
c3464db5 226 PerlIO_printf(Perl_debug_log,
b900a521 227 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 228 (UV)paren, (IV)PL_regstartp[paren],
b900a521 229 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 230 (IV)PL_regendp[paren],
3280af22 231 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 232 );
a0d0e21e 233 }
c277df42 234 DEBUG_r(
3280af22 235 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 236 PerlIO_printf(Perl_debug_log,
faccc32b
JH
237 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
238 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
239 }
240 );
daf18116 241#if 1
dafc8851
JH
242 /* It would seem that the similar code in regtry()
243 * already takes care of this, and in fact it is in
244 * a better location to since this code can #if 0-ed out
245 * but the code in regtry() is needed or otherwise tests
246 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
247 * (as of patchlevel 7877) will fail. Then again,
248 * this code seems to be necessary or otherwise
249 * building DynaLoader will fail:
250 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
251 * --jhi */
3280af22
NIS
252 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
253 if (paren > PL_regsize)
cf93c79d
IZ
254 PL_regstartp[paren] = -1;
255 PL_regendp[paren] = -1;
a0d0e21e 256 }
dafc8851 257#endif
a0d0e21e
LW
258 return input;
259}
260
0f5d15d6 261STATIC char *
cea2e8a9 262S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
263{
264 I32 tmp = PL_savestack_ix;
265
266 PL_savestack_ix = ss;
267 regcppop();
268 PL_savestack_ix = tmp;
942e002e 269 return Nullch;
0f5d15d6
IZ
270}
271
272typedef struct re_cc_state
273{
274 I32 ss;
275 regnode *node;
276 struct re_cc_state *prev;
277 CURCUR *cc;
278 regexp *re;
279} re_cc_state;
280
02db2b7b 281#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 282
29d1e993
HS
283#define TRYPAREN(paren, n, input) { \
284 if (paren) { \
285 if (n) { \
286 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
287 PL_regendp[paren] = input - PL_bostr; \
288 } \
289 else \
290 PL_regendp[paren] = -1; \
291 } \
292 if (regmatch(next)) \
293 sayYES; \
294 if (paren && n) \
295 PL_regendp[paren] = -1; \
296}
297
298
a687059c 299/*
e50aee73 300 * pregexec and friends
a687059c
LW
301 */
302
303/*
c277df42 304 - pregexec - match a regexp against a string
a687059c 305 */
c277df42 306I32
864dbfa3 307Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 308 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
309/* strend: pointer to null at end of string */
310/* strbeg: real beginning of string */
311/* minend: end of match must be >=minend after stringarg. */
312/* nosave: For optimizations. */
313{
314 return
9041c2e3 315 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
316 nosave ? 0 : REXEC_COPY_STR);
317}
0f5d15d6
IZ
318
319STATIC void
cea2e8a9 320S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
321{
322 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
323#ifdef DEBUGGING
324 PL_regprogram = prog->program;
325#endif
326 PL_regnpar = prog->nparens;
9041c2e3
NIS
327 PL_regdata = prog->data;
328 PL_reg_re = prog;
0f5d15d6 329}
22e551b9 330
9041c2e3 331/*
cad2e5aa
JH
332 * Need to implement the following flags for reg_anch:
333 *
334 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
335 * USE_INTUIT_ML
336 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
337 * INTUIT_AUTORITATIVE_ML
338 * INTUIT_ONCE_NOML - Intuit can match in one location only.
339 * INTUIT_ONCE_ML
340 *
341 * Another flag for this function: SECOND_TIME (so that float substrs
342 * with giant delta may be not rechecked).
343 */
344
345/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
346
2c2d71f5 347/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
348 Otherwise, only SvCUR(sv) is used to get strbeg. */
349
350/* XXXX We assume that strpos is strbeg unless sv. */
351
6eb5f6b9
JH
352/* XXXX Some places assume that there is a fixed substring.
353 An update may be needed if optimizer marks as "INTUITable"
354 RExen without fixed substrings. Similarly, it is assumed that
355 lengths of all the strings are no more than minlen, thus they
356 cannot come from lookahead.
357 (Or minlen should take into account lookahead.) */
358
2c2d71f5
JH
359/* A failure to find a constant substring means that there is no need to make
360 an expensive call to REx engine, thus we celebrate a failure. Similarly,
361 finding a substring too deep into the string means that less calls to
30944b6d
IZ
362 regtry() should be needed.
363
364 REx compiler's optimizer found 4 possible hints:
365 a) Anchored substring;
366 b) Fixed substring;
367 c) Whether we are anchored (beginning-of-line or \G);
368 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 369 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
370 string which does not contradict any of them.
371 */
2c2d71f5 372
6eb5f6b9
JH
373/* Most of decisions we do here should have been done at compile time.
374 The nodes of the REx which we used for the search should have been
375 deleted from the finite automaton. */
376
cad2e5aa
JH
377char *
378Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
379 char *strend, U32 flags, re_scream_pos_data *data)
380{
b7953727 381 register I32 start_shift = 0;
cad2e5aa 382 /* Should be nonnegative! */
b7953727 383 register I32 end_shift = 0;
2c2d71f5
JH
384 register char *s;
385 register SV *check;
a1933d95 386 char *strbeg;
cad2e5aa
JH
387 char *t;
388 I32 ml_anch;
6eb5f6b9 389 register char *other_last = Nullch; /* other substr checked before this */
b7953727 390 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
391#ifdef DEBUGGING
392 char *i_strpos = strpos;
ce333219 393 SV *dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 394#endif
cad2e5aa 395
2a782b5b 396 DEBUG_r({
40eddc46
JH
397 char*s = UTF ? sv_uni_display(dsv, sv, 60, 0) : strpos;
398 int len = UTF ? strlen(s) : strend - strpos;
2a782b5b
JH
399 if (!PL_colorset)
400 reginitcolors();
401 PerlIO_printf(Perl_debug_log,
402 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
403 PL_colors[4],PL_colors[5],PL_colors[0],
404 prog->precomp,
405 PL_colors[1],
406 (strlen(prog->precomp) > 60 ? "..." : ""),
407 PL_colors[0],
408 (int)(len > 60 ? 60 : len),
409 s, PL_colors[1],
410 (len > 60 ? "..." : "")
411 );
412 });
cad2e5aa 413
1aa99e6b
IH
414 if (prog->reganch & ROPT_UTF8)
415 PL_reg_flags |= RF_utf8;
416
417 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
a72c7584
JH
418 DEBUG_r(PerlIO_printf(Perl_debug_log,
419 "String too short... [re_intuit_start]\n"));
cad2e5aa 420 goto fail;
2c2d71f5 421 }
a1933d95 422 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 423 PL_regeol = strend;
653099ff 424 check = prog->check_substr;
2c2d71f5 425 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
426 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
427 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 428 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 429
7e25d62c
JH
430 if (!ml_anch) {
431 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
432 | ROPT_IMPLICIT)) /* not a real BOL */
433 /* SvCUR is not set on references: SvRV and SvPVX overlap */
434 && sv && !SvROK(sv)
435 && (strpos != strbeg)) {
436 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
437 goto fail;
438 }
439 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 440 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 441 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
442 I32 slen;
443
1aa99e6b 444 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
445 if (SvTAIL(check)) {
446 slen = SvCUR(check); /* >= 1 */
cad2e5aa 447
9041c2e3 448 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
449 || (strend - s == slen && strend[-1] != '\n')) {
450 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
451 goto fail_finish;
cad2e5aa
JH
452 }
453 /* Now should match s[0..slen-2] */
454 slen--;
653099ff 455 if (slen && (*SvPVX(check) != *s
cad2e5aa 456 || (slen > 1
653099ff 457 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
458 report_neq:
459 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
460 goto fail_finish;
461 }
cad2e5aa 462 }
653099ff
GS
463 else if (*SvPVX(check) != *s
464 || ((slen = SvCUR(check)) > 1
465 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
466 goto report_neq;
467 goto success_at_start;
7e25d62c 468 }
cad2e5aa 469 }
2c2d71f5 470 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 471 s = strpos;
2c2d71f5 472 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 473 end_shift = prog->minlen - start_shift -
653099ff 474 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 475 if (!ml_anch) {
653099ff
GS
476 I32 end = prog->check_offset_max + CHR_SVLEN(check)
477 - (SvTAIL(check) != 0);
1aa99e6b 478 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
479
480 if (end_shift < eshift)
481 end_shift = eshift;
482 }
cad2e5aa 483 }
2c2d71f5 484 else { /* Can match at random position */
cad2e5aa
JH
485 ml_anch = 0;
486 s = strpos;
2c2d71f5
JH
487 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
488 /* Should be nonnegative! */
489 end_shift = prog->minlen - start_shift -
653099ff 490 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
491 }
492
2c2d71f5 493#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 494 if (end_shift < 0)
6bbae5e6 495 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
496#endif
497
2c2d71f5
JH
498 restart:
499 /* Find a possible match in the region s..strend by looking for
500 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 501 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
502 I32 p = -1; /* Internal iterator of scream. */
503 I32 *pp = data ? data->scream_pos : &p;
504
2c2d71f5
JH
505 if (PL_screamfirst[BmRARE(check)] >= 0
506 || ( BmRARE(check) == '\n'
507 && (BmPREVIOUS(check) == SvCUR(check) - 1)
508 && SvTAIL(check) ))
9041c2e3 509 s = screaminstr(sv, check,
2c2d71f5 510 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 511 else
2c2d71f5 512 goto fail_finish;
cad2e5aa
JH
513 if (data)
514 *data->scream_olds = s;
515 }
f33976b4 516 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
517 s = fbm_instr((U8*)(s + start_shift),
518 (U8*)(strend - end_shift),
519 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 520 else
1aa99e6b
IH
521 s = fbm_instr(HOP3(s, start_shift, strend),
522 HOP3(strend, -end_shift, strbeg),
2c2d71f5 523 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
524
525 /* Update the count-of-usability, remove useless subpatterns,
526 unshift s. */
2c2d71f5
JH
527
528 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
529 (s ? "Found" : "Did not find"),
530 ((check == prog->anchored_substr) ? "anchored" : "floating"),
531 PL_colors[0],
7b0972df
JH
532 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
533 SvPVX(check),
2c2d71f5
JH
534 PL_colors[1], (SvTAIL(check) ? "$" : ""),
535 (s ? " at offset " : "...\n") ) );
536
537 if (!s)
538 goto fail_finish;
539
6eb5f6b9
JH
540 check_at = s;
541
2c2d71f5 542 /* Finish the diagnostic message */
30944b6d 543 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
544
545 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
546 Start with the other substr.
547 XXXX no SCREAM optimization yet - and a very coarse implementation
548 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
549 *always* match. Probably should be marked during compile...
550 Probably it is right to do no SCREAM here...
551 */
552
553 if (prog->float_substr && prog->anchored_substr) {
30944b6d 554 /* Take into account the "other" substring. */
2c2d71f5
JH
555 /* XXXX May be hopelessly wrong for UTF... */
556 if (!other_last)
6eb5f6b9 557 other_last = strpos;
2c2d71f5 558 if (check == prog->float_substr) {
30944b6d
IZ
559 do_other_anchored:
560 {
1aa99e6b 561 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
562 char *s1 = s;
563
2c2d71f5
JH
564 t = s - prog->check_offset_max;
565 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
566 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 567 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 568 && t > strpos)))
30944b6d 569 /* EMPTY */;
2c2d71f5
JH
570 else
571 t = strpos;
1aa99e6b 572 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
573 if (t < other_last) /* These positions already checked */
574 t = other_last;
1aa99e6b 575 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
576 if (last < last1)
577 last1 = last;
578 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
579 /* On end-of-str: see comment below. */
580 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
581 HOP3(HOP3(last1, prog->anchored_offset, strend)
582 + SvCUR(prog->anchored_substr),
583 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
584 prog->anchored_substr,
585 PL_multiline ? FBMrf_MULTILINE : 0);
586 DEBUG_r(PerlIO_printf(Perl_debug_log,
587 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
588 (s ? "Found" : "Contradicts"),
589 PL_colors[0],
7b0972df
JH
590 (int)(SvCUR(prog->anchored_substr)
591 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
592 SvPVX(prog->anchored_substr),
593 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
594 if (!s) {
595 if (last1 >= last2) {
596 DEBUG_r(PerlIO_printf(Perl_debug_log,
597 ", giving up...\n"));
598 goto fail_finish;
599 }
600 DEBUG_r(PerlIO_printf(Perl_debug_log,
601 ", trying floating at offset %ld...\n",
1aa99e6b
IH
602 (long)(HOP3c(s1, 1, strend) - i_strpos)));
603 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
604 s = HOP3c(last, 1, strend);
2c2d71f5
JH
605 goto restart;
606 }
607 else {
608 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 609 (long)(s - i_strpos)));
1aa99e6b
IH
610 t = HOP3c(s, -prog->anchored_offset, strbeg);
611 other_last = HOP3c(s, 1, strend);
30944b6d 612 s = s1;
2c2d71f5
JH
613 if (t == strpos)
614 goto try_at_start;
2c2d71f5
JH
615 goto try_at_offset;
616 }
30944b6d 617 }
2c2d71f5
JH
618 }
619 else { /* Take into account the floating substring. */
620 char *last, *last1;
621 char *s1 = s;
622
1aa99e6b
IH
623 t = HOP3c(s, -start_shift, strbeg);
624 last1 = last =
625 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
626 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
627 last = HOP3c(t, prog->float_max_offset, strend);
628 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
629 if (s < other_last)
630 s = other_last;
2c2d71f5
JH
631 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
632 /* fbm_instr() takes into account exact value of end-of-str
633 if the check is SvTAIL(ed). Since false positives are OK,
634 and end-of-str is not later than strend we are OK. */
635 s = fbm_instr((unsigned char*)s,
636 (unsigned char*)last + SvCUR(prog->float_substr)
637 - (SvTAIL(prog->float_substr)!=0),
638 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
639 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
640 (s ? "Found" : "Contradicts"),
641 PL_colors[0],
7b0972df
JH
642 (int)(SvCUR(prog->float_substr)
643 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
644 SvPVX(prog->float_substr),
645 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
646 if (!s) {
647 if (last1 == last) {
648 DEBUG_r(PerlIO_printf(Perl_debug_log,
649 ", giving up...\n"));
650 goto fail_finish;
651 }
652 DEBUG_r(PerlIO_printf(Perl_debug_log,
653 ", trying anchored starting at offset %ld...\n",
30944b6d 654 (long)(s1 + 1 - i_strpos)));
803ff556 655 other_last = last;
1aa99e6b 656 s = HOP3c(t, 1, strend);
2c2d71f5
JH
657 goto restart;
658 }
659 else {
660 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 661 (long)(s - i_strpos)));
803ff556 662 other_last = s; /* Fix this later. --Hugo */
30944b6d 663 s = s1;
2c2d71f5
JH
664 if (t == strpos)
665 goto try_at_start;
2c2d71f5
JH
666 goto try_at_offset;
667 }
668 }
cad2e5aa 669 }
2c2d71f5
JH
670
671 t = s - prog->check_offset_max;
2c2d71f5
JH
672 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
673 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
674 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
675 && t > strpos))) {
2c2d71f5
JH
676 /* Fixed substring is found far enough so that the match
677 cannot start at strpos. */
678 try_at_offset:
cad2e5aa 679 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
680 /* Eventually fbm_*() should handle this, but often
681 anchored_offset is not 0, so this check will not be wasted. */
682 /* XXXX In the code below we prefer to look for "^" even in
683 presence of anchored substrings. And we search even
684 beyond the found float position. These pessimizations
685 are historical artefacts only. */
686 find_anchor:
2c2d71f5 687 while (t < strend - prog->minlen) {
cad2e5aa 688 if (*t == '\n') {
4ee3650e 689 if (t < check_at - prog->check_offset_min) {
30944b6d 690 if (prog->anchored_substr) {
4ee3650e
GS
691 /* Since we moved from the found position,
692 we definitely contradict the found anchored
30944b6d
IZ
693 substr. Due to the above check we do not
694 contradict "check" substr.
695 Thus we can arrive here only if check substr
696 is float. Redo checking for "other"=="fixed".
697 */
9041c2e3 698 strpos = t + 1;
30944b6d
IZ
699 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
700 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
701 goto do_other_anchored;
702 }
4ee3650e
GS
703 /* We don't contradict the found floating substring. */
704 /* XXXX Why not check for STCLASS? */
cad2e5aa 705 s = t + 1;
2c2d71f5 706 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 707 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
708 goto set_useful;
709 }
4ee3650e
GS
710 /* Position contradicts check-string */
711 /* XXXX probably better to look for check-string
712 than for "\n", so one should lower the limit for t? */
713 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 714 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 715 other_last = strpos = s = t + 1;
cad2e5aa
JH
716 goto restart;
717 }
718 t++;
719 }
2c2d71f5
JH
720 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
721 PL_colors[0],PL_colors[1]));
722 goto fail_finish;
cad2e5aa 723 }
f5952150
GS
724 else {
725 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
726 PL_colors[0],PL_colors[1]));
727 }
cad2e5aa
JH
728 s = t;
729 set_useful:
2c2d71f5 730 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
731 }
732 else {
f5952150 733 /* The found string does not prohibit matching at strpos,
2c2d71f5 734 - no optimization of calling REx engine can be performed,
f5952150
GS
735 unless it was an MBOL and we are not after MBOL,
736 or a future STCLASS check will fail this. */
2c2d71f5
JH
737 try_at_start:
738 /* Even in this situation we may use MBOL flag if strpos is offset
739 wrt the start of the string. */
05b4157f 740 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 741 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
742 /* May be due to an implicit anchor of m{.*foo} */
743 && !(prog->reganch & ROPT_IMPLICIT))
744 {
cad2e5aa
JH
745 t = strpos;
746 goto find_anchor;
747 }
30944b6d 748 DEBUG_r( if (ml_anch)
f5952150
GS
749 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
750 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 751 );
2c2d71f5 752 success_at_start:
30944b6d 753 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 754 && prog->check_substr /* Could be deleted already */
cad2e5aa 755 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
756 && prog->check_substr == prog->float_substr)
757 {
cad2e5aa 758 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 759 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
760 SvREFCNT_dec(prog->check_substr);
761 prog->check_substr = Nullsv; /* disable */
762 prog->float_substr = Nullsv; /* clear */
5e39e1e5 763 check = Nullsv; /* abort */
cad2e5aa 764 s = strpos;
3cf5c195
IZ
765 /* XXXX This is a remnant of the old implementation. It
766 looks wasteful, since now INTUIT can use many
6eb5f6b9 767 other heuristics. */
cad2e5aa
JH
768 prog->reganch &= ~RE_USE_INTUIT;
769 }
770 else
771 s = strpos;
772 }
773
6eb5f6b9
JH
774 /* Last resort... */
775 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
776 if (prog->regstclass) {
777 /* minlen == 0 is possible if regstclass is \b or \B,
778 and the fixed substr is ''$.
779 Since minlen is already taken into account, s+1 is before strend;
780 accidentally, minlen >= 1 guaranties no false positives at s + 1
781 even for \b or \B. But (minlen? 1 : 0) below assumes that
782 regstclass does not come from lookahead... */
783 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
784 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 785 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 786 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 787 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 788 : 1);
6eb5f6b9 789 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
790 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
791 : (prog->float_substr
792 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
793 cl_l, strend)
794 : strend);
a1933d95 795 char *startpos = strbeg;
6eb5f6b9
JH
796
797 t = s;
76384e4a 798 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 799 PL_regdata = prog->data;
76384e4a
GS
800 PL_bostr = startpos;
801 }
f33976b4 802 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
6eb5f6b9
JH
803 if (!s) {
804#ifdef DEBUGGING
b7953727 805 char *what = 0;
6eb5f6b9
JH
806#endif
807 if (endpos == strend) {
808 DEBUG_r( PerlIO_printf(Perl_debug_log,
809 "Could not match STCLASS...\n") );
810 goto fail;
811 }
66e933ab
GS
812 DEBUG_r( PerlIO_printf(Perl_debug_log,
813 "This position contradicts STCLASS...\n") );
653099ff
GS
814 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
815 goto fail;
6eb5f6b9
JH
816 /* Contradict one of substrings */
817 if (prog->anchored_substr) {
6eb5f6b9
JH
818 if (prog->anchored_substr == check) {
819 DEBUG_r( what = "anchored" );
820 hop_and_restart:
1aa99e6b 821 s = HOP3c(t, 1, strend);
66e933ab
GS
822 if (s + start_shift + end_shift > strend) {
823 /* XXXX Should be taken into account earlier? */
824 DEBUG_r( PerlIO_printf(Perl_debug_log,
825 "Could not match STCLASS...\n") );
826 goto fail;
827 }
5e39e1e5
HS
828 if (!check)
829 goto giveup;
6eb5f6b9 830 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 831 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
832 what, (long)(s + start_shift - i_strpos)) );
833 goto restart;
834 }
66e933ab 835 /* Have both, check_string is floating */
6eb5f6b9
JH
836 if (t + start_shift >= check_at) /* Contradicts floating=check */
837 goto retry_floating_check;
838 /* Recheck anchored substring, but not floating... */
9041c2e3 839 s = check_at;
5e39e1e5
HS
840 if (!check)
841 goto giveup;
6eb5f6b9 842 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 843 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
844 (long)(other_last - i_strpos)) );
845 goto do_other_anchored;
846 }
60e71179
GS
847 /* Another way we could have checked stclass at the
848 current position only: */
849 if (ml_anch) {
850 s = t = t + 1;
5e39e1e5
HS
851 if (!check)
852 goto giveup;
60e71179 853 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
854 "Looking for /%s^%s/m starting at offset %ld...\n",
855 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 856 goto try_at_offset;
66e933ab 857 }
60e71179
GS
858 if (!prog->float_substr) /* Could have been deleted */
859 goto fail;
6eb5f6b9
JH
860 /* Check is floating subtring. */
861 retry_floating_check:
862 t = check_at - start_shift;
863 DEBUG_r( what = "floating" );
864 goto hop_and_restart;
865 }
b7953727
JH
866 if (t != s) {
867 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 868 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
869 (long)(t - i_strpos), (long)(s - i_strpos))
870 );
871 }
872 else {
873 DEBUG_r(PerlIO_printf(Perl_debug_log,
874 "Does not contradict STCLASS...\n");
875 );
876 }
6eb5f6b9 877 }
5e39e1e5
HS
878 giveup:
879 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
880 PL_colors[4], (check ? "Guessed" : "Giving up"),
881 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 882 return s;
2c2d71f5
JH
883
884 fail_finish: /* Substring not found */
66e933ab
GS
885 if (prog->check_substr) /* could be removed already */
886 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 887 fail:
2c2d71f5 888 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
889 PL_colors[4],PL_colors[5]));
890 return Nullch;
891}
9661b544 892
6eb5f6b9 893/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
894STATIC char *
895S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 896{
6eb5f6b9
JH
897 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
898 char *m;
d8093b23 899 STRLEN ln;
900 unsigned int c1;
901 unsigned int c2;
6eb5f6b9
JH
902 char *e;
903 register I32 tmp = 1; /* Scratch variable? */
53c4c00c 904 register bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 905
6eb5f6b9
JH
906 /* We know what class it must start with. */
907 switch (OP(c)) {
6eb5f6b9 908 case ANYOF:
a0ed51b3 909 while (s < strend) {
ffc61ed2 910 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
911 if (tmp && (norun || regtry(prog, s)))
912 goto got_it;
913 else
914 tmp = doevery;
a0ed51b3 915 }
6eb5f6b9
JH
916 else
917 tmp = 1;
ffc61ed2 918 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 919 }
6eb5f6b9 920 break;
f33976b4
DB
921 case CANY:
922 while (s < strend) {
923 if (tmp && (norun || regtry(prog, s)))
924 goto got_it;
925 else
926 tmp = doevery;
927 s++;
928 }
929 break;
6eb5f6b9
JH
930 case EXACTF:
931 m = STRING(c);
932 ln = STR_LEN(c);
1aa99e6b 933 if (UTF) {
a2a2844f 934 STRLEN ulen1, ulen2;
e7ae6809
JH
935 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
936 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
937
938 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
939 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
940
941 c1 = utf8_to_uvuni(tmpbuf1, 0);
942 c2 = utf8_to_uvuni(tmpbuf2, 0);
1aa99e6b
IH
943 }
944 else {
945 c1 = *(U8*)m;
946 c2 = PL_fold[c1];
947 }
6eb5f6b9
JH
948 goto do_exactf;
949 case EXACTFL:
950 m = STRING(c);
951 ln = STR_LEN(c);
d8093b23 952 c1 = *(U8*)m;
6eb5f6b9
JH
953 c2 = PL_fold_locale[c1];
954 do_exactf:
955 e = strend - ln;
b3c9acc1 956
6eb5f6b9
JH
957 if (norun && e < s)
958 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
959
960 if (do_utf8) {
961 STRLEN len;
cadb39a9
JH
962 /* The ibcmp_utf8() uses to_uni_fold() which is more
963 * correct folding for Unicode than using lowercase.
964 * However, it doesn't work quite fully since the folding
965 * is a one-to-many mapping and the regex optimizer is
966 * unaware of this, so it may throw out good matches.
967 * Fortunately, not getting this right is allowed
968 * for Unicode Regular Expression Support level 1,
969 * only one-to-one matching is required. --jhi */
1aa99e6b
IH
970 if (c1 == c2)
971 while (s <= e) {
9041c2e3 972 if ( utf8_to_uvchr((U8*)s, &len) == c1
bc517b45 973 && (ln == len ||
332ddc25 974 ibcmp_utf8(s, do_utf8, strend - s,
55da9344
JH
975 m, UTF, ln))
976 && (norun || regtry(prog, s)) )
1aa99e6b
IH
977 goto got_it;
978 s += len;
979 }
980 else
981 while (s <= e) {
9041c2e3 982 UV c = utf8_to_uvchr((U8*)s, &len);
701a277b 983 if ( (c == c1 || c == c2)
bc517b45 984 && (ln == len ||
332ddc25 985 ibcmp_utf8(s, do_utf8, strend - s,
55da9344
JH
986 m, UTF, ln))
987 && (norun || regtry(prog, s)) )
1aa99e6b
IH
988 goto got_it;
989 s += len;
990 }
991 }
992 else {
993 if (c1 == c2)
994 while (s <= e) {
995 if ( *(U8*)s == c1
996 && (ln == 1 || !(OP(c) == EXACTF
997 ? ibcmp(s, m, ln)
998 : ibcmp_locale(s, m, ln)))
999 && (norun || regtry(prog, s)) )
1000 goto got_it;
1001 s++;
1002 }
1003 else
1004 while (s <= e) {
1005 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1006 && (ln == 1 || !(OP(c) == EXACTF
1007 ? ibcmp(s, m, ln)
1008 : ibcmp_locale(s, m, ln)))
1009 && (norun || regtry(prog, s)) )
1010 goto got_it;
1011 s++;
1012 }
b3c9acc1
IZ
1013 }
1014 break;
bbce6d69 1015 case BOUNDL:
3280af22 1016 PL_reg_flags |= RF_tainted;
bbce6d69 1017 /* FALL THROUGH */
a0d0e21e 1018 case BOUND:
ffc61ed2 1019 if (do_utf8) {
12d33761 1020 if (s == PL_bostr)
ffc61ed2
JH
1021 tmp = '\n';
1022 else {
1aa99e6b 1023 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1024
0064a8a9
JH
1025 if (s > (char*)r)
1026 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1027 }
1028 tmp = ((OP(c) == BOUND ?
9041c2e3 1029 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1030 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1031 while (s < strend) {
1032 if (tmp == !(OP(c) == BOUND ?
3568d838 1033 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1034 isALNUM_LC_utf8((U8*)s)))
1035 {
1036 tmp = !tmp;
1037 if ((norun || regtry(prog, s)))
1038 goto got_it;
1039 }
1040 s += UTF8SKIP(s);
a687059c 1041 }
a0d0e21e 1042 }
667bb95a 1043 else {
12d33761 1044 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1045 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1046 while (s < strend) {
1047 if (tmp ==
1048 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1049 tmp = !tmp;
1050 if ((norun || regtry(prog, s)))
1051 goto got_it;
1052 }
1053 s++;
a0ed51b3 1054 }
a0ed51b3 1055 }
6eb5f6b9 1056 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1057 goto got_it;
1058 break;
bbce6d69 1059 case NBOUNDL:
3280af22 1060 PL_reg_flags |= RF_tainted;
bbce6d69 1061 /* FALL THROUGH */
a0d0e21e 1062 case NBOUND:
ffc61ed2 1063 if (do_utf8) {
12d33761 1064 if (s == PL_bostr)
ffc61ed2
JH
1065 tmp = '\n';
1066 else {
1aa99e6b 1067 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1068
0064a8a9
JH
1069 if (s > (char*)r)
1070 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1071 }
1072 tmp = ((OP(c) == NBOUND ?
9041c2e3 1073 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1074 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1075 while (s < strend) {
1076 if (tmp == !(OP(c) == NBOUND ?
3568d838 1077 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1078 isALNUM_LC_utf8((U8*)s)))
1079 tmp = !tmp;
1080 else if ((norun || regtry(prog, s)))
1081 goto got_it;
1082 s += UTF8SKIP(s);
1083 }
a0d0e21e 1084 }
667bb95a 1085 else {
12d33761 1086 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1087 tmp = ((OP(c) == NBOUND ?
1088 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1089 while (s < strend) {
1090 if (tmp ==
1091 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1092 tmp = !tmp;
1093 else if ((norun || regtry(prog, s)))
1094 goto got_it;
1095 s++;
1096 }
a0ed51b3 1097 }
6eb5f6b9 1098 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1099 goto got_it;
1100 break;
a0d0e21e 1101 case ALNUM:
ffc61ed2 1102 if (do_utf8) {
8269fa76 1103 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1104 while (s < strend) {
3568d838 1105 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1106 if (tmp && (norun || regtry(prog, s)))
1107 goto got_it;
1108 else
1109 tmp = doevery;
1110 }
bbce6d69 1111 else
ffc61ed2
JH
1112 tmp = 1;
1113 s += UTF8SKIP(s);
bbce6d69 1114 }
bbce6d69 1115 }
ffc61ed2
JH
1116 else {
1117 while (s < strend) {
1118 if (isALNUM(*s)) {
1119 if (tmp && (norun || regtry(prog, s)))
1120 goto got_it;
1121 else
1122 tmp = doevery;
1123 }
a0ed51b3 1124 else
ffc61ed2
JH
1125 tmp = 1;
1126 s++;
a0ed51b3 1127 }
a0ed51b3
LW
1128 }
1129 break;
bbce6d69 1130 case ALNUML:
3280af22 1131 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1132 if (do_utf8) {
1133 while (s < strend) {
1134 if (isALNUM_LC_utf8((U8*)s)) {
1135 if (tmp && (norun || regtry(prog, s)))
1136 goto got_it;
1137 else
1138 tmp = doevery;
1139 }
a687059c 1140 else
ffc61ed2
JH
1141 tmp = 1;
1142 s += UTF8SKIP(s);
a0d0e21e 1143 }
a0d0e21e 1144 }
ffc61ed2
JH
1145 else {
1146 while (s < strend) {
1147 if (isALNUM_LC(*s)) {
1148 if (tmp && (norun || regtry(prog, s)))
1149 goto got_it;
1150 else
1151 tmp = doevery;
1152 }
a0ed51b3 1153 else
ffc61ed2
JH
1154 tmp = 1;
1155 s++;
a0ed51b3 1156 }
a0ed51b3
LW
1157 }
1158 break;
a0d0e21e 1159 case NALNUM:
ffc61ed2 1160 if (do_utf8) {
8269fa76 1161 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1162 while (s < strend) {
3568d838 1163 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1164 if (tmp && (norun || regtry(prog, s)))
1165 goto got_it;
1166 else
1167 tmp = doevery;
1168 }
bbce6d69 1169 else
ffc61ed2
JH
1170 tmp = 1;
1171 s += UTF8SKIP(s);
bbce6d69 1172 }
bbce6d69 1173 }
ffc61ed2
JH
1174 else {
1175 while (s < strend) {
1176 if (!isALNUM(*s)) {
1177 if (tmp && (norun || regtry(prog, s)))
1178 goto got_it;
1179 else
1180 tmp = doevery;
1181 }
a0ed51b3 1182 else
ffc61ed2
JH
1183 tmp = 1;
1184 s++;
a0ed51b3 1185 }
a0ed51b3
LW
1186 }
1187 break;
bbce6d69 1188 case NALNUML:
3280af22 1189 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1190 if (do_utf8) {
1191 while (s < strend) {
1192 if (!isALNUM_LC_utf8((U8*)s)) {
1193 if (tmp && (norun || regtry(prog, s)))
1194 goto got_it;
1195 else
1196 tmp = doevery;
1197 }
a687059c 1198 else
ffc61ed2
JH
1199 tmp = 1;
1200 s += UTF8SKIP(s);
a687059c 1201 }
a0d0e21e 1202 }
ffc61ed2
JH
1203 else {
1204 while (s < strend) {
1205 if (!isALNUM_LC(*s)) {
1206 if (tmp && (norun || regtry(prog, s)))
1207 goto got_it;
1208 else
1209 tmp = doevery;
1210 }
a0ed51b3 1211 else
ffc61ed2
JH
1212 tmp = 1;
1213 s++;
a0ed51b3 1214 }
a0ed51b3
LW
1215 }
1216 break;
a0d0e21e 1217 case SPACE:
ffc61ed2 1218 if (do_utf8) {
8269fa76 1219 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1220 while (s < strend) {
3568d838 1221 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1222 if (tmp && (norun || regtry(prog, s)))
1223 goto got_it;
1224 else
1225 tmp = doevery;
1226 }
a0d0e21e 1227 else
ffc61ed2
JH
1228 tmp = 1;
1229 s += UTF8SKIP(s);
2304df62 1230 }
a0d0e21e 1231 }
ffc61ed2
JH
1232 else {
1233 while (s < strend) {
1234 if (isSPACE(*s)) {
1235 if (tmp && (norun || regtry(prog, s)))
1236 goto got_it;
1237 else
1238 tmp = doevery;
1239 }
a0ed51b3 1240 else
ffc61ed2
JH
1241 tmp = 1;
1242 s++;
a0ed51b3 1243 }
a0ed51b3
LW
1244 }
1245 break;
bbce6d69 1246 case SPACEL:
3280af22 1247 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1248 if (do_utf8) {
1249 while (s < strend) {
1250 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1251 if (tmp && (norun || regtry(prog, s)))
1252 goto got_it;
1253 else
1254 tmp = doevery;
1255 }
bbce6d69 1256 else
ffc61ed2
JH
1257 tmp = 1;
1258 s += UTF8SKIP(s);
bbce6d69 1259 }
bbce6d69 1260 }
ffc61ed2
JH
1261 else {
1262 while (s < strend) {
1263 if (isSPACE_LC(*s)) {
1264 if (tmp && (norun || regtry(prog, s)))
1265 goto got_it;
1266 else
1267 tmp = doevery;
1268 }
a0ed51b3 1269 else
ffc61ed2
JH
1270 tmp = 1;
1271 s++;
a0ed51b3 1272 }
a0ed51b3
LW
1273 }
1274 break;
a0d0e21e 1275 case NSPACE:
ffc61ed2 1276 if (do_utf8) {
8269fa76 1277 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1278 while (s < strend) {
3568d838 1279 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1280 if (tmp && (norun || regtry(prog, s)))
1281 goto got_it;
1282 else
1283 tmp = doevery;
1284 }
a0d0e21e 1285 else
ffc61ed2
JH
1286 tmp = 1;
1287 s += UTF8SKIP(s);
a687059c 1288 }
a0d0e21e 1289 }
ffc61ed2
JH
1290 else {
1291 while (s < strend) {
1292 if (!isSPACE(*s)) {
1293 if (tmp && (norun || regtry(prog, s)))
1294 goto got_it;
1295 else
1296 tmp = doevery;
1297 }
a0ed51b3 1298 else
ffc61ed2
JH
1299 tmp = 1;
1300 s++;
a0ed51b3 1301 }
a0ed51b3
LW
1302 }
1303 break;
bbce6d69 1304 case NSPACEL:
3280af22 1305 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1306 if (do_utf8) {
1307 while (s < strend) {
1308 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1309 if (tmp && (norun || regtry(prog, s)))
1310 goto got_it;
1311 else
1312 tmp = doevery;
1313 }
bbce6d69 1314 else
ffc61ed2
JH
1315 tmp = 1;
1316 s += UTF8SKIP(s);
bbce6d69 1317 }
bbce6d69 1318 }
ffc61ed2
JH
1319 else {
1320 while (s < strend) {
1321 if (!isSPACE_LC(*s)) {
1322 if (tmp && (norun || regtry(prog, s)))
1323 goto got_it;
1324 else
1325 tmp = doevery;
1326 }
a0ed51b3 1327 else
ffc61ed2
JH
1328 tmp = 1;
1329 s++;
a0ed51b3 1330 }
a0ed51b3
LW
1331 }
1332 break;
a0d0e21e 1333 case DIGIT:
ffc61ed2 1334 if (do_utf8) {
8269fa76 1335 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1336 while (s < strend) {
3568d838 1337 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1338 if (tmp && (norun || regtry(prog, s)))
1339 goto got_it;
1340 else
1341 tmp = doevery;
1342 }
a0d0e21e 1343 else
ffc61ed2
JH
1344 tmp = 1;
1345 s += UTF8SKIP(s);
2b69d0c2 1346 }
a0d0e21e 1347 }
ffc61ed2
JH
1348 else {
1349 while (s < strend) {
1350 if (isDIGIT(*s)) {
1351 if (tmp && (norun || regtry(prog, s)))
1352 goto got_it;
1353 else
1354 tmp = doevery;
1355 }
a0ed51b3 1356 else
ffc61ed2
JH
1357 tmp = 1;
1358 s++;
a0ed51b3 1359 }
a0ed51b3
LW
1360 }
1361 break;
b8c5462f
JH
1362 case DIGITL:
1363 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1364 if (do_utf8) {
1365 while (s < strend) {
1366 if (isDIGIT_LC_utf8((U8*)s)) {
1367 if (tmp && (norun || regtry(prog, s)))
1368 goto got_it;
1369 else
1370 tmp = doevery;
1371 }
b8c5462f 1372 else
ffc61ed2
JH
1373 tmp = 1;
1374 s += UTF8SKIP(s);
b8c5462f 1375 }
b8c5462f 1376 }
ffc61ed2
JH
1377 else {
1378 while (s < strend) {
1379 if (isDIGIT_LC(*s)) {
1380 if (tmp && (norun || regtry(prog, s)))
1381 goto got_it;
1382 else
1383 tmp = doevery;
1384 }
b8c5462f 1385 else
ffc61ed2
JH
1386 tmp = 1;
1387 s++;
b8c5462f 1388 }
b8c5462f
JH
1389 }
1390 break;
a0d0e21e 1391 case NDIGIT:
ffc61ed2 1392 if (do_utf8) {
8269fa76 1393 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1394 while (s < strend) {
3568d838 1395 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1396 if (tmp && (norun || regtry(prog, s)))
1397 goto got_it;
1398 else
1399 tmp = doevery;
1400 }
a0d0e21e 1401 else
ffc61ed2
JH
1402 tmp = 1;
1403 s += UTF8SKIP(s);
a687059c 1404 }
a0d0e21e 1405 }
ffc61ed2
JH
1406 else {
1407 while (s < strend) {
1408 if (!isDIGIT(*s)) {
1409 if (tmp && (norun || regtry(prog, s)))
1410 goto got_it;
1411 else
1412 tmp = doevery;
1413 }
a0ed51b3 1414 else
ffc61ed2
JH
1415 tmp = 1;
1416 s++;
a0ed51b3 1417 }
a0ed51b3
LW
1418 }
1419 break;
b8c5462f
JH
1420 case NDIGITL:
1421 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1422 if (do_utf8) {
1423 while (s < strend) {
1424 if (!isDIGIT_LC_utf8((U8*)s)) {
1425 if (tmp && (norun || regtry(prog, s)))
1426 goto got_it;
1427 else
1428 tmp = doevery;
1429 }
b8c5462f 1430 else
ffc61ed2
JH
1431 tmp = 1;
1432 s += UTF8SKIP(s);
b8c5462f 1433 }
a0ed51b3 1434 }
ffc61ed2
JH
1435 else {
1436 while (s < strend) {
1437 if (!isDIGIT_LC(*s)) {
1438 if (tmp && (norun || regtry(prog, s)))
1439 goto got_it;
1440 else
1441 tmp = doevery;
1442 }
cf93c79d 1443 else
ffc61ed2
JH
1444 tmp = 1;
1445 s++;
b8c5462f 1446 }
b8c5462f
JH
1447 }
1448 break;
b3c9acc1 1449 default:
3c3eec57
GS
1450 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1451 break;
d6a28714 1452 }
6eb5f6b9
JH
1453 return 0;
1454 got_it:
1455 return s;
1456}
1457
1458/*
1459 - regexec_flags - match a regexp against a string
1460 */
1461I32
1462Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1463 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1464/* strend: pointer to null at end of string */
1465/* strbeg: real beginning of string */
1466/* minend: end of match must be >=minend after stringarg. */
1467/* data: May be used for some additional optimizations. */
1468/* nosave: For optimizations. */
1469{
6eb5f6b9
JH
1470 register char *s;
1471 register regnode *c;
1472 register char *startpos = stringarg;
6eb5f6b9
JH
1473 I32 minlen; /* must match at least this many chars */
1474 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1475 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1476 constant substr. */ /* CC */
1477 I32 end_shift = 0; /* Same for the end. */ /* CC */
1478 I32 scream_pos = -1; /* Internal iterator of scream. */
1479 char *scream_olds;
1480 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1481 bool do_utf8 = DO_UTF8(sv);
2a782b5b 1482#ifdef DEBUGGING
ce333219 1483 SV *dsv = PERL_DEBUG_PAD_ZERO(0);
2a782b5b 1484#endif
6eb5f6b9
JH
1485
1486 PL_regcc = 0;
1487
1488 cache_re(prog);
1489#ifdef DEBUGGING
aea4f609 1490 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1491#endif
1492
1493 /* Be paranoid... */
1494 if (prog == NULL || startpos == NULL) {
1495 Perl_croak(aTHX_ "NULL regexp parameter");
1496 return 0;
1497 }
1498
1499 minlen = prog->minlen;
a72c7584
JH
1500 if (strend - startpos < minlen) {
1501 DEBUG_r(PerlIO_printf(Perl_debug_log,
1502 "String too short [regexec_flags]...\n"));
1503 goto phooey;
1aa99e6b 1504 }
6eb5f6b9 1505
6eb5f6b9
JH
1506 /* Check validity of program. */
1507 if (UCHARAT(prog->program) != REG_MAGIC) {
1508 Perl_croak(aTHX_ "corrupted regexp program");
1509 }
1510
1511 PL_reg_flags = 0;
1512 PL_reg_eval_set = 0;
1513 PL_reg_maxiter = 0;
1514
1515 if (prog->reganch & ROPT_UTF8)
1516 PL_reg_flags |= RF_utf8;
1517
1518 /* Mark beginning of line for ^ and lookbehind. */
1519 PL_regbol = startpos;
1520 PL_bostr = strbeg;
1521 PL_reg_sv = sv;
1522
1523 /* Mark end of line for $ (and such) */
1524 PL_regeol = strend;
1525
1526 /* see how far we have to get to not match where we matched before */
1527 PL_regtill = startpos+minend;
1528
1529 /* We start without call_cc context. */
1530 PL_reg_call_cc = 0;
1531
1532 /* If there is a "must appear" string, look for it. */
1533 s = startpos;
1534
1535 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1536 MAGIC *mg;
1537
1538 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1539 PL_reg_ganch = startpos;
1540 else if (sv && SvTYPE(sv) >= SVt_PVMG
1541 && SvMAGIC(sv)
14befaf4
DM
1542 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1543 && mg->mg_len >= 0) {
6eb5f6b9
JH
1544 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1545 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1546 if (s > PL_reg_ganch)
6eb5f6b9
JH
1547 goto phooey;
1548 s = PL_reg_ganch;
1549 }
1550 }
1551 else /* pos() not defined */
1552 PL_reg_ganch = strbeg;
1553 }
1554
699c3c34
JH
1555 if (do_utf8 == (UTF!=0) &&
1556 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1557 re_scream_pos_data d;
1558
1559 d.scream_olds = &scream_olds;
1560 d.scream_pos = &scream_pos;
1561 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7
JH
1562 if (!s) {
1563 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1564 goto phooey; /* not present */
3fa9c3d7 1565 }
6eb5f6b9
JH
1566 }
1567
2a782b5b 1568 DEBUG_r({
df1ffd02
JH
1569 char *s = do_utf8 ? sv_uni_display(dsv, sv, 60, 0) : startpos;
1570 int len = do_utf8 ? strlen(s) : strend - startpos;
2a782b5b
JH
1571 if (!PL_colorset)
1572 reginitcolors();
1573 PerlIO_printf(Perl_debug_log,
1574 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1575 PL_colors[4],PL_colors[5],PL_colors[0],
1576 prog->precomp,
1577 PL_colors[1],
1578 (strlen(prog->precomp) > 60 ? "..." : ""),
1579 PL_colors[0],
1580 (int)(len > 60 ? 60 : len),
1581 s, PL_colors[1],
1582 (len > 60 ? "..." : "")
1583 );
1584 });
6eb5f6b9
JH
1585
1586 /* Simplest case: anchored match need be tried only once. */
1587 /* [unless only anchor is BOL and multiline is set] */
1588 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1589 if (s == startpos && regtry(prog, startpos))
1590 goto got_it;
1591 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1592 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1593 {
1594 char *end;
1595
1596 if (minlen)
1597 dontbother = minlen - 1;
1aa99e6b 1598 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1599 /* for multiline we only have to try after newlines */
1600 if (prog->check_substr) {
1601 if (s == startpos)
1602 goto after_try;
1603 while (1) {
1604 if (regtry(prog, s))
1605 goto got_it;
1606 after_try:
1607 if (s >= end)
1608 goto phooey;
1609 if (prog->reganch & RE_USE_INTUIT) {
1610 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1611 if (!s)
1612 goto phooey;
1613 }
1614 else
1615 s++;
1616 }
1617 } else {
1618 if (s > startpos)
1619 s--;
1620 while (s < end) {
1621 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1622 if (regtry(prog, s))
1623 goto got_it;
1624 }
1625 }
1626 }
1627 }
1628 goto phooey;
1629 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1630 if (regtry(prog, PL_reg_ganch))
1631 goto got_it;
1632 goto phooey;
1633 }
1634
1635 /* Messy cases: unanchored match. */
9041c2e3 1636 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1637 /* we have /x+whatever/ */
1638 /* it must be a one character string (XXXX Except UTF?) */
1639 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1640#ifdef DEBUGGING
1641 int did_match = 0;
1642#endif
1643
1aa99e6b 1644 if (do_utf8) {
6eb5f6b9
JH
1645 while (s < strend) {
1646 if (*s == ch) {
bf93d4cc 1647 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1648 if (regtry(prog, s)) goto got_it;
1649 s += UTF8SKIP(s);
1650 while (s < strend && *s == ch)
1651 s += UTF8SKIP(s);
1652 }
1653 s += UTF8SKIP(s);
1654 }
1655 }
1656 else {
1657 while (s < strend) {
1658 if (*s == ch) {
bf93d4cc 1659 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1660 if (regtry(prog, s)) goto got_it;
1661 s++;
1662 while (s < strend && *s == ch)
1663 s++;
1664 }
1665 s++;
1666 }
1667 }
b7953727 1668 DEBUG_r(if (!did_match)
bf93d4cc 1669 PerlIO_printf(Perl_debug_log,
b7953727
JH
1670 "Did not find anchored character...\n")
1671 );
6eb5f6b9
JH
1672 }
1673 /*SUPPRESS 560*/
1aa99e6b
IH
1674 else if (do_utf8 == (UTF!=0) &&
1675 (prog->anchored_substr != Nullsv
9041c2e3 1676 || (prog->float_substr != Nullsv
1aa99e6b 1677 && prog->float_max_offset < strend - s))) {
9041c2e3 1678 SV *must = prog->anchored_substr
6eb5f6b9 1679 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1680 I32 back_max =
6eb5f6b9 1681 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1682 I32 back_min =
6eb5f6b9 1683 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1684 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1685 -(I32)(CHR_SVLEN(must)
1aa99e6b 1686 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1687 char *last1; /* Last position checked before */
bf93d4cc
GS
1688#ifdef DEBUGGING
1689 int did_match = 0;
1690#endif
6eb5f6b9
JH
1691
1692 if (s > PL_bostr)
1693 last1 = HOPc(s, -1);
1694 else
1695 last1 = s - 1; /* bogus */
1696
1697 /* XXXX check_substr already used to find `s', can optimize if
1698 check_substr==must. */
1699 scream_pos = -1;
1700 dontbother = end_shift;
1701 strend = HOPc(strend, -dontbother);
1702 while ( (s <= last) &&
9041c2e3 1703 ((flags & REXEC_SCREAM)
1aa99e6b 1704 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1705 end_shift, &scream_pos, 0))
1aa99e6b 1706 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1707 (unsigned char*)strend, must,
6eb5f6b9 1708 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1709 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1710 if (HOPc(s, -back_max) > last1) {
1711 last1 = HOPc(s, -back_min);
1712 s = HOPc(s, -back_max);
1713 }
1714 else {
1715 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1716
1717 last1 = HOPc(s, -back_min);
1718 s = t;
1719 }
1aa99e6b 1720 if (do_utf8) {
6eb5f6b9
JH
1721 while (s <= last1) {
1722 if (regtry(prog, s))
1723 goto got_it;
1724 s += UTF8SKIP(s);
1725 }
1726 }
1727 else {
1728 while (s <= last1) {
1729 if (regtry(prog, s))
1730 goto got_it;
1731 s++;
1732 }
1733 }
1734 }
b7953727
JH
1735 DEBUG_r(if (!did_match)
1736 PerlIO_printf(Perl_debug_log,
1737 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1738 ((must == prog->anchored_substr)
1739 ? "anchored" : "floating"),
1740 PL_colors[0],
1741 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1742 SvPVX(must),
b7953727
JH
1743 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1744 );
6eb5f6b9
JH
1745 goto phooey;
1746 }
155aba94 1747 else if ((c = prog->regstclass)) {
66e933ab
GS
1748 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1749 /* don't bother with what can't match */
6eb5f6b9 1750 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1751 DEBUG_r({
1752 SV *prop = sv_newmortal();
1753 regprop(prop, c);
2a782b5b 1754 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), UTF ? sv_uni_display(dsv, sv, 60, 0) : s);
ffc61ed2 1755 });
6eb5f6b9
JH
1756 if (find_byclass(prog, c, s, strend, startpos, 0))
1757 goto got_it;
bf93d4cc 1758 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1759 }
1760 else {
1761 dontbother = 0;
1762 if (prog->float_substr != Nullsv) { /* Trim the end. */
1763 char *last;
d6a28714
JH
1764
1765 if (flags & REXEC_SCREAM) {
1766 last = screaminstr(sv, prog->float_substr, s - strbeg,
1767 end_shift, &scream_pos, 1); /* last one */
1768 if (!last)
ffc61ed2 1769 last = scream_olds; /* Only one occurrence. */
b8c5462f 1770 }
d6a28714
JH
1771 else {
1772 STRLEN len;
1773 char *little = SvPV(prog->float_substr, len);
1774
1775 if (SvTAIL(prog->float_substr)) {
1776 if (memEQ(strend - len + 1, little, len - 1))
1777 last = strend - len + 1;
1778 else if (!PL_multiline)
9041c2e3 1779 last = memEQ(strend - len, little, len)
d6a28714 1780 ? strend - len : Nullch;
b8c5462f 1781 else
d6a28714
JH
1782 goto find_last;
1783 } else {
1784 find_last:
9041c2e3 1785 if (len)
d6a28714 1786 last = rninstr(s, strend, little, little + len);
b8c5462f 1787 else
d6a28714 1788 last = strend; /* matching `$' */
b8c5462f 1789 }
b8c5462f 1790 }
bf93d4cc
GS
1791 if (last == NULL) {
1792 DEBUG_r(PerlIO_printf(Perl_debug_log,
1793 "%sCan't trim the tail, match fails (should not happen)%s\n",
1794 PL_colors[4],PL_colors[5]));
1795 goto phooey; /* Should not happen! */
1796 }
d6a28714
JH
1797 dontbother = strend - last + prog->float_min_offset;
1798 }
1799 if (minlen && (dontbother < minlen))
1800 dontbother = minlen - 1;
1801 strend -= dontbother; /* this one's always in bytes! */
1802 /* We don't know much -- general case. */
1aa99e6b 1803 if (do_utf8) {
d6a28714
JH
1804 for (;;) {
1805 if (regtry(prog, s))
1806 goto got_it;
1807 if (s >= strend)
1808 break;
b8c5462f 1809 s += UTF8SKIP(s);
d6a28714
JH
1810 };
1811 }
1812 else {
1813 do {
1814 if (regtry(prog, s))
1815 goto got_it;
1816 } while (s++ < strend);
1817 }
1818 }
1819
1820 /* Failure. */
1821 goto phooey;
1822
1823got_it:
1824 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1825
1826 if (PL_reg_eval_set) {
1827 /* Preserve the current value of $^R */
1828 if (oreplsv != GvSV(PL_replgv))
1829 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1830 restored, the value remains
1831 the same. */
acfe0abc 1832 restore_pos(aTHX_ 0);
d6a28714
JH
1833 }
1834
1835 /* make sure $`, $&, $', and $digit will work later */
1836 if ( !(flags & REXEC_NOT_FIRST) ) {
1837 if (RX_MATCH_COPIED(prog)) {
1838 Safefree(prog->subbeg);
1839 RX_MATCH_COPIED_off(prog);
1840 }
1841 if (flags & REXEC_COPY_STR) {
1842 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1843
1844 s = savepvn(strbeg, i);
1845 prog->subbeg = s;
1846 prog->sublen = i;
1847 RX_MATCH_COPIED_on(prog);
1848 }
1849 else {
1850 prog->subbeg = strbeg;
1851 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1852 }
1853 }
9041c2e3 1854
d6a28714
JH
1855 return 1;
1856
1857phooey:
bf93d4cc
GS
1858 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1859 PL_colors[4],PL_colors[5]));
d6a28714 1860 if (PL_reg_eval_set)
acfe0abc 1861 restore_pos(aTHX_ 0);
d6a28714
JH
1862 return 0;
1863}
1864
1865/*
1866 - regtry - try match at specific point
1867 */
1868STATIC I32 /* 0 failure, 1 success */
1869S_regtry(pTHX_ regexp *prog, char *startpos)
1870{
d6a28714
JH
1871 register I32 i;
1872 register I32 *sp;
1873 register I32 *ep;
1874 CHECKPOINT lastcp;
1875
02db2b7b
IZ
1876#ifdef DEBUGGING
1877 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1878#endif
d6a28714
JH
1879 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1880 MAGIC *mg;
1881
1882 PL_reg_eval_set = RS_init;
1883 DEBUG_r(DEBUG_s(
b900a521
JH
1884 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1885 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1886 ));
e8347627 1887 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1888 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1889 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1890 SAVETMPS;
1891 /* Apparently this is not needed, judging by wantarray. */
e8347627 1892 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1893 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1894
1895 if (PL_reg_sv) {
1896 /* Make $_ available to executed code. */
1897 if (PL_reg_sv != DEFSV) {
4d1ff10f 1898 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
d6a28714
JH
1899 SAVESPTR(DEFSV);
1900 DEFSV = PL_reg_sv;
b8c5462f 1901 }
d6a28714 1902
9041c2e3 1903 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1904 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1905 /* prepare for quick setting of pos */
14befaf4
DM
1906 sv_magic(PL_reg_sv, (SV*)0,
1907 PERL_MAGIC_regex_global, Nullch, 0);
1908 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1909 mg->mg_len = -1;
b8c5462f 1910 }
d6a28714
JH
1911 PL_reg_magic = mg;
1912 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1913 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 1914 }
09687e5a 1915 if (!PL_reg_curpm) {
0f79a09d 1916 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
1917#ifdef USE_ITHREADS
1918 {
1919 SV* repointer = newSViv(0);
577e12cc 1920 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 1921 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
1922 av_push(PL_regex_padav,repointer);
1923 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
1924 PL_regex_pad = AvARRAY(PL_regex_padav);
1925 }
1926#endif
1927 }
aaa362c4 1928 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
1929 PL_reg_oldcurpm = PL_curpm;
1930 PL_curpm = PL_reg_curpm;
1931 if (RX_MATCH_COPIED(prog)) {
1932 /* Here is a serious problem: we cannot rewrite subbeg,
1933 since it may be needed if this match fails. Thus
1934 $` inside (?{}) could fail... */
1935 PL_reg_oldsaved = prog->subbeg;
1936 PL_reg_oldsavedlen = prog->sublen;
1937 RX_MATCH_COPIED_off(prog);
1938 }
1939 else
1940 PL_reg_oldsaved = Nullch;
1941 prog->subbeg = PL_bostr;
1942 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1943 }
1944 prog->startp[0] = startpos - PL_bostr;
1945 PL_reginput = startpos;
1946 PL_regstartp = prog->startp;
1947 PL_regendp = prog->endp;
1948 PL_reglastparen = &prog->lastparen;
a01268b5 1949 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
1950 prog->lastparen = 0;
1951 PL_regsize = 0;
1952 DEBUG_r(PL_reg_starttry = startpos);
1953 if (PL_reg_start_tmpl <= prog->nparens) {
1954 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1955 if(PL_reg_start_tmp)
1956 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1957 else
1958 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1959 }
1960
128e8167
JH
1961#ifdef DEBUGGING
1962 sv_setpvn(PERL_DEBUG_PAD(0), "", 0);
1963 sv_setpvn(PERL_DEBUG_PAD(1), "", 0);
1964 sv_setpvn(PERL_DEBUG_PAD(2), "", 0);
1965#endif
1966
d6a28714
JH
1967 /* XXXX What this code is doing here?!!! There should be no need
1968 to do this again and again, PL_reglastparen should take care of
3dd2943c 1969 this! --ilya*/
dafc8851
JH
1970
1971 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1972 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1973 * PL_reglastparen), is not needed at all by the test suite
1974 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1975 * enough, for building DynaLoader, or otherwise this
1976 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1977 * will happen. Meanwhile, this code *is* needed for the
1978 * above-mentioned test suite tests to succeed. The common theme
1979 * on those tests seems to be returning null fields from matches.
1980 * --jhi */
dafc8851 1981#if 1
d6a28714
JH
1982 sp = prog->startp;
1983 ep = prog->endp;
1984 if (prog->nparens) {
09e8ae3b 1985 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1986 *++sp = -1;
1987 *++ep = -1;
1988 }
1989 }
dafc8851 1990#endif
02db2b7b 1991 REGCP_SET(lastcp);
d6a28714
JH
1992 if (regmatch(prog->program + 1)) {
1993 prog->endp[0] = PL_reginput - PL_bostr;
1994 return 1;
1995 }
02db2b7b 1996 REGCP_UNWIND(lastcp);
d6a28714
JH
1997 return 0;
1998}
1999
02db2b7b
IZ
2000#define RE_UNWIND_BRANCH 1
2001#define RE_UNWIND_BRANCHJ 2
2002
2003union re_unwind_t;
2004
2005typedef struct { /* XX: makes sense to enlarge it... */
2006 I32 type;
2007 I32 prev;
2008 CHECKPOINT lastcp;
2009} re_unwind_generic_t;
2010
2011typedef struct {
2012 I32 type;
2013 I32 prev;
2014 CHECKPOINT lastcp;
2015 I32 lastparen;
2016 regnode *next;
2017 char *locinput;
2018 I32 nextchr;
2019#ifdef DEBUGGING
2020 int regindent;
2021#endif
2022} re_unwind_branch_t;
2023
2024typedef union re_unwind_t {
2025 I32 type;
2026 re_unwind_generic_t generic;
2027 re_unwind_branch_t branch;
2028} re_unwind_t;
2029
8ba1375e
MJD
2030#define sayYES goto yes
2031#define sayNO goto no
2032#define sayYES_FINAL goto yes_final
2033#define sayYES_LOUD goto yes_loud
2034#define sayNO_FINAL goto no_final
2035#define sayNO_SILENT goto do_no
2036#define saySAME(x) if (x) goto yes; else goto no
2037
2038#define REPORT_CODE_OFF 24
2039
d6a28714
JH
2040/*
2041 - regmatch - main matching routine
2042 *
2043 * Conceptually the strategy is simple: check to see whether the current
2044 * node matches, call self recursively to see whether the rest matches,
2045 * and then act accordingly. In practice we make some effort to avoid
2046 * recursion, in particular by going through "ordinary" nodes (that don't
2047 * need to know whether the rest of the match failed) by a loop instead of
2048 * by recursion.
2049 */
2050/* [lwall] I've hoisted the register declarations to the outer block in order to
2051 * maybe save a little bit of pushing and popping on the stack. It also takes
2052 * advantage of machines that use a register save mask on subroutine entry.
2053 */
2054STATIC I32 /* 0 failure, 1 success */
2055S_regmatch(pTHX_ regnode *prog)
2056{
d6a28714
JH
2057 register regnode *scan; /* Current node. */
2058 regnode *next; /* Next node. */
2059 regnode *inner; /* Next node in internal branch. */
2060 register I32 nextchr; /* renamed nextchr - nextchar colides with
2061 function of same name */
2062 register I32 n; /* no or next */
b7953727
JH
2063 register I32 ln = 0; /* len or last */
2064 register char *s = Nullch; /* operand or save */
d6a28714 2065 register char *locinput = PL_reginput;
b7953727 2066 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2067 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2068 I32 unwind = 0;
b7953727 2069#if 0
02db2b7b 2070 I32 firstcp = PL_savestack_ix;
b7953727 2071#endif
53c4c00c 2072 register bool do_utf8 = PL_reg_match_utf8;
2a782b5b 2073#ifdef DEBUGGING
ce333219
JH
2074 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2075 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2076 SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2a782b5b 2077#endif
02db2b7b 2078
d6a28714
JH
2079#ifdef DEBUGGING
2080 PL_regindent++;
2081#endif
2082
2083 /* Note that nextchr is a byte even in UTF */
2084 nextchr = UCHARAT(locinput);
2085 scan = prog;
2086 while (scan != NULL) {
8ba1375e 2087
2a782b5b 2088 DEBUG_r( {
d6a28714
JH
2089 SV *prop = sv_newmortal();
2090 int docolor = *PL_colors[0];
2091 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2092 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2093 /* The part of the string before starttry has one color
2094 (pref0_len chars), between starttry and current
2095 position another one (pref_len - pref0_len chars),
2096 after the current position the third one.
2097 We assume that pref0_len <= pref_len, otherwise we
2098 decrease pref0_len. */
9041c2e3 2099 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2100 ? (5 + taill) - l : locinput - PL_bostr;
2101 int pref0_len;
d6a28714 2102
df1ffd02 2103 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2104 pref_len++;
2105 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2106 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2107 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2108 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2109 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2110 l--;
d6a28714
JH
2111 if (pref0_len < 0)
2112 pref0_len = 0;
2113 if (pref0_len > pref_len)
2114 pref0_len = pref_len;
2115 regprop(prop, scan);
2a782b5b
JH
2116 {
2117 char *s0 =
df1ffd02 2118 do_utf8 ?
2a782b5b
JH
2119 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
2120 pref0_len, 60, 0) :
2121 locinput - pref_len;
df1ffd02
JH
2122 int len0 = do_utf8 ? strlen(s0) : pref0_len;
2123 char *s1 = do_utf8 ?
2a782b5b
JH
2124 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
2125 pref_len - pref0_len, 60, 0) :
2126 locinput - pref_len + pref0_len;
df1ffd02
JH
2127 int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2128 char *s2 = do_utf8 ?
2a782b5b
JH
2129 pv_uni_display(dsv2, (U8*)locinput,
2130 PL_regeol - locinput, 60, 0) :
2131 locinput;
df1ffd02 2132 int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2133 PerlIO_printf(Perl_debug_log,
2134 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2135 (IV)(locinput - PL_bostr),
2136 PL_colors[4],
2137 len0, s0,
2138 PL_colors[5],
2139 PL_colors[2],
2140 len1, s1,
2141 PL_colors[3],
2142 (docolor ? "" : "> <"),
2143 PL_colors[0],
2144 len2, s2,
2145 PL_colors[1],
2146 15 - l - pref_len + 1,
2147 "",
2148 (IV)(scan - PL_regprogram), PL_regindent*2, "",
2149 SvPVX(prop));
2150 }
2151 });
d6a28714
JH
2152
2153 next = scan + NEXT_OFF(scan);
2154 if (next == scan)
2155 next = NULL;
2156
2157 switch (OP(scan)) {
2158 case BOL:
12d33761
HS
2159 if (locinput == PL_bostr || (PL_multiline &&
2160 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2161 {
2162 /* regtill = regbol; */
b8c5462f
JH
2163 break;
2164 }
d6a28714
JH
2165 sayNO;
2166 case MBOL:
12d33761
HS
2167 if (locinput == PL_bostr ||
2168 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2169 {
b8c5462f
JH
2170 break;
2171 }
d6a28714
JH
2172 sayNO;
2173 case SBOL:
c2a73568 2174 if (locinput == PL_bostr)
b8c5462f 2175 break;
d6a28714
JH
2176 sayNO;
2177 case GPOS:
2178 if (locinput == PL_reg_ganch)
2179 break;
2180 sayNO;
2181 case EOL:
2182 if (PL_multiline)
2183 goto meol;
2184 else
2185 goto seol;
2186 case MEOL:
2187 meol:
2188 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2189 sayNO;
b8c5462f 2190 break;
d6a28714
JH
2191 case SEOL:
2192 seol:
2193 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2194 sayNO;
d6a28714 2195 if (PL_regeol - locinput > 1)
b8c5462f 2196 sayNO;
b8c5462f 2197 break;
d6a28714
JH
2198 case EOS:
2199 if (PL_regeol != locinput)
b8c5462f 2200 sayNO;
d6a28714 2201 break;
ffc61ed2 2202 case SANY:
d6a28714 2203 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2204 sayNO;
f33976b4
DB
2205 if (do_utf8) {
2206 locinput += PL_utf8skip[nextchr];
2207 if (locinput > PL_regeol)
2208 sayNO;
2209 nextchr = UCHARAT(locinput);
2210 }
2211 else
2212 nextchr = UCHARAT(++locinput);
2213 break;
2214 case CANY:
2215 if (!nextchr && locinput >= PL_regeol)
2216 sayNO;
b8c5462f 2217 nextchr = UCHARAT(++locinput);
a0d0e21e 2218 break;
ffc61ed2 2219 case REG_ANY:
1aa99e6b
IH
2220 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2221 sayNO;
2222 if (do_utf8) {
b8c5462f 2223 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2224 if (locinput > PL_regeol)
2225 sayNO;
a0ed51b3 2226 nextchr = UCHARAT(locinput);
a0ed51b3 2227 }
1aa99e6b
IH
2228 else
2229 nextchr = UCHARAT(++locinput);
a0ed51b3 2230 break;
d6a28714 2231 case EXACT:
cd439c50
IZ
2232 s = STRING(scan);
2233 ln = STR_LEN(scan);
1aa99e6b 2234 if (do_utf8 != (UTF!=0)) {
bc517b45 2235 /* The target and the pattern have differing utf8ness. */
1aa99e6b
IH
2236 char *l = locinput;
2237 char *e = s + ln;
bc517b45 2238 STRLEN ulen;
a72c7584 2239
5ff6fc6d
JH
2240 if (do_utf8) {
2241 /* The target is utf8, the pattern is not utf8. */
1aa99e6b
IH
2242 while (s < e) {
2243 if (l >= PL_regeol)
5ff6fc6d
JH
2244 sayNO;
2245 if (NATIVE_TO_UNI(*(U8*)s) !=
bc517b45 2246 utf8_to_uvchr((U8*)l, &ulen))
5ff6fc6d 2247 sayNO;
bc517b45 2248 l += ulen;
5ff6fc6d 2249 s ++;
1aa99e6b 2250 }
5ff6fc6d
JH
2251 }
2252 else {
2253 /* The target is not utf8, the pattern is utf8. */
1aa99e6b
IH
2254 while (s < e) {
2255 if (l >= PL_regeol)
2256 sayNO;
5ff6fc6d 2257 if (NATIVE_TO_UNI(*((U8*)l)) !=
bc517b45 2258 utf8_to_uvchr((U8*)s, &ulen))
1aa99e6b 2259 sayNO;
bc517b45 2260 s += ulen;
a72c7584 2261 l ++;
1aa99e6b 2262 }
5ff6fc6d 2263 }
1aa99e6b
IH
2264 locinput = l;
2265 nextchr = UCHARAT(locinput);
2266 break;
2267 }
bc517b45 2268 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2269 /* Inline the first character, for speed. */
2270 if (UCHARAT(s) != nextchr)
2271 sayNO;
2272 if (PL_regeol - locinput < ln)
2273 sayNO;
2274 if (ln > 1 && memNE(s, locinput, ln))
2275 sayNO;
2276 locinput += ln;
2277 nextchr = UCHARAT(locinput);
2278 break;
2279 case EXACTFL:
b8c5462f
JH
2280 PL_reg_flags |= RF_tainted;
2281 /* FALL THROUGH */
d6a28714 2282 case EXACTF:
cd439c50
IZ
2283 s = STRING(scan);
2284 ln = STR_LEN(scan);
d6a28714 2285
bc517b45 2286 {
d6a28714 2287 char *l = locinput;
bc517b45
JH
2288 char *e = s + ln;
2289 U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
2290
2291 if (do_utf8 != (UTF!=0)) {
2292 /* The target and the pattern have differing utf8ness. */
2293 STRLEN ulen1, ulen2;
2294 UV cs, cl;
2295
2296 if (do_utf8) {
2297 /* The target is utf8, the pattern is not utf8. */
2298 while (s < e) {
2299 if (l >= PL_regeol)
2300 sayNO;
2301
2302 cs = to_uni_fold(NATIVE_TO_UNI(*(U8*)s),
2303 (U8*)s, &ulen1);
2304 cl = utf8_to_uvchr((U8*)l, &ulen2);
2305
2306 if (cs != cl) {
2307 cl = to_uni_fold(cl, (U8*)l, &ulen2);
2308 if (ulen1 != ulen2 || cs != cl)
2309 sayNO;
2310 }
2311 l += ulen1;
2312 s ++;
2313 }
2314 }
2315 else {
2316 /* The target is not utf8, the pattern is utf8. */
2317 while (s < e) {
2318 if (l >= PL_regeol)
2319 sayNO;
2320
2321 cs = utf8_to_uvchr((U8*)s, &ulen1);
2322
2323 cl = to_uni_fold(NATIVE_TO_UNI(*(U8*)l),
2324 (U8*)l, &ulen2);
2325
2326 if (cs != cl) {
2327 cs = to_uni_fold(cs, (U8*)s, &ulen1);
2328 if (ulen1 != ulen2 || cs != cl)
2329 sayNO;
2330 }
2331 l ++;
2332 s += ulen1;
2333 }
2334 }
2335 locinput = l;
2336 nextchr = UCHARAT(locinput);
2337 break;
2338 }
2339
2340 if (do_utf8 && UTF) {
2341 /* Both the target and the pattern are utf8. */
2342 STRLEN ulen;
2343
2344 while (s < e) {
2345 if (l >= PL_regeol)
2346 sayNO;
2347 if (UTF8SKIP(s) != UTF8SKIP(l) ||
2348 memNE(s, (char*)l, UTF8SKIP(s))) {
2349 to_utf8_fold((U8*)l, tmpbuf, &ulen);
2350 if (UTF8SKIP(s) != ulen ||
2351 memNE(s, (char*)tmpbuf, ulen))
2352 sayNO;
2353 }
2354 l += UTF8SKIP(l);
2355 s += UTF8SKIP(s);
2356 }
2357 locinput = l;
2358 nextchr = UCHARAT(locinput);
2359 break;
b8c5462f 2360 }
a0ed51b3 2361 }
d6a28714 2362
bc517b45
JH
2363 /* Neither the target and the pattern are utf8. */
2364
d6a28714
JH
2365 /* Inline the first character, for speed. */
2366 if (UCHARAT(s) != nextchr &&
2367 UCHARAT(s) != ((OP(scan) == EXACTF)
2368 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2369 sayNO;
d6a28714 2370 if (PL_regeol - locinput < ln)
b8c5462f 2371 sayNO;
d6a28714
JH
2372 if (ln > 1 && (OP(scan) == EXACTF
2373 ? ibcmp(s, locinput, ln)
2374 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2375 sayNO;
d6a28714
JH
2376 locinput += ln;
2377 nextchr = UCHARAT(locinput);
a0d0e21e 2378 break;
d6a28714 2379 case ANYOF:
ffc61ed2
JH
2380 if (do_utf8) {
2381 if (!reginclass(scan, (U8*)locinput, do_utf8))
2382 sayNO;
2383 if (locinput >= PL_regeol)
2384 sayNO;
2385 locinput += PL_utf8skip[nextchr];
b8c5462f 2386 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2387 }
2388 else {
2389 if (nextchr < 0)
2390 nextchr = UCHARAT(locinput);
2391 if (!reginclass(scan, (U8*)locinput, do_utf8))
2392 sayNO;
2393 if (!nextchr && locinput >= PL_regeol)
2394 sayNO;
2395 nextchr = UCHARAT(++locinput);
2396 }
b8c5462f 2397 break;
d6a28714 2398 case ALNUML:
b8c5462f
JH
2399 PL_reg_flags |= RF_tainted;
2400 /* FALL THROUGH */
d6a28714 2401 case ALNUM:
b8c5462f 2402 if (!nextchr)
4633a7c4 2403 sayNO;
ffc61ed2 2404 if (do_utf8) {
ad24be35 2405 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2406 if (!(OP(scan) == ALNUM
3568d838 2407 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2408 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2409 {
2410 sayNO;
a0ed51b3 2411 }
b8c5462f 2412 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2413 nextchr = UCHARAT(locinput);
2414 break;
2415 }
ffc61ed2 2416 if (!(OP(scan) == ALNUM
d6a28714 2417 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2418 sayNO;
b8c5462f 2419 nextchr = UCHARAT(++locinput);
a0d0e21e 2420 break;
d6a28714 2421 case NALNUML:
b8c5462f
JH
2422 PL_reg_flags |= RF_tainted;
2423 /* FALL THROUGH */
d6a28714
JH
2424 case NALNUM:
2425 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2426 sayNO;
ffc61ed2 2427 if (do_utf8) {
8269fa76 2428 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2429 if (OP(scan) == NALNUM
3568d838 2430 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2431 : isALNUM_LC_utf8((U8*)locinput))
2432 {
b8c5462f 2433 sayNO;
d6a28714 2434 }
b8c5462f
JH
2435 locinput += PL_utf8skip[nextchr];
2436 nextchr = UCHARAT(locinput);
2437 break;
2438 }
ffc61ed2 2439 if (OP(scan) == NALNUM
d6a28714 2440 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2441 sayNO;
76e3520e 2442 nextchr = UCHARAT(++locinput);
a0d0e21e 2443 break;
d6a28714
JH
2444 case BOUNDL:
2445 case NBOUNDL:
3280af22 2446 PL_reg_flags |= RF_tainted;
bbce6d69 2447 /* FALL THROUGH */
d6a28714
JH
2448 case BOUND:
2449 case NBOUND:
2450 /* was last char in word? */
ffc61ed2 2451 if (do_utf8) {
12d33761
HS
2452 if (locinput == PL_bostr)
2453 ln = '\n';
ffc61ed2
JH
2454 else {
2455 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2456
2b9d42f0 2457 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2458 }
2459 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2460 ln = isALNUM_uni(ln);
8269fa76 2461 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2462 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2463 }
2464 else {
9041c2e3 2465 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2466 n = isALNUM_LC_utf8((U8*)locinput);
2467 }
a0ed51b3 2468 }
d6a28714 2469 else {
12d33761
HS
2470 ln = (locinput != PL_bostr) ?
2471 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2472 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2473 ln = isALNUM(ln);
2474 n = isALNUM(nextchr);
2475 }
2476 else {
2477 ln = isALNUM_LC(ln);
2478 n = isALNUM_LC(nextchr);
2479 }
d6a28714 2480 }
ffc61ed2
JH
2481 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2482 OP(scan) == BOUNDL))
2483 sayNO;
a0ed51b3 2484 break;
d6a28714 2485 case SPACEL:
3280af22 2486 PL_reg_flags |= RF_tainted;
bbce6d69 2487 /* FALL THROUGH */
d6a28714 2488 case SPACE:
9442cb0e 2489 if (!nextchr)
4633a7c4 2490 sayNO;
1aa99e6b 2491 if (do_utf8) {
fd400ab9 2492 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2493 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2494 if (!(OP(scan) == SPACE
3568d838 2495 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2496 : isSPACE_LC_utf8((U8*)locinput)))
2497 {
2498 sayNO;
2499 }
2500 locinput += PL_utf8skip[nextchr];
2501 nextchr = UCHARAT(locinput);
2502 break;
d6a28714 2503 }
ffc61ed2
JH
2504 if (!(OP(scan) == SPACE
2505 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2506 sayNO;
2507 nextchr = UCHARAT(++locinput);
2508 }
2509 else {
2510 if (!(OP(scan) == SPACE
2511 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2512 sayNO;
2513 nextchr = UCHARAT(++locinput);
a0ed51b3 2514 }
a0ed51b3 2515 break;
d6a28714 2516 case NSPACEL:
3280af22 2517 PL_reg_flags |= RF_tainted;
bbce6d69 2518 /* FALL THROUGH */
d6a28714 2519 case NSPACE:
9442cb0e 2520 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2521 sayNO;
1aa99e6b 2522 if (do_utf8) {
8269fa76 2523 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2524 if (OP(scan) == NSPACE
3568d838 2525 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2526 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2527 {
2528 sayNO;
2529 }
2530 locinput += PL_utf8skip[nextchr];
2531 nextchr = UCHARAT(locinput);
2532 break;
a0ed51b3 2533 }
ffc61ed2 2534 if (OP(scan) == NSPACE
d6a28714 2535 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2536 sayNO;
76e3520e 2537 nextchr = UCHARAT(++locinput);
a0d0e21e 2538 break;
d6a28714 2539 case DIGITL:
a0ed51b3
LW
2540 PL_reg_flags |= RF_tainted;
2541 /* FALL THROUGH */
d6a28714 2542 case DIGIT:
9442cb0e 2543 if (!nextchr)
a0ed51b3 2544 sayNO;
1aa99e6b 2545 if (do_utf8) {
8269fa76 2546 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2547 if (!(OP(scan) == DIGIT
3568d838 2548 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2549 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2550 {
a0ed51b3 2551 sayNO;
dfe13c55 2552 }
6f06b55f 2553 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2554 nextchr = UCHARAT(locinput);
2555 break;
2556 }
ffc61ed2 2557 if (!(OP(scan) == DIGIT
9442cb0e 2558 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2559 sayNO;
2560 nextchr = UCHARAT(++locinput);
2561 break;
d6a28714 2562 case NDIGITL:
b8c5462f
JH
2563 PL_reg_flags |= RF_tainted;
2564 /* FALL THROUGH */
d6a28714 2565 case NDIGIT:
9442cb0e 2566 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2567 sayNO;
1aa99e6b 2568 if (do_utf8) {
8269fa76 2569 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2570 if (OP(scan) == NDIGIT
3568d838 2571 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2572 : isDIGIT_LC_utf8((U8*)locinput))
2573 {
a0ed51b3 2574 sayNO;
9442cb0e 2575 }
6f06b55f 2576 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2577 nextchr = UCHARAT(locinput);
2578 break;
2579 }
ffc61ed2 2580 if (OP(scan) == NDIGIT
9442cb0e 2581 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2582 sayNO;
2583 nextchr = UCHARAT(++locinput);
2584 break;
2585 case CLUMP:
8269fa76 2586 LOAD_UTF8_CHARCLASS(mark,"~");
3568d838
JH
2587 if (locinput >= PL_regeol ||
2588 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3 2589 sayNO;
6f06b55f 2590 locinput += PL_utf8skip[nextchr];
3568d838
JH
2591 while (locinput < PL_regeol &&
2592 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3
LW
2593 locinput += UTF8SKIP(locinput);
2594 if (locinput > PL_regeol)
2595 sayNO;
2596 nextchr = UCHARAT(locinput);
2597 break;
c8756f30 2598 case REFFL:
3280af22 2599 PL_reg_flags |= RF_tainted;
c8756f30 2600 /* FALL THROUGH */
c277df42 2601 case REF:
c8756f30 2602 case REFF:
c277df42 2603 n = ARG(scan); /* which paren pair */
cf93c79d 2604 ln = PL_regstartp[n];
2c2d71f5 2605 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2606 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2607 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2608 if (ln == PL_regendp[n])
a0d0e21e 2609 break;
a0ed51b3 2610
cf93c79d 2611 s = PL_bostr + ln;
1aa99e6b 2612 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2613 char *l = locinput;
cf93c79d 2614 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2615 /*
2616 * Note that we can't do the "other character" lookup trick as
2617 * in the 8-bit case (no pun intended) because in Unicode we
2618 * have to map both upper and title case to lower case.
2619 */
2620 if (OP(scan) == REFF) {
a2a2844f 2621 STRLEN ulen1, ulen2;
e7ae6809
JH
2622 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
2623 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a0ed51b3
LW
2624 while (s < e) {
2625 if (l >= PL_regeol)
2626 sayNO;
a2a2844f
JH
2627 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2628 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 2629 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 2630 sayNO;
a2a2844f
JH
2631 s += ulen1;
2632 l += ulen2;
a0ed51b3
LW
2633 }
2634 }
2635 locinput = l;
2636 nextchr = UCHARAT(locinput);
2637 break;
2638 }
2639
a0d0e21e 2640 /* Inline the first character, for speed. */
76e3520e 2641 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2642 (OP(scan) == REF ||
2643 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2644 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2645 sayNO;
cf93c79d 2646 ln = PL_regendp[n] - ln;
3280af22 2647 if (locinput + ln > PL_regeol)
4633a7c4 2648 sayNO;
c8756f30
AK
2649 if (ln > 1 && (OP(scan) == REF
2650 ? memNE(s, locinput, ln)
2651 : (OP(scan) == REFF
2652 ? ibcmp(s, locinput, ln)
2653 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2654 sayNO;
a0d0e21e 2655 locinput += ln;
76e3520e 2656 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2657 break;
2658
2659 case NOTHING:
c277df42 2660 case TAIL:
a0d0e21e
LW
2661 break;
2662 case BACK:
2663 break;
c277df42
IZ
2664 case EVAL:
2665 {
2666 dSP;
533c011a 2667 OP_4tree *oop = PL_op;
3280af22
NIS
2668 COP *ocurcop = PL_curcop;
2669 SV **ocurpad = PL_curpad;
c277df42 2670 SV *ret;
9041c2e3 2671
c277df42 2672 n = ARG(scan);
533c011a 2673 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2674 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2675 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2676 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2677
8e5e9ebe
RGS
2678 {
2679 SV **before = SP;
2680 CALLRUNOPS(aTHX); /* Scalar context. */
2681 SPAGAIN;
2682 if (SP == before)
2683 ret = Nullsv; /* protect against empty (?{}) blocks. */
2684 else {
2685 ret = POPs;
2686 PUTBACK;
2687 }
2688 }
2689
0f5d15d6
IZ
2690 PL_op = oop;
2691 PL_curpad = ocurpad;
2692 PL_curcop = ocurcop;
c277df42 2693 if (logical) {
0f5d15d6
IZ
2694 if (logical == 2) { /* Postponed subexpression. */
2695 regexp *re;
22c35a8c 2696 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2697 re_cc_state state;
0f5d15d6
IZ
2698 CHECKPOINT cp, lastcp;
2699
2700 if(SvROK(ret) || SvRMAGICAL(ret)) {
2701 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2702
2703 if(SvMAGICAL(sv))
14befaf4 2704 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2705 }
2706 if (mg) {
2707 re = (regexp *)mg->mg_obj;
df0003d4 2708 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2709 }
2710 else {
2711 STRLEN len;
2712 char *t = SvPV(ret, len);
2713 PMOP pm;
2714 char *oprecomp = PL_regprecomp;
2715 I32 osize = PL_regsize;
2716 I32 onpar = PL_regnpar;
2717
5fcd1c1b 2718 Zero(&pm, 1, PMOP);
cea2e8a9 2719 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2720 if (!(SvFLAGS(ret)
0f5d15d6 2721 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2722 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2723 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2724 PL_regprecomp = oprecomp;
2725 PL_regsize = osize;
2726 PL_regnpar = onpar;
2727 }
2728 DEBUG_r(
9041c2e3 2729 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2730 "Entering embedded `%s%.60s%s%s'\n",
2731 PL_colors[0],
2732 re->precomp,
2733 PL_colors[1],
2734 (strlen(re->precomp) > 60 ? "..." : ""))
2735 );
2736 state.node = next;
2737 state.prev = PL_reg_call_cc;
2738 state.cc = PL_regcc;
2739 state.re = PL_reg_re;
2740
2ab05381 2741 PL_regcc = 0;
9041c2e3 2742
0f5d15d6 2743 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2744 REGCP_SET(lastcp);
0f5d15d6
IZ
2745 cache_re(re);
2746 state.ss = PL_savestack_ix;
2747 *PL_reglastparen = 0;
a01268b5 2748 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2749 PL_reg_call_cc = &state;
2750 PL_reginput = locinput;
2c2d71f5
JH
2751
2752 /* XXXX This is too dramatic a measure... */
2753 PL_reg_maxiter = 0;
2754
0f5d15d6 2755 if (regmatch(re->program + 1)) {
2c914db6
IZ
2756 /* Even though we succeeded, we need to restore
2757 global variables, since we may be wrapped inside
2758 SUSPEND, thus the match may be not finished yet. */
2759
2760 /* XXXX Do this only if SUSPENDed? */
2761 PL_reg_call_cc = state.prev;
2762 PL_regcc = state.cc;
2763 PL_reg_re = state.re;
2764 cache_re(PL_reg_re);
2765
2766 /* XXXX This is too dramatic a measure... */
2767 PL_reg_maxiter = 0;
2768
2769 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2770 ReREFCNT_dec(re);
2771 regcpblow(cp);
2772 sayYES;
2773 }
0f5d15d6 2774 ReREFCNT_dec(re);
02db2b7b 2775 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2776 regcppop();
2777 PL_reg_call_cc = state.prev;
2778 PL_regcc = state.cc;
2779 PL_reg_re = state.re;
d3790889 2780 cache_re(PL_reg_re);
2c2d71f5
JH
2781
2782 /* XXXX This is too dramatic a measure... */
2783 PL_reg_maxiter = 0;
2784
8e514ae6 2785 logical = 0;
0f5d15d6
IZ
2786 sayNO;
2787 }
c277df42 2788 sw = SvTRUE(ret);
0f5d15d6 2789 logical = 0;
a0ed51b3
LW
2790 }
2791 else
3280af22 2792 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2793 break;
2794 }
a0d0e21e 2795 case OPEN:
c277df42 2796 n = ARG(scan); /* which paren pair */
3280af22
NIS
2797 PL_reg_start_tmp[n] = locinput;
2798 if (n > PL_regsize)
2799 PL_regsize = n;
a0d0e21e
LW
2800 break;
2801 case CLOSE:
c277df42 2802 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2803 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2804 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2805 if (n > *PL_reglastparen)
2806 *PL_reglastparen = n;
a01268b5 2807 *PL_reglastcloseparen = n;
a0d0e21e 2808 break;
c277df42
IZ
2809 case GROUPP:
2810 n = ARG(scan); /* which paren pair */
cf93c79d 2811 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2812 break;
2813 case IFTHEN:
2c2d71f5 2814 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2815 if (sw)
2816 next = NEXTOPER(NEXTOPER(scan));
2817 else {
2818 next = scan + ARG(scan);
2819 if (OP(next) == IFTHEN) /* Fake one. */
2820 next = NEXTOPER(NEXTOPER(next));
2821 }
2822 break;
2823 case LOGICAL:
0f5d15d6 2824 logical = scan->flags;
c277df42 2825 break;
2ab05381
IZ
2826/*******************************************************************
2827 PL_regcc contains infoblock about the innermost (...)* loop, and
2828 a pointer to the next outer infoblock.
2829
2830 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2831
2832 1) After matching X, regnode for CURLYX is processed;
2833
9041c2e3 2834 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2835 regmatch() recursively with the starting point at WHILEM node;
2836
2837 3) Each hit of WHILEM node tries to match A and Z (in the order
2838 depending on the current iteration, min/max of {min,max} and
2839 greediness). The information about where are nodes for "A"
2840 and "Z" is read from the infoblock, as is info on how many times "A"
2841 was already matched, and greediness.
2842
2843 4) After A matches, the same WHILEM node is hit again.
2844
2845 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2846 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2847 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2848 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2849 of the external loop.
2850
2851 Currently present infoblocks form a tree with a stem formed by PL_curcc
2852 and whatever it mentions via ->next, and additional attached trees
2853 corresponding to temporarily unset infoblocks as in "5" above.
2854
9041c2e3 2855 In the following picture infoblocks for outer loop of
2ab05381
IZ
2856 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2857 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2858 infoblocks are drawn below the "reset" infoblock.
2859
2860 In fact in the picture below we do not show failed matches for Z and T
2861 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2862 more obvious *why* one needs to *temporary* unset infoblocks.]
2863
2864 Matched REx position InfoBlocks Comment
2865 (Y(A)*?Z)*?T x
2866 Y(A)*?Z)*?T x <- O
2867 Y (A)*?Z)*?T x <- O
2868 Y A)*?Z)*?T x <- O <- I
2869 YA )*?Z)*?T x <- O <- I
2870 YA A)*?Z)*?T x <- O <- I
2871 YAA )*?Z)*?T x <- O <- I
2872 YAA Z)*?T x <- O # Temporary unset I
2873 I
2874
2875 YAAZ Y(A)*?Z)*?T x <- O
2876 I
2877
2878 YAAZY (A)*?Z)*?T x <- O
2879 I
2880
2881 YAAZY A)*?Z)*?T x <- O <- I
2882 I
2883
2884 YAAZYA )*?Z)*?T x <- O <- I
2885 I
2886
2887 YAAZYA Z)*?T x <- O # Temporary unset I
2888 I,I
2889
2890 YAAZYAZ )*?T x <- O
2891 I,I
2892
2893 YAAZYAZ T x # Temporary unset O
2894 O
2895 I,I
2896
2897 YAAZYAZT x
2898 O
2899 I,I
2900 *******************************************************************/
a0d0e21e
LW
2901 case CURLYX: {
2902 CURCUR cc;
3280af22 2903 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2904 /* No need to save/restore up to this paren */
2905 I32 parenfloor = scan->flags;
c277df42
IZ
2906
2907 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2908 next += ARG(next);
3280af22
NIS
2909 cc.oldcc = PL_regcc;
2910 PL_regcc = &cc;
cb434fcc
IZ
2911 /* XXXX Probably it is better to teach regpush to support
2912 parenfloor > PL_regsize... */
2913 if (parenfloor > *PL_reglastparen)
2914 parenfloor = *PL_reglastparen; /* Pessimization... */
2915 cc.parenfloor = parenfloor;
a0d0e21e
LW
2916 cc.cur = -1;
2917 cc.min = ARG1(scan);
2918 cc.max = ARG2(scan);
c277df42 2919 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2920 cc.next = next;
2921 cc.minmod = minmod;
2922 cc.lastloc = 0;
3280af22 2923 PL_reginput = locinput;
a0d0e21e
LW
2924 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2925 regcpblow(cp);
3280af22 2926 PL_regcc = cc.oldcc;
4633a7c4 2927 saySAME(n);
a0d0e21e
LW
2928 }
2929 /* NOT REACHED */
2930 case WHILEM: {
2931 /*
2932 * This is really hard to understand, because after we match
2933 * what we're trying to match, we must make sure the rest of
2c2d71f5 2934 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2935 * to go back UP the parse tree by recursing ever deeper. And
2936 * if it fails, we have to reset our parent's current state
2937 * that we can try again after backing off.
2938 */
2939
c277df42 2940 CHECKPOINT cp, lastcp;
3280af22 2941 CURCUR* cc = PL_regcc;
c277df42
IZ
2942 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2943
4633a7c4 2944 n = cc->cur + 1; /* how many we know we matched */
3280af22 2945 PL_reginput = locinput;
a0d0e21e 2946
c277df42 2947 DEBUG_r(
9041c2e3
NIS
2948 PerlIO_printf(Perl_debug_log,
2949 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2950 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2951 (long)n, (long)cc->min,
c277df42
IZ
2952 (long)cc->max, (long)cc)
2953 );
4633a7c4 2954
a0d0e21e
LW
2955 /* If degenerate scan matches "", assume scan done. */
2956
579cf2c3 2957 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2958 PL_regcc = cc->oldcc;
2ab05381
IZ
2959 if (PL_regcc)
2960 ln = PL_regcc->cur;
c277df42 2961 DEBUG_r(
c3464db5
DD
2962 PerlIO_printf(Perl_debug_log,
2963 "%*s empty match detected, try continuation...\n",
3280af22 2964 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2965 );
a0d0e21e 2966 if (regmatch(cc->next))
4633a7c4 2967 sayYES;
2ab05381
IZ
2968 if (PL_regcc)
2969 PL_regcc->cur = ln;
3280af22 2970 PL_regcc = cc;
4633a7c4 2971 sayNO;
a0d0e21e
LW
2972 }
2973
2974 /* First just match a string of min scans. */
2975
2976 if (n < cc->min) {
2977 cc->cur = n;
2978 cc->lastloc = locinput;
4633a7c4
LW
2979 if (regmatch(cc->scan))
2980 sayYES;
2981 cc->cur = n - 1;
c277df42 2982 cc->lastloc = lastloc;
4633a7c4 2983 sayNO;
a0d0e21e
LW
2984 }
2985
2c2d71f5
JH
2986 if (scan->flags) {
2987 /* Check whether we already were at this position.
2988 Postpone detection until we know the match is not
2989 *that* much linear. */
2990 if (!PL_reg_maxiter) {
2991 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2992 PL_reg_leftiter = PL_reg_maxiter;
2993 }
2994 if (PL_reg_leftiter-- == 0) {
2995 I32 size = (PL_reg_maxiter + 7)/8;
2996 if (PL_reg_poscache) {
2997 if (PL_reg_poscache_size < size) {
2998 Renew(PL_reg_poscache, size, char);
2999 PL_reg_poscache_size = size;
3000 }
3001 Zero(PL_reg_poscache, size, char);
3002 }
3003 else {
3004 PL_reg_poscache_size = size;
3005 Newz(29, PL_reg_poscache, size, char);
3006 }
3007 DEBUG_r(
3008 PerlIO_printf(Perl_debug_log,
3009 "%sDetected a super-linear match, switching on caching%s...\n",
3010 PL_colors[4], PL_colors[5])
3011 );
3012 }
3013 if (PL_reg_leftiter < 0) {
3014 I32 o = locinput - PL_bostr, b;
3015
3016 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
3017 b = o % 8;
3018 o /= 8;
3019 if (PL_reg_poscache[o] & (1<<b)) {
3020 DEBUG_r(
3021 PerlIO_printf(Perl_debug_log,
3022 "%*s already tried at this position...\n",
3023 REPORT_CODE_OFF+PL_regindent*2, "")
3024 );
7821416a 3025 sayNO_SILENT;
2c2d71f5
JH
3026 }
3027 PL_reg_poscache[o] |= (1<<b);
3028 }
3029 }
3030
a0d0e21e
LW
3031 /* Prefer next over scan for minimal matching. */
3032
3033 if (cc->minmod) {
3280af22 3034 PL_regcc = cc->oldcc;
2ab05381
IZ
3035 if (PL_regcc)
3036 ln = PL_regcc->cur;
5f05dabc 3037 cp = regcppush(cc->parenfloor);
02db2b7b 3038 REGCP_SET(lastcp);
5f05dabc 3039 if (regmatch(cc->next)) {
c277df42 3040 regcpblow(cp);
4633a7c4 3041 sayYES; /* All done. */
5f05dabc 3042 }
02db2b7b 3043 REGCP_UNWIND(lastcp);
5f05dabc 3044 regcppop();
2ab05381
IZ
3045 if (PL_regcc)
3046 PL_regcc->cur = ln;
3280af22 3047 PL_regcc = cc;
a0d0e21e 3048
c277df42 3049 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 3050 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3051 && !(PL_reg_flags & RF_warned)) {
3052 PL_reg_flags |= RF_warned;
e476b1b5 3053 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
3054 "Complex regular subexpression recursion",
3055 REG_INFTY - 1);
c277df42 3056 }
4633a7c4 3057 sayNO;
c277df42 3058 }
a687059c 3059
c277df42 3060 DEBUG_r(
c3464db5
DD
3061 PerlIO_printf(Perl_debug_log,
3062 "%*s trying longer...\n",
3280af22 3063 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3064 );
a0d0e21e 3065 /* Try scanning more and see if it helps. */
3280af22 3066 PL_reginput = locinput;
a0d0e21e
LW
3067 cc->cur = n;
3068 cc->lastloc = locinput;
5f05dabc 3069 cp = regcppush(cc->parenfloor);
02db2b7b 3070 REGCP_SET(lastcp);
5f05dabc 3071 if (regmatch(cc->scan)) {
c277df42 3072 regcpblow(cp);
4633a7c4 3073 sayYES;
5f05dabc 3074 }
02db2b7b 3075 REGCP_UNWIND(lastcp);
5f05dabc 3076 regcppop();
4633a7c4 3077 cc->cur = n - 1;
c277df42 3078 cc->lastloc = lastloc;
4633a7c4 3079 sayNO;
a0d0e21e
LW
3080 }
3081
3082 /* Prefer scan over next for maximal matching. */
3083
3084 if (n < cc->max) { /* More greed allowed? */
5f05dabc 3085 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
3086 cc->cur = n;
3087 cc->lastloc = locinput;
02db2b7b 3088 REGCP_SET(lastcp);
5f05dabc 3089 if (regmatch(cc->scan)) {
c277df42 3090 regcpblow(cp);
4633a7c4 3091 sayYES;
5f05dabc 3092 }
02db2b7b 3093 REGCP_UNWIND(lastcp);
a0d0e21e 3094 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3095 PL_reginput = locinput;
c277df42 3096 DEBUG_r(
c3464db5
DD
3097 PerlIO_printf(Perl_debug_log,
3098 "%*s failed, try continuation...\n",
3280af22 3099 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3100 );
3101 }
9041c2e3 3102 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3103 && !(PL_reg_flags & RF_warned)) {
3280af22 3104 PL_reg_flags |= RF_warned;
e476b1b5 3105 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
3106 "Complex regular subexpression recursion",
3107 REG_INFTY - 1);
a0d0e21e
LW
3108 }
3109
3110 /* Failed deeper matches of scan, so see if this one works. */
3280af22 3111 PL_regcc = cc->oldcc;
2ab05381
IZ
3112 if (PL_regcc)
3113 ln = PL_regcc->cur;
a0d0e21e 3114 if (regmatch(cc->next))
4633a7c4 3115 sayYES;
2ab05381
IZ
3116 if (PL_regcc)
3117 PL_regcc->cur = ln;
3280af22 3118 PL_regcc = cc;
4633a7c4 3119 cc->cur = n - 1;
c277df42 3120 cc->lastloc = lastloc;
4633a7c4 3121 sayNO;
a0d0e21e
LW
3122 }
3123 /* NOT REACHED */
9041c2e3 3124 case BRANCHJ:
c277df42
IZ
3125 next = scan + ARG(scan);
3126 if (next == scan)
3127 next = NULL;
3128 inner = NEXTOPER(NEXTOPER(scan));
3129 goto do_branch;
9041c2e3 3130 case BRANCH:
c277df42
IZ
3131 inner = NEXTOPER(scan);
3132 do_branch:
3133 {
c277df42
IZ
3134 c1 = OP(scan);
3135 if (OP(next) != c1) /* No choice. */
3136 next = inner; /* Avoid recursion. */
a0d0e21e 3137 else {
02db2b7b
IZ
3138 I32 lastparen = *PL_reglastparen;
3139 I32 unwind1;
3140 re_unwind_branch_t *uw;
3141
3142 /* Put unwinding data on stack */
3143 unwind1 = SSNEWt(1,re_unwind_branch_t);
3144 uw = SSPTRt(unwind1,re_unwind_branch_t);
3145 uw->prev = unwind;
3146 unwind = unwind1;
3147 uw->type = ((c1 == BRANCH)
3148 ? RE_UNWIND_BRANCH
3149 : RE_UNWIND_BRANCHJ);
3150 uw->lastparen = lastparen;
3151 uw->next = next;
3152 uw->locinput = locinput;
3153 uw->nextchr = nextchr;
3154#ifdef DEBUGGING
3155 uw->regindent = ++PL_regindent;
3156#endif
c277df42 3157
02db2b7b
IZ
3158 REGCP_SET(uw->lastcp);
3159
3160 /* Now go into the first branch */
3161 next = inner;
a687059c 3162 }
a0d0e21e
LW
3163 }
3164 break;
3165 case MINMOD:
3166 minmod = 1;
3167 break;
c277df42
IZ
3168 case CURLYM:
3169 {
00db4c45 3170 I32 l = 0;
c277df42 3171 CHECKPOINT lastcp;
9041c2e3 3172
c277df42
IZ
3173 /* We suppose that the next guy does not need
3174 backtracking: in particular, it is of constant length,
3175 and has no parenths to influence future backrefs. */
3176 ln = ARG1(scan); /* min to match */
3177 n = ARG2(scan); /* max to match */
c277df42
IZ
3178 paren = scan->flags;
3179 if (paren) {
3280af22
NIS
3180 if (paren > PL_regsize)
3181 PL_regsize = paren;
3182 if (paren > *PL_reglastparen)
3183 *PL_reglastparen = paren;
c277df42 3184 }
dc45a647 3185 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3186 if (paren)
3187 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3188 PL_reginput = locinput;
c277df42
IZ
3189 if (minmod) {
3190 minmod = 0;
3191 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3192 sayNO;
f31a99c8
HS
3193 /* if we matched something zero-length we don't need to
3194 backtrack - capturing parens are already defined, so
3195 the caveat in the maximal case doesn't apply
3196
3197 XXXX if ln == 0, we can redo this check first time
3198 through the following loop
3199 */
3200 if (ln && l == 0)
3201 n = ln; /* don't backtrack */
3280af22 3202 locinput = PL_reginput;
cca55fe3 3203 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3204 regnode *text_node = next;
3205
cca55fe3 3206 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3207
cca55fe3 3208 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3209 else {
cca55fe3
JP
3210 if (PL_regkind[(U8)OP(text_node)] == REF) {
3211 I32 n, ln;
3212 n = ARG(text_node); /* which paren pair */
3213 ln = PL_regstartp[n];
3214 /* assume yes if we haven't seen CLOSEn */
3215 if (
3216 *PL_reglastparen < n ||
3217 ln == -1 ||
3218 ln == PL_regendp[n]
3219 ) {
3220 c1 = c2 = -1000;
3221 goto assume_ok_MM;
3222 }
3223 c1 = *(PL_bostr + ln);
3224 }
3225 else { c1 = (U8)*STRING(text_node); }
af5decee 3226 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3227 c2 = PL_fold[c1];
af5decee 3228 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3229 c2 = PL_fold_locale[c1];
3230 else
3231 c2 = c1;
3232 }
a0ed51b3
LW
3233 }
3234 else
c277df42 3235 c1 = c2 = -1000;
cca55fe3 3236 assume_ok_MM:
02db2b7b 3237 REGCP_SET(lastcp);
5f4b28b2 3238 /* This may be improved if l == 0. */
c277df42
IZ
3239 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3240 /* If it could work, try it. */
3241 if (c1 == -1000 ||
3280af22
NIS
3242 UCHARAT(PL_reginput) == c1 ||
3243 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3244 {
3245 if (paren) {
f31a99c8 3246 if (ln) {
cf93c79d
IZ
3247 PL_regstartp[paren] =
3248 HOPc(PL_reginput, -l) - PL_bostr;
3249 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3250 }
3251 else
cf93c79d 3252 PL_regendp[paren] = -1;
c277df42
IZ
3253 }
3254 if (regmatch(next))
3255 sayYES;
02db2b7b 3256 REGCP_UNWIND(lastcp);
c277df42
IZ
3257 }
3258 /* Couldn't or didn't -- move forward. */
3280af22 3259 PL_reginput = locinput;
c277df42
IZ
3260 if (regrepeat_hard(scan, 1, &l)) {
3261 ln++;
3280af22 3262 locinput = PL_reginput;
c277df42
IZ
3263 }
3264 else
3265 sayNO;
3266 }
a0ed51b3
LW
3267 }
3268 else {
c277df42 3269 n = regrepeat_hard(scan, n, &l);
f31a99c8
HS
3270 /* if we matched something zero-length we don't need to
3271 backtrack, unless the minimum count is zero and we
3272 are capturing the result - in that case the capture
3273 being defined or not may affect later execution
3274 */
3275 if (n != 0 && l == 0 && !(paren && ln == 0))
3276 ln = n; /* don't backtrack */
3280af22 3277 locinput = PL_reginput;
c277df42 3278 DEBUG_r(
5c0ca799 3279 PerlIO_printf(Perl_debug_log,
faccc32b 3280 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3281 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3282 (IV) n, (IV)l)
c277df42
IZ
3283 );
3284 if (n >= ln) {
cca55fe3 3285 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3286 regnode *text_node = next;
3287
cca55fe3 3288 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3289
cca55fe3 3290 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3291 else {
cca55fe3
JP
3292 if (PL_regkind[(U8)OP(text_node)] == REF) {
3293 I32 n, ln;
3294 n = ARG(text_node); /* which paren pair */
3295 ln = PL_regstartp[n];
3296 /* assume yes if we haven't seen CLOSEn */
3297 if (
3298 *PL_reglastparen < n ||
3299 ln == -1 ||
3300 ln == PL_regendp[n]
3301 ) {
3302 c1 = c2 = -1000;
3303 goto assume_ok_REG;
3304 }
3305 c1 = *(PL_bostr + ln);
3306 }
3307 else { c1 = (U8)*STRING(text_node); }
3308
af5decee 3309 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3310 c2 = PL_fold[c1];
af5decee 3311 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3312 c2 = PL_fold_locale[c1];
3313 else
3314 c2 = c1;
3315 }
a0ed51b3
LW
3316 }
3317 else
c277df42
IZ
3318 c1 = c2 = -1000;
3319 }
cca55fe3 3320 assume_ok_REG:
02db2b7b 3321 REGCP_SET(lastcp);
c277df42
IZ
3322 while (n >= ln) {
3323 /* If it could work, try it. */
3324 if (c1 == -1000 ||
3280af22
NIS
3325 UCHARAT(PL_reginput) == c1 ||
3326 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3327 {
3328 DEBUG_r(
c3464db5 3329 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3330 "%*s trying tail with n=%"IVdf"...\n",
3331 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3332 );
3333 if (paren) {
3334 if (n) {
cf93c79d
IZ
3335 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3336 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3337 }
a0ed51b3 3338 else
cf93c79d 3339 PL_regendp[paren] = -1;
c277df42 3340 }
a0ed51b3
LW
3341 if (regmatch(next))
3342 sayYES;
02db2b7b 3343 REGCP_UNWIND(lastcp);
a0ed51b3 3344 }
c277df42
IZ
3345 /* Couldn't or didn't -- back up. */
3346 n--;
dfe13c55 3347 locinput = HOPc(locinput, -l);
3280af22 3348 PL_reginput = locinput;
c277df42
IZ
3349 }
3350 }
3351 sayNO;
3352 break;
3353 }
3354 case CURLYN:
3355 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3356 if (paren > PL_regsize)
3357 PL_regsize = paren;
3358 if (paren > *PL_reglastparen)
3359 *PL_reglastparen = paren;
c277df42
IZ
3360 ln = ARG1(scan); /* min to match */
3361 n = ARG2(scan); /* max to match */
dc45a647 3362 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3363 goto repeat;
a0d0e21e 3364 case CURLY:
c277df42 3365 paren = 0;
a0d0e21e
LW
3366 ln = ARG1(scan); /* min to match */
3367 n = ARG2(scan); /* max to match */
dc45a647 3368 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3369 goto repeat;
3370 case STAR:
3371 ln = 0;
c277df42 3372 n = REG_INFTY;
a0d0e21e 3373 scan = NEXTOPER(scan);
c277df42 3374 paren = 0;
a0d0e21e
LW
3375 goto repeat;
3376 case PLUS:
c277df42
IZ
3377 ln = 1;
3378 n = REG_INFTY;
3379 scan = NEXTOPER(scan);
3380 paren = 0;
3381 repeat:
a0d0e21e
LW
3382 /*
3383 * Lookahead to avoid useless match attempts
3384 * when we know what character comes next.
3385 */
5f80c4cf
JP
3386
3387 /*
3388 * Used to only do .*x and .*?x, but now it allows
3389 * for )'s, ('s and (?{ ... })'s to be in the way
3390 * of the quantifier and the EXACT-like node. -- japhy
3391 */
3392
cca55fe3 3393 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3394 U8 *s;
3395 regnode *text_node = next;
3396
cca55fe3 3397 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3398
cca55fe3 3399 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3400 else {
cca55fe3
JP
3401 if (PL_regkind[(U8)OP(text_node)] == REF) {
3402 I32 n, ln;
3403 n = ARG(text_node); /* which paren pair */
3404 ln = PL_regstartp[n];
3405 /* assume yes if we haven't seen CLOSEn */
3406 if (
3407 *PL_reglastparen < n ||
3408 ln == -1 ||
3409 ln == PL_regendp[n]
3410 ) {
3411 c1 = c2 = -1000;
3412 goto assume_ok_easy;
3413 }
9246c65e 3414 s = (U8*)PL_bostr + ln;
cca55fe3
JP
3415 }
3416 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
3417
3418 if (!UTF) {
3419 c2 = c1 = *s;
f65d3ee7 3420 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3421 c2 = PL_fold[c1];
f65d3ee7 3422 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf 3423 c2 = PL_fold_locale[c1];
1aa99e6b 3424 }
5f80c4cf 3425 else { /* UTF */
f65d3ee7 3426 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 3427 STRLEN ulen1, ulen2;
e7ae6809
JH
3428 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
3429 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
3430
3431 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3432 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3433
3434 c1 = utf8_to_uvuni(tmpbuf1, 0);
3435 c2 = utf8_to_uvuni(tmpbuf2, 0);
5f80c4cf
JP
3436 }
3437 else {
3438 c2 = c1 = utf8_to_uvchr(s, NULL);
3439 }
1aa99e6b
IH
3440 }
3441 }
bbce6d69 3442 }
a0d0e21e 3443 else
bbce6d69 3444 c1 = c2 = -1000;
cca55fe3 3445 assume_ok_easy:
3280af22 3446 PL_reginput = locinput;
a0d0e21e 3447 if (minmod) {
c277df42 3448 CHECKPOINT lastcp;
a0d0e21e
LW
3449 minmod = 0;
3450 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3451 sayNO;
a0ed51b3 3452 locinput = PL_reginput;
02db2b7b 3453 REGCP_SET(lastcp);
0fe9bf95 3454 if (c1 != -1000) {
1aa99e6b 3455 char *e; /* Should not check after this */
0fe9bf95
IZ
3456 char *old = locinput;
3457
1aa99e6b 3458 if (n == REG_INFTY) {
0fe9bf95 3459 e = PL_regeol - 1;
1aa99e6b
IH
3460 if (do_utf8)
3461 while (UTF8_IS_CONTINUATION(*(U8*)e))
3462 e--;
3463 }
3464 else if (do_utf8) {
3465 int m = n - ln;
3466 for (e = locinput;
3467 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3468 e += UTF8SKIP(e);
3469 }
3470 else {
3471 e = locinput + n - ln;
3472 if (e >= PL_regeol)
3473 e = PL_regeol - 1;
3474 }
0fe9bf95 3475 while (1) {
1aa99e6b 3476 int count;
0fe9bf95 3477 /* Find place 'next' could work */
1aa99e6b
IH
3478 if (!do_utf8) {
3479 if (c1 == c2) {
a8e8ab15
JH
3480 while (locinput <= e &&
3481 UCHARAT(locinput) != c1)
1aa99e6b
IH
3482 locinput++;
3483 } else {
9041c2e3 3484 while (locinput <= e
a8e8ab15
JH
3485 && UCHARAT(locinput) != c1
3486 && UCHARAT(locinput) != c2)
1aa99e6b
IH
3487 locinput++;
3488 }
3489 count = locinput - old;
3490 }
3491 else {
3492 STRLEN len;
3493 if (c1 == c2) {
3494 for (count = 0;
3495 locinput <= e &&
9041c2e3 3496 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3497 count++)
3498 locinput += len;
3499
3500 } else {
3501 for (count = 0; locinput <= e; count++) {
9041c2e3 3502 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3503 if (c == c1 || c == c2)
3504 break;
9041c2e3 3505 locinput += len;
1aa99e6b
IH
3506 }
3507 }
0fe9bf95 3508 }
9041c2e3 3509 if (locinput > e)
0fe9bf95
IZ
3510 sayNO;
3511 /* PL_reginput == old now */
3512 if (locinput != old) {
3513 ln = 1; /* Did some */
1aa99e6b 3514 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3515 sayNO;
3516 }
3517 /* PL_reginput == locinput now */
29d1e993 3518 TRYPAREN(paren, ln, locinput);
0fe9bf95 3519 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3520 REGCP_UNWIND(lastcp);
0fe9bf95 3521 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3522 old = locinput;
3523 if (do_utf8)
3524 locinput += UTF8SKIP(locinput);
3525 else
3526 locinput++;
0fe9bf95
IZ
3527 }
3528 }
3529 else
c277df42 3530 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3531 UV c;
3532 if (c1 != -1000) {
3533 if (do_utf8)
9041c2e3 3534 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3535 else
9041c2e3 3536 c = UCHARAT(PL_reginput);
2390ecbc
PP
3537 /* If it could work, try it. */
3538 if (c == c1 || c == c2)
3539 {
3540 TRYPAREN(paren, n, PL_reginput);
3541 REGCP_UNWIND(lastcp);
3542 }
1aa99e6b 3543 }
a0d0e21e 3544 /* If it could work, try it. */
2390ecbc 3545 else if (c1 == -1000)
bbce6d69 3546 {
29d1e993 3547 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3548 REGCP_UNWIND(lastcp);
bbce6d69 3549 }
c277df42 3550 /* Couldn't or didn't -- move forward. */
a0ed51b3 3551 PL_reginput = locinput;
a0d0e21e
LW
3552 if (regrepeat(scan, 1)) {
3553 ln++;
a0ed51b3
LW
3554 locinput = PL_reginput;
3555 }
3556 else
4633a7c4 3557 sayNO;
a0d0e21e
LW
3558 }
3559 }
3560 else {
c277df42 3561 CHECKPOINT lastcp;
a0d0e21e 3562 n = regrepeat(scan, n);
a0ed51b3 3563 locinput = PL_reginput;
22c35a8c 3564 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3565 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3566 ln = n; /* why back off? */
1aeab75a
GS
3567 /* ...because $ and \Z can match before *and* after
3568 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3569 We should back off by one in this case. */
3570 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3571 ln--;
3572 }
02db2b7b 3573 REGCP_SET(lastcp);
c277df42 3574 if (paren) {
8fa7f367 3575 UV c = 0;
c277df42 3576 while (n >= ln) {
1aa99e6b
IH
3577 if (c1 != -1000) {
3578 if (do_utf8)
9041c2e3 3579 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3580 else
9041c2e3 3581 c = UCHARAT(PL_reginput);
1aa99e6b 3582 }
c277df42 3583 /* If it could work, try it. */
1aa99e6b 3584 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3585 {
29d1e993