This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Rework the make logic (again).
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
a687059c
LW
83#include "regcomp.h"
84
c277df42
IZ
85#define RF_tainted 1 /* tainted information used? */
86#define RF_warned 2 /* warned about big count? */
ce862d02 87#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
88#define RF_utf8 8 /* String contains multibyte chars? */
89
90#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
91
92#define RS_init 1 /* eval environment created */
93#define RS_set 2 /* replsv value is set */
c277df42 94
a687059c
LW
95#ifndef STATIC
96#define STATIC static
97#endif
98
c277df42
IZ
99/*
100 * Forwards.
101 */
102
a0ed51b3 103#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 104#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 105
dfe13c55
GS
106#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
108#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
109#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
110#define HOPc(pos,off) ((char*)HOP(pos,off))
111#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 112
efb30f32 113#define HOPBACK(pos, off) ( \
53c4c00c 114 (UTF && PL_reg_match_utf8) \
efb30f32
HS
115 ? reghopmaybe((U8*)pos, -off) \
116 : (pos - off >= PL_bostr) \
117 ? (U8*)(pos - off) \
118 : (U8*)NULL \
119)
120#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
121
1aa99e6b
IH
122#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
123#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
124#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
125#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
126#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
127#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
128
8269fa76 129#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 130
5f80c4cf 131/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
132#define JUMPABLE(rn) ( \
133 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
134 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
135 OP(rn) == PLUS || OP(rn) == MINMOD || \
136 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
137)
138
cca55fe3
JP
139#define HAS_TEXT(rn) ( \
140 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
141)
e2d8ce26 142
cca55fe3 143#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 144 while (JUMPABLE(rn)) \
cca55fe3
JP
145 if (OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
146 PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 147 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
148 else if (OP(rn) == PLUS) \
149 rn = NEXTOPER(rn); \
e2d8ce26 150 else rn += NEXT_OFF(rn); \
5f80c4cf 151} STMT_END
74750237 152
acfe0abc 153static void restore_pos(pTHX_ void *arg);
51371543 154
76e3520e 155STATIC CHECKPOINT
cea2e8a9 156S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 157{
3280af22 158 int retval = PL_savestack_ix;
b1ce53c5
JH
159#define REGCP_PAREN_ELEMS 4
160 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
161 int p;
162
e49a9654
IH
163 if (paren_elems_to_push < 0)
164 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
165
a01268b5 166#define REGCP_OTHER_ELEMS 6
b1ce53c5 167 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 168 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 169/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
170 SSPUSHINT(PL_regendp[p]);
171 SSPUSHINT(PL_regstartp[p]);
3280af22 172 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
173 SSPUSHINT(p);
174 }
b1ce53c5 175/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
176 SSPUSHINT(PL_regsize);
177 SSPUSHINT(*PL_reglastparen);
a01268b5 178 SSPUSHINT(*PL_reglastcloseparen);
3280af22 179 SSPUSHPTR(PL_reginput);
41123dfd
JH
180#define REGCP_FRAME_ELEMS 2
181/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
182 * are needed for the regexp context stack bookkeeping. */
183 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 184 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 185
a0d0e21e
LW
186 return retval;
187}
188
c277df42 189/* These are needed since we do not localize EVAL nodes: */
02db2b7b 190# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 191 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 192 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 193
02db2b7b 194# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 195 PerlIO_printf(Perl_debug_log, \
faccc32b 196 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 197 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 198
76e3520e 199STATIC char *
cea2e8a9 200S_regcppop(pTHX)
a0d0e21e 201{
b1ce53c5 202 I32 i;
a0d0e21e
LW
203 U32 paren = 0;
204 char *input;
cf93c79d 205 I32 tmps;
b1ce53c5
JH
206
207 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 208 i = SSPOPINT;
b1ce53c5
JH
209 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
210 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 211 input = (char *) SSPOPPTR;
a01268b5 212 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
213 *PL_reglastparen = SSPOPINT;
214 PL_regsize = SSPOPINT;
b1ce53c5
JH
215
216 /* Now restore the parentheses context. */
41123dfd
JH
217 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
218 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 219 paren = (U32)SSPOPINT;
3280af22 220 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
221 PL_regstartp[paren] = SSPOPINT;
222 tmps = SSPOPINT;
3280af22
NIS
223 if (paren <= *PL_reglastparen)
224 PL_regendp[paren] = tmps;
c277df42 225 DEBUG_r(
c3464db5 226 PerlIO_printf(Perl_debug_log,
b900a521 227 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 228 (UV)paren, (IV)PL_regstartp[paren],
b900a521 229 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 230 (IV)PL_regendp[paren],
3280af22 231 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 232 );
a0d0e21e 233 }
c277df42 234 DEBUG_r(
3280af22 235 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 236 PerlIO_printf(Perl_debug_log,
faccc32b
JH
237 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
238 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
239 }
240 );
daf18116 241#if 1
dafc8851
JH
242 /* It would seem that the similar code in regtry()
243 * already takes care of this, and in fact it is in
244 * a better location to since this code can #if 0-ed out
245 * but the code in regtry() is needed or otherwise tests
246 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
247 * (as of patchlevel 7877) will fail. Then again,
248 * this code seems to be necessary or otherwise
249 * building DynaLoader will fail:
250 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
251 * --jhi */
3280af22
NIS
252 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
253 if (paren > PL_regsize)
cf93c79d
IZ
254 PL_regstartp[paren] = -1;
255 PL_regendp[paren] = -1;
a0d0e21e 256 }
dafc8851 257#endif
a0d0e21e
LW
258 return input;
259}
260
0f5d15d6 261STATIC char *
cea2e8a9 262S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
263{
264 I32 tmp = PL_savestack_ix;
265
266 PL_savestack_ix = ss;
267 regcppop();
268 PL_savestack_ix = tmp;
942e002e 269 return Nullch;
0f5d15d6
IZ
270}
271
272typedef struct re_cc_state
273{
274 I32 ss;
275 regnode *node;
276 struct re_cc_state *prev;
277 CURCUR *cc;
278 regexp *re;
279} re_cc_state;
280
02db2b7b 281#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 282
29d1e993
HS
283#define TRYPAREN(paren, n, input) { \
284 if (paren) { \
285 if (n) { \
286 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
287 PL_regendp[paren] = input - PL_bostr; \
288 } \
289 else \
290 PL_regendp[paren] = -1; \
291 } \
292 if (regmatch(next)) \
293 sayYES; \
294 if (paren && n) \
295 PL_regendp[paren] = -1; \
296}
297
298
a687059c 299/*
e50aee73 300 * pregexec and friends
a687059c
LW
301 */
302
303/*
c277df42 304 - pregexec - match a regexp against a string
a687059c 305 */
c277df42 306I32
864dbfa3 307Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 308 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
309/* strend: pointer to null at end of string */
310/* strbeg: real beginning of string */
311/* minend: end of match must be >=minend after stringarg. */
312/* nosave: For optimizations. */
313{
314 return
9041c2e3 315 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
316 nosave ? 0 : REXEC_COPY_STR);
317}
0f5d15d6
IZ
318
319STATIC void
cea2e8a9 320S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
321{
322 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
323#ifdef DEBUGGING
324 PL_regprogram = prog->program;
325#endif
326 PL_regnpar = prog->nparens;
9041c2e3
NIS
327 PL_regdata = prog->data;
328 PL_reg_re = prog;
0f5d15d6 329}
22e551b9 330
9041c2e3 331/*
cad2e5aa
JH
332 * Need to implement the following flags for reg_anch:
333 *
334 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
335 * USE_INTUIT_ML
336 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
337 * INTUIT_AUTORITATIVE_ML
338 * INTUIT_ONCE_NOML - Intuit can match in one location only.
339 * INTUIT_ONCE_ML
340 *
341 * Another flag for this function: SECOND_TIME (so that float substrs
342 * with giant delta may be not rechecked).
343 */
344
345/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
346
2c2d71f5 347/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
348 Otherwise, only SvCUR(sv) is used to get strbeg. */
349
350/* XXXX We assume that strpos is strbeg unless sv. */
351
6eb5f6b9
JH
352/* XXXX Some places assume that there is a fixed substring.
353 An update may be needed if optimizer marks as "INTUITable"
354 RExen without fixed substrings. Similarly, it is assumed that
355 lengths of all the strings are no more than minlen, thus they
356 cannot come from lookahead.
357 (Or minlen should take into account lookahead.) */
358
2c2d71f5
JH
359/* A failure to find a constant substring means that there is no need to make
360 an expensive call to REx engine, thus we celebrate a failure. Similarly,
361 finding a substring too deep into the string means that less calls to
30944b6d
IZ
362 regtry() should be needed.
363
364 REx compiler's optimizer found 4 possible hints:
365 a) Anchored substring;
366 b) Fixed substring;
367 c) Whether we are anchored (beginning-of-line or \G);
368 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 369 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
370 string which does not contradict any of them.
371 */
2c2d71f5 372
6eb5f6b9
JH
373/* Most of decisions we do here should have been done at compile time.
374 The nodes of the REx which we used for the search should have been
375 deleted from the finite automaton. */
376
cad2e5aa
JH
377char *
378Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
379 char *strend, U32 flags, re_scream_pos_data *data)
380{
b7953727 381 register I32 start_shift = 0;
cad2e5aa 382 /* Should be nonnegative! */
b7953727 383 register I32 end_shift = 0;
2c2d71f5
JH
384 register char *s;
385 register SV *check;
a1933d95 386 char *strbeg;
cad2e5aa
JH
387 char *t;
388 I32 ml_anch;
6eb5f6b9 389 register char *other_last = Nullch; /* other substr checked before this */
b7953727 390 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
391#ifdef DEBUGGING
392 char *i_strpos = strpos;
ce333219 393 SV *dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 394#endif
cad2e5aa 395
2a782b5b 396 DEBUG_r({
40eddc46
JH
397 char*s = UTF ? sv_uni_display(dsv, sv, 60, 0) : strpos;
398 int len = UTF ? strlen(s) : strend - strpos;
2a782b5b
JH
399 if (!PL_colorset)
400 reginitcolors();
401 PerlIO_printf(Perl_debug_log,
402 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
403 PL_colors[4],PL_colors[5],PL_colors[0],
404 prog->precomp,
405 PL_colors[1],
406 (strlen(prog->precomp) > 60 ? "..." : ""),
407 PL_colors[0],
408 (int)(len > 60 ? 60 : len),
409 s, PL_colors[1],
410 (len > 60 ? "..." : "")
411 );
412 });
cad2e5aa 413
1aa99e6b
IH
414 if (prog->reganch & ROPT_UTF8)
415 PL_reg_flags |= RF_utf8;
416
417 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
a72c7584
JH
418 DEBUG_r(PerlIO_printf(Perl_debug_log,
419 "String too short... [re_intuit_start]\n"));
cad2e5aa 420 goto fail;
2c2d71f5 421 }
a1933d95 422 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 423 PL_regeol = strend;
653099ff 424 check = prog->check_substr;
2c2d71f5 425 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
426 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
427 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 428 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 429
7e25d62c
JH
430 if (!ml_anch) {
431 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
432 | ROPT_IMPLICIT)) /* not a real BOL */
433 /* SvCUR is not set on references: SvRV and SvPVX overlap */
434 && sv && !SvROK(sv)
435 && (strpos != strbeg)) {
436 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
437 goto fail;
438 }
439 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 440 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 441 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
442 I32 slen;
443
1aa99e6b 444 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
445 if (SvTAIL(check)) {
446 slen = SvCUR(check); /* >= 1 */
cad2e5aa 447
9041c2e3 448 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
449 || (strend - s == slen && strend[-1] != '\n')) {
450 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
451 goto fail_finish;
cad2e5aa
JH
452 }
453 /* Now should match s[0..slen-2] */
454 slen--;
653099ff 455 if (slen && (*SvPVX(check) != *s
cad2e5aa 456 || (slen > 1
653099ff 457 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
458 report_neq:
459 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
460 goto fail_finish;
461 }
cad2e5aa 462 }
653099ff
GS
463 else if (*SvPVX(check) != *s
464 || ((slen = SvCUR(check)) > 1
465 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
466 goto report_neq;
467 goto success_at_start;
7e25d62c 468 }
cad2e5aa 469 }
2c2d71f5 470 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 471 s = strpos;
2c2d71f5 472 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 473 end_shift = prog->minlen - start_shift -
653099ff 474 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 475 if (!ml_anch) {
653099ff
GS
476 I32 end = prog->check_offset_max + CHR_SVLEN(check)
477 - (SvTAIL(check) != 0);
1aa99e6b 478 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
479
480 if (end_shift < eshift)
481 end_shift = eshift;
482 }
cad2e5aa 483 }
2c2d71f5 484 else { /* Can match at random position */
cad2e5aa
JH
485 ml_anch = 0;
486 s = strpos;
2c2d71f5
JH
487 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
488 /* Should be nonnegative! */
489 end_shift = prog->minlen - start_shift -
653099ff 490 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
491 }
492
2c2d71f5 493#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 494 if (end_shift < 0)
6bbae5e6 495 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
496#endif
497
2c2d71f5
JH
498 restart:
499 /* Find a possible match in the region s..strend by looking for
500 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 501 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
502 I32 p = -1; /* Internal iterator of scream. */
503 I32 *pp = data ? data->scream_pos : &p;
504
2c2d71f5
JH
505 if (PL_screamfirst[BmRARE(check)] >= 0
506 || ( BmRARE(check) == '\n'
507 && (BmPREVIOUS(check) == SvCUR(check) - 1)
508 && SvTAIL(check) ))
9041c2e3 509 s = screaminstr(sv, check,
2c2d71f5 510 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 511 else
2c2d71f5 512 goto fail_finish;
cad2e5aa
JH
513 if (data)
514 *data->scream_olds = s;
515 }
f33976b4 516 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
517 s = fbm_instr((U8*)(s + start_shift),
518 (U8*)(strend - end_shift),
519 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 520 else
1aa99e6b
IH
521 s = fbm_instr(HOP3(s, start_shift, strend),
522 HOP3(strend, -end_shift, strbeg),
2c2d71f5 523 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
524
525 /* Update the count-of-usability, remove useless subpatterns,
526 unshift s. */
2c2d71f5
JH
527
528 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
529 (s ? "Found" : "Did not find"),
530 ((check == prog->anchored_substr) ? "anchored" : "floating"),
531 PL_colors[0],
7b0972df
JH
532 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
533 SvPVX(check),
2c2d71f5
JH
534 PL_colors[1], (SvTAIL(check) ? "$" : ""),
535 (s ? " at offset " : "...\n") ) );
536
537 if (!s)
538 goto fail_finish;
539
6eb5f6b9
JH
540 check_at = s;
541
2c2d71f5 542 /* Finish the diagnostic message */
30944b6d 543 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
544
545 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
546 Start with the other substr.
547 XXXX no SCREAM optimization yet - and a very coarse implementation
548 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
549 *always* match. Probably should be marked during compile...
550 Probably it is right to do no SCREAM here...
551 */
552
553 if (prog->float_substr && prog->anchored_substr) {
30944b6d 554 /* Take into account the "other" substring. */
2c2d71f5
JH
555 /* XXXX May be hopelessly wrong for UTF... */
556 if (!other_last)
6eb5f6b9 557 other_last = strpos;
2c2d71f5 558 if (check == prog->float_substr) {
30944b6d
IZ
559 do_other_anchored:
560 {
1aa99e6b 561 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
562 char *s1 = s;
563
2c2d71f5
JH
564 t = s - prog->check_offset_max;
565 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
566 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 567 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 568 && t > strpos)))
30944b6d 569 /* EMPTY */;
2c2d71f5
JH
570 else
571 t = strpos;
1aa99e6b 572 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
573 if (t < other_last) /* These positions already checked */
574 t = other_last;
1aa99e6b 575 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
576 if (last < last1)
577 last1 = last;
578 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
579 /* On end-of-str: see comment below. */
580 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
581 HOP3(HOP3(last1, prog->anchored_offset, strend)
582 + SvCUR(prog->anchored_substr),
583 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
584 prog->anchored_substr,
585 PL_multiline ? FBMrf_MULTILINE : 0);
586 DEBUG_r(PerlIO_printf(Perl_debug_log,
587 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
588 (s ? "Found" : "Contradicts"),
589 PL_colors[0],
7b0972df
JH
590 (int)(SvCUR(prog->anchored_substr)
591 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
592 SvPVX(prog->anchored_substr),
593 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
594 if (!s) {
595 if (last1 >= last2) {
596 DEBUG_r(PerlIO_printf(Perl_debug_log,
597 ", giving up...\n"));
598 goto fail_finish;
599 }
600 DEBUG_r(PerlIO_printf(Perl_debug_log,
601 ", trying floating at offset %ld...\n",
1aa99e6b
IH
602 (long)(HOP3c(s1, 1, strend) - i_strpos)));
603 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
604 s = HOP3c(last, 1, strend);
2c2d71f5
JH
605 goto restart;
606 }
607 else {
608 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 609 (long)(s - i_strpos)));
1aa99e6b
IH
610 t = HOP3c(s, -prog->anchored_offset, strbeg);
611 other_last = HOP3c(s, 1, strend);
30944b6d 612 s = s1;
2c2d71f5
JH
613 if (t == strpos)
614 goto try_at_start;
2c2d71f5
JH
615 goto try_at_offset;
616 }
30944b6d 617 }
2c2d71f5
JH
618 }
619 else { /* Take into account the floating substring. */
620 char *last, *last1;
621 char *s1 = s;
622
1aa99e6b
IH
623 t = HOP3c(s, -start_shift, strbeg);
624 last1 = last =
625 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
626 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
627 last = HOP3c(t, prog->float_max_offset, strend);
628 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
629 if (s < other_last)
630 s = other_last;
2c2d71f5
JH
631 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
632 /* fbm_instr() takes into account exact value of end-of-str
633 if the check is SvTAIL(ed). Since false positives are OK,
634 and end-of-str is not later than strend we are OK. */
635 s = fbm_instr((unsigned char*)s,
636 (unsigned char*)last + SvCUR(prog->float_substr)
637 - (SvTAIL(prog->float_substr)!=0),
638 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
639 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
640 (s ? "Found" : "Contradicts"),
641 PL_colors[0],
7b0972df
JH
642 (int)(SvCUR(prog->float_substr)
643 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
644 SvPVX(prog->float_substr),
645 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
646 if (!s) {
647 if (last1 == last) {
648 DEBUG_r(PerlIO_printf(Perl_debug_log,
649 ", giving up...\n"));
650 goto fail_finish;
651 }
652 DEBUG_r(PerlIO_printf(Perl_debug_log,
653 ", trying anchored starting at offset %ld...\n",
30944b6d 654 (long)(s1 + 1 - i_strpos)));
803ff556 655 other_last = last;
1aa99e6b 656 s = HOP3c(t, 1, strend);
2c2d71f5
JH
657 goto restart;
658 }
659 else {
660 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 661 (long)(s - i_strpos)));
803ff556 662 other_last = s; /* Fix this later. --Hugo */
30944b6d 663 s = s1;
2c2d71f5
JH
664 if (t == strpos)
665 goto try_at_start;
2c2d71f5
JH
666 goto try_at_offset;
667 }
668 }
cad2e5aa 669 }
2c2d71f5
JH
670
671 t = s - prog->check_offset_max;
2c2d71f5
JH
672 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
673 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
674 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
675 && t > strpos))) {
2c2d71f5
JH
676 /* Fixed substring is found far enough so that the match
677 cannot start at strpos. */
678 try_at_offset:
cad2e5aa 679 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
680 /* Eventually fbm_*() should handle this, but often
681 anchored_offset is not 0, so this check will not be wasted. */
682 /* XXXX In the code below we prefer to look for "^" even in
683 presence of anchored substrings. And we search even
684 beyond the found float position. These pessimizations
685 are historical artefacts only. */
686 find_anchor:
2c2d71f5 687 while (t < strend - prog->minlen) {
cad2e5aa 688 if (*t == '\n') {
4ee3650e 689 if (t < check_at - prog->check_offset_min) {
30944b6d 690 if (prog->anchored_substr) {
4ee3650e
GS
691 /* Since we moved from the found position,
692 we definitely contradict the found anchored
30944b6d
IZ
693 substr. Due to the above check we do not
694 contradict "check" substr.
695 Thus we can arrive here only if check substr
696 is float. Redo checking for "other"=="fixed".
697 */
9041c2e3 698 strpos = t + 1;
30944b6d
IZ
699 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
700 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
701 goto do_other_anchored;
702 }
4ee3650e
GS
703 /* We don't contradict the found floating substring. */
704 /* XXXX Why not check for STCLASS? */
cad2e5aa 705 s = t + 1;
2c2d71f5 706 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 707 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
708 goto set_useful;
709 }
4ee3650e
GS
710 /* Position contradicts check-string */
711 /* XXXX probably better to look for check-string
712 than for "\n", so one should lower the limit for t? */
713 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 714 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 715 other_last = strpos = s = t + 1;
cad2e5aa
JH
716 goto restart;
717 }
718 t++;
719 }
2c2d71f5
JH
720 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
721 PL_colors[0],PL_colors[1]));
722 goto fail_finish;
cad2e5aa 723 }
f5952150
GS
724 else {
725 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
726 PL_colors[0],PL_colors[1]));
727 }
cad2e5aa
JH
728 s = t;
729 set_useful:
2c2d71f5 730 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
731 }
732 else {
f5952150 733 /* The found string does not prohibit matching at strpos,
2c2d71f5 734 - no optimization of calling REx engine can be performed,
f5952150
GS
735 unless it was an MBOL and we are not after MBOL,
736 or a future STCLASS check will fail this. */
2c2d71f5
JH
737 try_at_start:
738 /* Even in this situation we may use MBOL flag if strpos is offset
739 wrt the start of the string. */
05b4157f 740 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 741 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
742 /* May be due to an implicit anchor of m{.*foo} */
743 && !(prog->reganch & ROPT_IMPLICIT))
744 {
cad2e5aa
JH
745 t = strpos;
746 goto find_anchor;
747 }
30944b6d 748 DEBUG_r( if (ml_anch)
f5952150
GS
749 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
750 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 751 );
2c2d71f5 752 success_at_start:
30944b6d 753 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 754 && prog->check_substr /* Could be deleted already */
cad2e5aa 755 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
756 && prog->check_substr == prog->float_substr)
757 {
cad2e5aa 758 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 759 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
760 SvREFCNT_dec(prog->check_substr);
761 prog->check_substr = Nullsv; /* disable */
762 prog->float_substr = Nullsv; /* clear */
5e39e1e5 763 check = Nullsv; /* abort */
cad2e5aa 764 s = strpos;
3cf5c195
IZ
765 /* XXXX This is a remnant of the old implementation. It
766 looks wasteful, since now INTUIT can use many
6eb5f6b9 767 other heuristics. */
cad2e5aa
JH
768 prog->reganch &= ~RE_USE_INTUIT;
769 }
770 else
771 s = strpos;
772 }
773
6eb5f6b9
JH
774 /* Last resort... */
775 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
776 if (prog->regstclass) {
777 /* minlen == 0 is possible if regstclass is \b or \B,
778 and the fixed substr is ''$.
779 Since minlen is already taken into account, s+1 is before strend;
780 accidentally, minlen >= 1 guaranties no false positives at s + 1
781 even for \b or \B. But (minlen? 1 : 0) below assumes that
782 regstclass does not come from lookahead... */
783 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
784 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 785 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 786 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 787 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 788 : 1);
6eb5f6b9 789 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
790 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
791 : (prog->float_substr
792 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
793 cl_l, strend)
794 : strend);
a1933d95 795 char *startpos = strbeg;
6eb5f6b9
JH
796
797 t = s;
76384e4a 798 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 799 PL_regdata = prog->data;
76384e4a
GS
800 PL_bostr = startpos;
801 }
f33976b4 802 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
6eb5f6b9
JH
803 if (!s) {
804#ifdef DEBUGGING
b7953727 805 char *what = 0;
6eb5f6b9
JH
806#endif
807 if (endpos == strend) {
808 DEBUG_r( PerlIO_printf(Perl_debug_log,
809 "Could not match STCLASS...\n") );
810 goto fail;
811 }
66e933ab
GS
812 DEBUG_r( PerlIO_printf(Perl_debug_log,
813 "This position contradicts STCLASS...\n") );
653099ff
GS
814 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
815 goto fail;
6eb5f6b9
JH
816 /* Contradict one of substrings */
817 if (prog->anchored_substr) {
6eb5f6b9
JH
818 if (prog->anchored_substr == check) {
819 DEBUG_r( what = "anchored" );
820 hop_and_restart:
1aa99e6b 821 s = HOP3c(t, 1, strend);
66e933ab
GS
822 if (s + start_shift + end_shift > strend) {
823 /* XXXX Should be taken into account earlier? */
824 DEBUG_r( PerlIO_printf(Perl_debug_log,
825 "Could not match STCLASS...\n") );
826 goto fail;
827 }
5e39e1e5
HS
828 if (!check)
829 goto giveup;
6eb5f6b9 830 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 831 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
832 what, (long)(s + start_shift - i_strpos)) );
833 goto restart;
834 }
66e933ab 835 /* Have both, check_string is floating */
6eb5f6b9
JH
836 if (t + start_shift >= check_at) /* Contradicts floating=check */
837 goto retry_floating_check;
838 /* Recheck anchored substring, but not floating... */
9041c2e3 839 s = check_at;
5e39e1e5
HS
840 if (!check)
841 goto giveup;
6eb5f6b9 842 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 843 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
844 (long)(other_last - i_strpos)) );
845 goto do_other_anchored;
846 }
60e71179
GS
847 /* Another way we could have checked stclass at the
848 current position only: */
849 if (ml_anch) {
850 s = t = t + 1;
5e39e1e5
HS
851 if (!check)
852 goto giveup;
60e71179 853 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
854 "Looking for /%s^%s/m starting at offset %ld...\n",
855 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 856 goto try_at_offset;
66e933ab 857 }
60e71179
GS
858 if (!prog->float_substr) /* Could have been deleted */
859 goto fail;
6eb5f6b9
JH
860 /* Check is floating subtring. */
861 retry_floating_check:
862 t = check_at - start_shift;
863 DEBUG_r( what = "floating" );
864 goto hop_and_restart;
865 }
b7953727
JH
866 if (t != s) {
867 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 868 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
869 (long)(t - i_strpos), (long)(s - i_strpos))
870 );
871 }
872 else {
873 DEBUG_r(PerlIO_printf(Perl_debug_log,
874 "Does not contradict STCLASS...\n");
875 );
876 }
6eb5f6b9 877 }
5e39e1e5
HS
878 giveup:
879 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
880 PL_colors[4], (check ? "Guessed" : "Giving up"),
881 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 882 return s;
2c2d71f5
JH
883
884 fail_finish: /* Substring not found */
66e933ab
GS
885 if (prog->check_substr) /* could be removed already */
886 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 887 fail:
2c2d71f5 888 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
889 PL_colors[4],PL_colors[5]));
890 return Nullch;
891}
9661b544 892
6eb5f6b9 893/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
894STATIC char *
895S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 896{
6eb5f6b9
JH
897 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
898 char *m;
d8093b23 899 STRLEN ln;
900 unsigned int c1;
901 unsigned int c2;
6eb5f6b9
JH
902 char *e;
903 register I32 tmp = 1; /* Scratch variable? */
53c4c00c 904 register bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 905
6eb5f6b9
JH
906 /* We know what class it must start with. */
907 switch (OP(c)) {
6eb5f6b9 908 case ANYOF:
a0ed51b3 909 while (s < strend) {
ffc61ed2 910 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
911 if (tmp && (norun || regtry(prog, s)))
912 goto got_it;
913 else
914 tmp = doevery;
a0ed51b3 915 }
6eb5f6b9
JH
916 else
917 tmp = 1;
ffc61ed2 918 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 919 }
6eb5f6b9 920 break;
f33976b4
DB
921 case CANY:
922 while (s < strend) {
923 if (tmp && (norun || regtry(prog, s)))
924 goto got_it;
925 else
926 tmp = doevery;
927 s++;
928 }
929 break;
6eb5f6b9
JH
930 case EXACTF:
931 m = STRING(c);
932 ln = STR_LEN(c);
1aa99e6b 933 if (UTF) {
a2a2844f 934 STRLEN ulen1, ulen2;
e7ae6809
JH
935 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
936 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
937
938 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
939 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
940
941 c1 = utf8_to_uvuni(tmpbuf1, 0);
942 c2 = utf8_to_uvuni(tmpbuf2, 0);
1aa99e6b
IH
943 }
944 else {
945 c1 = *(U8*)m;
946 c2 = PL_fold[c1];
947 }
6eb5f6b9
JH
948 goto do_exactf;
949 case EXACTFL:
950 m = STRING(c);
951 ln = STR_LEN(c);
d8093b23 952 c1 = *(U8*)m;
6eb5f6b9
JH
953 c2 = PL_fold_locale[c1];
954 do_exactf:
955 e = strend - ln;
b3c9acc1 956
6eb5f6b9
JH
957 if (norun && e < s)
958 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
959
960 if (do_utf8) {
961 STRLEN len;
cadb39a9
JH
962 /* The ibcmp_utf8() uses to_uni_fold() which is more
963 * correct folding for Unicode than using lowercase.
964 * However, it doesn't work quite fully since the folding
965 * is a one-to-many mapping and the regex optimizer is
966 * unaware of this, so it may throw out good matches.
967 * Fortunately, not getting this right is allowed
968 * for Unicode Regular Expression Support level 1,
969 * only one-to-one matching is required. --jhi */
1aa99e6b
IH
970 if (c1 == c2)
971 while (s <= e) {
9041c2e3 972 if ( utf8_to_uvchr((U8*)s, &len) == c1
701a277b 973 && (ln == 1 ||
332ddc25 974 ibcmp_utf8(s, do_utf8, strend - s,
55da9344
JH
975 m, UTF, ln))
976 && (norun || regtry(prog, s)) )
1aa99e6b
IH
977 goto got_it;
978 s += len;
979 }
980 else
981 while (s <= e) {
9041c2e3 982 UV c = utf8_to_uvchr((U8*)s, &len);
701a277b
JH
983 if ( (c == c1 || c == c2)
984 && (ln == 1 ||
332ddc25 985 ibcmp_utf8(s, do_utf8, strend - s,
55da9344
JH
986 m, UTF, ln))
987 && (norun || regtry(prog, s)) )
1aa99e6b
IH
988 goto got_it;
989 s += len;
990 }
991 }
992 else {
993 if (c1 == c2)
994 while (s <= e) {
995 if ( *(U8*)s == c1
996 && (ln == 1 || !(OP(c) == EXACTF
997 ? ibcmp(s, m, ln)
998 : ibcmp_locale(s, m, ln)))
999 && (norun || regtry(prog, s)) )
1000 goto got_it;
1001 s++;
1002 }
1003 else
1004 while (s <= e) {
1005 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1006 && (ln == 1 || !(OP(c) == EXACTF
1007 ? ibcmp(s, m, ln)
1008 : ibcmp_locale(s, m, ln)))
1009 && (norun || regtry(prog, s)) )
1010 goto got_it;
1011 s++;
1012 }
b3c9acc1
IZ
1013 }
1014 break;
bbce6d69 1015 case BOUNDL:
3280af22 1016 PL_reg_flags |= RF_tainted;
bbce6d69 1017 /* FALL THROUGH */
a0d0e21e 1018 case BOUND:
ffc61ed2 1019 if (do_utf8) {
12d33761 1020 if (s == PL_bostr)
ffc61ed2
JH
1021 tmp = '\n';
1022 else {
1aa99e6b 1023 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1024
0064a8a9
JH
1025 if (s > (char*)r)
1026 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1027 }
1028 tmp = ((OP(c) == BOUND ?
9041c2e3 1029 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1030 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1031 while (s < strend) {
1032 if (tmp == !(OP(c) == BOUND ?
3568d838 1033 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1034 isALNUM_LC_utf8((U8*)s)))
1035 {
1036 tmp = !tmp;
1037 if ((norun || regtry(prog, s)))
1038 goto got_it;
1039 }
1040 s += UTF8SKIP(s);
a687059c 1041 }
a0d0e21e 1042 }
667bb95a 1043 else {
12d33761 1044 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1045 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1046 while (s < strend) {
1047 if (tmp ==
1048 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1049 tmp = !tmp;
1050 if ((norun || regtry(prog, s)))
1051 goto got_it;
1052 }
1053 s++;
a0ed51b3 1054 }
a0ed51b3 1055 }
6eb5f6b9 1056 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1057 goto got_it;
1058 break;
bbce6d69 1059 case NBOUNDL:
3280af22 1060 PL_reg_flags |= RF_tainted;
bbce6d69 1061 /* FALL THROUGH */
a0d0e21e 1062 case NBOUND:
ffc61ed2 1063 if (do_utf8) {
12d33761 1064 if (s == PL_bostr)
ffc61ed2
JH
1065 tmp = '\n';
1066 else {
1aa99e6b 1067 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1068
0064a8a9
JH
1069 if (s > (char*)r)
1070 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1071 }
1072 tmp = ((OP(c) == NBOUND ?
9041c2e3 1073 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1074 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1075 while (s < strend) {
1076 if (tmp == !(OP(c) == NBOUND ?
3568d838 1077 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1078 isALNUM_LC_utf8((U8*)s)))
1079 tmp = !tmp;
1080 else if ((norun || regtry(prog, s)))
1081 goto got_it;
1082 s += UTF8SKIP(s);
1083 }
a0d0e21e 1084 }
667bb95a 1085 else {
12d33761 1086 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1087 tmp = ((OP(c) == NBOUND ?
1088 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1089 while (s < strend) {
1090 if (tmp ==
1091 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1092 tmp = !tmp;
1093 else if ((norun || regtry(prog, s)))
1094 goto got_it;
1095 s++;
1096 }
a0ed51b3 1097 }
6eb5f6b9 1098 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1099 goto got_it;
1100 break;
a0d0e21e 1101 case ALNUM:
ffc61ed2 1102 if (do_utf8) {
8269fa76 1103 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1104 while (s < strend) {
3568d838 1105 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1106 if (tmp && (norun || regtry(prog, s)))
1107 goto got_it;
1108 else
1109 tmp = doevery;
1110 }
bbce6d69 1111 else
ffc61ed2
JH
1112 tmp = 1;
1113 s += UTF8SKIP(s);
bbce6d69 1114 }
bbce6d69 1115 }
ffc61ed2
JH
1116 else {
1117 while (s < strend) {
1118 if (isALNUM(*s)) {
1119 if (tmp && (norun || regtry(prog, s)))
1120 goto got_it;
1121 else
1122 tmp = doevery;
1123 }
a0ed51b3 1124 else
ffc61ed2
JH
1125 tmp = 1;
1126 s++;
a0ed51b3 1127 }
a0ed51b3
LW
1128 }
1129 break;
bbce6d69 1130 case ALNUML:
3280af22 1131 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1132 if (do_utf8) {
1133 while (s < strend) {
1134 if (isALNUM_LC_utf8((U8*)s)) {
1135 if (tmp && (norun || regtry(prog, s)))
1136 goto got_it;
1137 else
1138 tmp = doevery;
1139 }
a687059c 1140 else
ffc61ed2
JH
1141 tmp = 1;
1142 s += UTF8SKIP(s);
a0d0e21e 1143 }
a0d0e21e 1144 }
ffc61ed2
JH
1145 else {
1146 while (s < strend) {
1147 if (isALNUM_LC(*s)) {
1148 if (tmp && (norun || regtry(prog, s)))
1149 goto got_it;
1150 else
1151 tmp = doevery;
1152 }
a0ed51b3 1153 else
ffc61ed2
JH
1154 tmp = 1;
1155 s++;
a0ed51b3 1156 }
a0ed51b3
LW
1157 }
1158 break;
a0d0e21e 1159 case NALNUM:
ffc61ed2 1160 if (do_utf8) {
8269fa76 1161 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1162 while (s < strend) {
3568d838 1163 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1164 if (tmp && (norun || regtry(prog, s)))
1165 goto got_it;
1166 else
1167 tmp = doevery;
1168 }
bbce6d69 1169 else
ffc61ed2
JH
1170 tmp = 1;
1171 s += UTF8SKIP(s);
bbce6d69 1172 }
bbce6d69 1173 }
ffc61ed2
JH
1174 else {
1175 while (s < strend) {
1176 if (!isALNUM(*s)) {
1177 if (tmp && (norun || regtry(prog, s)))
1178 goto got_it;
1179 else
1180 tmp = doevery;
1181 }
a0ed51b3 1182 else
ffc61ed2
JH
1183 tmp = 1;
1184 s++;
a0ed51b3 1185 }
a0ed51b3
LW
1186 }
1187 break;
bbce6d69 1188 case NALNUML:
3280af22 1189 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1190 if (do_utf8) {
1191 while (s < strend) {
1192 if (!isALNUM_LC_utf8((U8*)s)) {
1193 if (tmp && (norun || regtry(prog, s)))
1194 goto got_it;
1195 else
1196 tmp = doevery;
1197 }
a687059c 1198 else
ffc61ed2
JH
1199 tmp = 1;
1200 s += UTF8SKIP(s);
a687059c 1201 }
a0d0e21e 1202 }
ffc61ed2
JH
1203 else {
1204 while (s < strend) {
1205 if (!isALNUM_LC(*s)) {
1206 if (tmp && (norun || regtry(prog, s)))
1207 goto got_it;
1208 else
1209 tmp = doevery;
1210 }
a0ed51b3 1211 else
ffc61ed2
JH
1212 tmp = 1;
1213 s++;
a0ed51b3 1214 }
a0ed51b3
LW
1215 }
1216 break;
a0d0e21e 1217 case SPACE:
ffc61ed2 1218 if (do_utf8) {
8269fa76 1219 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1220 while (s < strend) {
3568d838 1221 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1222 if (tmp && (norun || regtry(prog, s)))
1223 goto got_it;
1224 else
1225 tmp = doevery;
1226 }
a0d0e21e 1227 else
ffc61ed2
JH
1228 tmp = 1;
1229 s += UTF8SKIP(s);
2304df62 1230 }
a0d0e21e 1231 }
ffc61ed2
JH
1232 else {
1233 while (s < strend) {
1234 if (isSPACE(*s)) {
1235 if (tmp && (norun || regtry(prog, s)))
1236 goto got_it;
1237 else
1238 tmp = doevery;
1239 }
a0ed51b3 1240 else
ffc61ed2
JH
1241 tmp = 1;
1242 s++;
a0ed51b3 1243 }
a0ed51b3
LW
1244 }
1245 break;
bbce6d69 1246 case SPACEL:
3280af22 1247 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1248 if (do_utf8) {
1249 while (s < strend) {
1250 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1251 if (tmp && (norun || regtry(prog, s)))
1252 goto got_it;
1253 else
1254 tmp = doevery;
1255 }
bbce6d69 1256 else
ffc61ed2
JH
1257 tmp = 1;
1258 s += UTF8SKIP(s);
bbce6d69 1259 }
bbce6d69 1260 }
ffc61ed2
JH
1261 else {
1262 while (s < strend) {
1263 if (isSPACE_LC(*s)) {
1264 if (tmp && (norun || regtry(prog, s)))
1265 goto got_it;
1266 else
1267 tmp = doevery;
1268 }
a0ed51b3 1269 else
ffc61ed2
JH
1270 tmp = 1;
1271 s++;
a0ed51b3 1272 }
a0ed51b3
LW
1273 }
1274 break;
a0d0e21e 1275 case NSPACE:
ffc61ed2 1276 if (do_utf8) {
8269fa76 1277 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1278 while (s < strend) {
3568d838 1279 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1280 if (tmp && (norun || regtry(prog, s)))
1281 goto got_it;
1282 else
1283 tmp = doevery;
1284 }
a0d0e21e 1285 else
ffc61ed2
JH
1286 tmp = 1;
1287 s += UTF8SKIP(s);
a687059c 1288 }
a0d0e21e 1289 }
ffc61ed2
JH
1290 else {
1291 while (s < strend) {
1292 if (!isSPACE(*s)) {
1293 if (tmp && (norun || regtry(prog, s)))
1294 goto got_it;
1295 else
1296 tmp = doevery;
1297 }
a0ed51b3 1298 else
ffc61ed2
JH
1299 tmp = 1;
1300 s++;
a0ed51b3 1301 }
a0ed51b3
LW
1302 }
1303 break;
bbce6d69 1304 case NSPACEL:
3280af22 1305 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1306 if (do_utf8) {
1307 while (s < strend) {
1308 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1309 if (tmp && (norun || regtry(prog, s)))
1310 goto got_it;
1311 else
1312 tmp = doevery;
1313 }
bbce6d69 1314 else
ffc61ed2
JH
1315 tmp = 1;
1316 s += UTF8SKIP(s);
bbce6d69 1317 }
bbce6d69 1318 }
ffc61ed2
JH
1319 else {
1320 while (s < strend) {
1321 if (!isSPACE_LC(*s)) {
1322 if (tmp && (norun || regtry(prog, s)))
1323 goto got_it;
1324 else
1325 tmp = doevery;
1326 }
a0ed51b3 1327 else
ffc61ed2
JH
1328 tmp = 1;
1329 s++;
a0ed51b3 1330 }
a0ed51b3
LW
1331 }
1332 break;
a0d0e21e 1333 case DIGIT:
ffc61ed2 1334 if (do_utf8) {
8269fa76 1335 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1336 while (s < strend) {
3568d838 1337 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1338 if (tmp && (norun || regtry(prog, s)))
1339 goto got_it;
1340 else
1341 tmp = doevery;
1342 }
a0d0e21e 1343 else
ffc61ed2
JH
1344 tmp = 1;
1345 s += UTF8SKIP(s);
2b69d0c2 1346 }
a0d0e21e 1347 }
ffc61ed2
JH
1348 else {
1349 while (s < strend) {
1350 if (isDIGIT(*s)) {
1351 if (tmp && (norun || regtry(prog, s)))
1352 goto got_it;
1353 else
1354 tmp = doevery;
1355 }
a0ed51b3 1356 else
ffc61ed2
JH
1357 tmp = 1;
1358 s++;
a0ed51b3 1359 }
a0ed51b3
LW
1360 }
1361 break;
b8c5462f
JH
1362 case DIGITL:
1363 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1364 if (do_utf8) {
1365 while (s < strend) {
1366 if (isDIGIT_LC_utf8((U8*)s)) {
1367 if (tmp && (norun || regtry(prog, s)))
1368 goto got_it;
1369 else
1370 tmp = doevery;
1371 }
b8c5462f 1372 else
ffc61ed2
JH
1373 tmp = 1;
1374 s += UTF8SKIP(s);
b8c5462f 1375 }
b8c5462f 1376 }
ffc61ed2
JH
1377 else {
1378 while (s < strend) {
1379 if (isDIGIT_LC(*s)) {
1380 if (tmp && (norun || regtry(prog, s)))
1381 goto got_it;
1382 else
1383 tmp = doevery;
1384 }
b8c5462f 1385 else
ffc61ed2
JH
1386 tmp = 1;
1387 s++;
b8c5462f 1388 }
b8c5462f
JH
1389 }
1390 break;
a0d0e21e 1391 case NDIGIT:
ffc61ed2 1392 if (do_utf8) {
8269fa76 1393 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1394 while (s < strend) {
3568d838 1395 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1396 if (tmp && (norun || regtry(prog, s)))
1397 goto got_it;
1398 else
1399 tmp = doevery;
1400 }
a0d0e21e 1401 else
ffc61ed2
JH
1402 tmp = 1;
1403 s += UTF8SKIP(s);
a687059c 1404 }
a0d0e21e 1405 }
ffc61ed2
JH
1406 else {
1407 while (s < strend) {
1408 if (!isDIGIT(*s)) {
1409 if (tmp && (norun || regtry(prog, s)))
1410 goto got_it;
1411 else
1412 tmp = doevery;
1413 }
a0ed51b3 1414 else
ffc61ed2
JH
1415 tmp = 1;
1416 s++;
a0ed51b3 1417 }
a0ed51b3
LW
1418 }
1419 break;
b8c5462f
JH
1420 case NDIGITL:
1421 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1422 if (do_utf8) {
1423 while (s < strend) {
1424 if (!isDIGIT_LC_utf8((U8*)s)) {
1425 if (tmp && (norun || regtry(prog, s)))
1426 goto got_it;
1427 else
1428 tmp = doevery;
1429 }
b8c5462f 1430 else
ffc61ed2
JH
1431 tmp = 1;
1432 s += UTF8SKIP(s);
b8c5462f 1433 }
a0ed51b3 1434 }
ffc61ed2
JH
1435 else {
1436 while (s < strend) {
1437 if (!isDIGIT_LC(*s)) {
1438 if (tmp && (norun || regtry(prog, s)))
1439 goto got_it;
1440 else
1441 tmp = doevery;
1442 }
cf93c79d 1443 else
ffc61ed2
JH
1444 tmp = 1;
1445 s++;
b8c5462f 1446 }
b8c5462f
JH
1447 }
1448 break;
b3c9acc1 1449 default:
3c3eec57
GS
1450 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1451 break;
d6a28714 1452 }
6eb5f6b9
JH
1453 return 0;
1454 got_it:
1455 return s;
1456}
1457
1458/*
1459 - regexec_flags - match a regexp against a string
1460 */
1461I32
1462Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1463 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1464/* strend: pointer to null at end of string */
1465/* strbeg: real beginning of string */
1466/* minend: end of match must be >=minend after stringarg. */
1467/* data: May be used for some additional optimizations. */
1468/* nosave: For optimizations. */
1469{
6eb5f6b9
JH
1470 register char *s;
1471 register regnode *c;
1472 register char *startpos = stringarg;
6eb5f6b9
JH
1473 I32 minlen; /* must match at least this many chars */
1474 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1475 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1476 constant substr. */ /* CC */
1477 I32 end_shift = 0; /* Same for the end. */ /* CC */
1478 I32 scream_pos = -1; /* Internal iterator of scream. */
1479 char *scream_olds;
1480 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1481 bool do_utf8 = DO_UTF8(sv);
2a782b5b 1482#ifdef DEBUGGING
ce333219 1483 SV *dsv = PERL_DEBUG_PAD_ZERO(0);
2a782b5b 1484#endif
6eb5f6b9
JH
1485
1486 PL_regcc = 0;
1487
1488 cache_re(prog);
1489#ifdef DEBUGGING
aea4f609 1490 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1491#endif
1492
1493 /* Be paranoid... */
1494 if (prog == NULL || startpos == NULL) {
1495 Perl_croak(aTHX_ "NULL regexp parameter");
1496 return 0;
1497 }
1498
1499 minlen = prog->minlen;
a72c7584
JH
1500 if (strend - startpos < minlen) {
1501 DEBUG_r(PerlIO_printf(Perl_debug_log,
1502 "String too short [regexec_flags]...\n"));
1503 goto phooey;
1aa99e6b 1504 }
6eb5f6b9 1505
6eb5f6b9
JH
1506 /* Check validity of program. */
1507 if (UCHARAT(prog->program) != REG_MAGIC) {
1508 Perl_croak(aTHX_ "corrupted regexp program");
1509 }
1510
1511 PL_reg_flags = 0;
1512 PL_reg_eval_set = 0;
1513 PL_reg_maxiter = 0;
1514
1515 if (prog->reganch & ROPT_UTF8)
1516 PL_reg_flags |= RF_utf8;
1517
1518 /* Mark beginning of line for ^ and lookbehind. */
1519 PL_regbol = startpos;
1520 PL_bostr = strbeg;
1521 PL_reg_sv = sv;
1522
1523 /* Mark end of line for $ (and such) */
1524 PL_regeol = strend;
1525
1526 /* see how far we have to get to not match where we matched before */
1527 PL_regtill = startpos+minend;
1528
1529 /* We start without call_cc context. */
1530 PL_reg_call_cc = 0;
1531
1532 /* If there is a "must appear" string, look for it. */
1533 s = startpos;
1534
1535 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1536 MAGIC *mg;
1537
1538 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1539 PL_reg_ganch = startpos;
1540 else if (sv && SvTYPE(sv) >= SVt_PVMG
1541 && SvMAGIC(sv)
14befaf4
DM
1542 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1543 && mg->mg_len >= 0) {
6eb5f6b9
JH
1544 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1545 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1546 if (s > PL_reg_ganch)
6eb5f6b9
JH
1547 goto phooey;
1548 s = PL_reg_ganch;
1549 }
1550 }
1551 else /* pos() not defined */
1552 PL_reg_ganch = strbeg;
1553 }
1554
699c3c34
JH
1555 if (do_utf8 == (UTF!=0) &&
1556 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1557 re_scream_pos_data d;
1558
1559 d.scream_olds = &scream_olds;
1560 d.scream_pos = &scream_pos;
1561 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7
JH
1562 if (!s) {
1563 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1564 goto phooey; /* not present */
3fa9c3d7 1565 }
6eb5f6b9
JH
1566 }
1567
2a782b5b 1568 DEBUG_r({
df1ffd02
JH
1569 char *s = do_utf8 ? sv_uni_display(dsv, sv, 60, 0) : startpos;
1570 int len = do_utf8 ? strlen(s) : strend - startpos;
2a782b5b
JH
1571 if (!PL_colorset)
1572 reginitcolors();
1573 PerlIO_printf(Perl_debug_log,
1574 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1575 PL_colors[4],PL_colors[5],PL_colors[0],
1576 prog->precomp,
1577 PL_colors[1],
1578 (strlen(prog->precomp) > 60 ? "..." : ""),
1579 PL_colors[0],
1580 (int)(len > 60 ? 60 : len),
1581 s, PL_colors[1],
1582 (len > 60 ? "..." : "")
1583 );
1584 });
6eb5f6b9
JH
1585
1586 /* Simplest case: anchored match need be tried only once. */
1587 /* [unless only anchor is BOL and multiline is set] */
1588 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1589 if (s == startpos && regtry(prog, startpos))
1590 goto got_it;
1591 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1592 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1593 {
1594 char *end;
1595
1596 if (minlen)
1597 dontbother = minlen - 1;
1aa99e6b 1598 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1599 /* for multiline we only have to try after newlines */
1600 if (prog->check_substr) {
1601 if (s == startpos)
1602 goto after_try;
1603 while (1) {
1604 if (regtry(prog, s))
1605 goto got_it;
1606 after_try:
1607 if (s >= end)
1608 goto phooey;
1609 if (prog->reganch & RE_USE_INTUIT) {
1610 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1611 if (!s)
1612 goto phooey;
1613 }
1614 else
1615 s++;
1616 }
1617 } else {
1618 if (s > startpos)
1619 s--;
1620 while (s < end) {
1621 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1622 if (regtry(prog, s))
1623 goto got_it;
1624 }
1625 }
1626 }
1627 }
1628 goto phooey;
1629 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1630 if (regtry(prog, PL_reg_ganch))
1631 goto got_it;
1632 goto phooey;
1633 }
1634
1635 /* Messy cases: unanchored match. */
9041c2e3 1636 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1637 /* we have /x+whatever/ */
1638 /* it must be a one character string (XXXX Except UTF?) */
1639 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1640#ifdef DEBUGGING
1641 int did_match = 0;
1642#endif
1643
1aa99e6b 1644 if (do_utf8) {
6eb5f6b9
JH
1645 while (s < strend) {
1646 if (*s == ch) {
bf93d4cc 1647 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1648 if (regtry(prog, s)) goto got_it;
1649 s += UTF8SKIP(s);
1650 while (s < strend && *s == ch)
1651 s += UTF8SKIP(s);
1652 }
1653 s += UTF8SKIP(s);
1654 }
1655 }
1656 else {
1657 while (s < strend) {
1658 if (*s == ch) {
bf93d4cc 1659 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1660 if (regtry(prog, s)) goto got_it;
1661 s++;
1662 while (s < strend && *s == ch)
1663 s++;
1664 }
1665 s++;
1666 }
1667 }
b7953727 1668 DEBUG_r(if (!did_match)
bf93d4cc 1669 PerlIO_printf(Perl_debug_log,
b7953727
JH
1670 "Did not find anchored character...\n")
1671 );
6eb5f6b9
JH
1672 }
1673 /*SUPPRESS 560*/
1aa99e6b
IH
1674 else if (do_utf8 == (UTF!=0) &&
1675 (prog->anchored_substr != Nullsv
9041c2e3 1676 || (prog->float_substr != Nullsv
1aa99e6b 1677 && prog->float_max_offset < strend - s))) {
9041c2e3 1678 SV *must = prog->anchored_substr
6eb5f6b9 1679 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1680 I32 back_max =
6eb5f6b9 1681 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1682 I32 back_min =
6eb5f6b9 1683 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1684 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1685 -(I32)(CHR_SVLEN(must)
1aa99e6b 1686 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1687 char *last1; /* Last position checked before */
bf93d4cc
GS
1688#ifdef DEBUGGING
1689 int did_match = 0;
1690#endif
6eb5f6b9
JH
1691
1692 if (s > PL_bostr)
1693 last1 = HOPc(s, -1);
1694 else
1695 last1 = s - 1; /* bogus */
1696
1697 /* XXXX check_substr already used to find `s', can optimize if
1698 check_substr==must. */
1699 scream_pos = -1;
1700 dontbother = end_shift;
1701 strend = HOPc(strend, -dontbother);
1702 while ( (s <= last) &&
9041c2e3 1703 ((flags & REXEC_SCREAM)
1aa99e6b 1704 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1705 end_shift, &scream_pos, 0))
1aa99e6b 1706 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1707 (unsigned char*)strend, must,
6eb5f6b9 1708 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1709 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1710 if (HOPc(s, -back_max) > last1) {
1711 last1 = HOPc(s, -back_min);
1712 s = HOPc(s, -back_max);
1713 }
1714 else {
1715 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1716
1717 last1 = HOPc(s, -back_min);
1718 s = t;
1719 }
1aa99e6b 1720 if (do_utf8) {
6eb5f6b9
JH
1721 while (s <= last1) {
1722 if (regtry(prog, s))
1723 goto got_it;
1724 s += UTF8SKIP(s);
1725 }
1726 }
1727 else {
1728 while (s <= last1) {
1729 if (regtry(prog, s))
1730 goto got_it;
1731 s++;
1732 }
1733 }
1734 }
b7953727
JH
1735 DEBUG_r(if (!did_match)
1736 PerlIO_printf(Perl_debug_log,
1737 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1738 ((must == prog->anchored_substr)
1739 ? "anchored" : "floating"),
1740 PL_colors[0],
1741 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1742 SvPVX(must),
b7953727
JH
1743 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1744 );
6eb5f6b9
JH
1745 goto phooey;
1746 }
155aba94 1747 else if ((c = prog->regstclass)) {
66e933ab
GS
1748 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1749 /* don't bother with what can't match */
6eb5f6b9 1750 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1751 DEBUG_r({
1752 SV *prop = sv_newmortal();
1753 regprop(prop, c);
2a782b5b 1754 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), UTF ? sv_uni_display(dsv, sv, 60, 0) : s);
ffc61ed2 1755 });
6eb5f6b9
JH
1756 if (find_byclass(prog, c, s, strend, startpos, 0))
1757 goto got_it;
bf93d4cc 1758 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1759 }
1760 else {
1761 dontbother = 0;
1762 if (prog->float_substr != Nullsv) { /* Trim the end. */
1763 char *last;
d6a28714
JH
1764
1765 if (flags & REXEC_SCREAM) {
1766 last = screaminstr(sv, prog->float_substr, s - strbeg,
1767 end_shift, &scream_pos, 1); /* last one */
1768 if (!last)
ffc61ed2 1769 last = scream_olds; /* Only one occurrence. */
b8c5462f 1770 }
d6a28714
JH
1771 else {
1772 STRLEN len;
1773 char *little = SvPV(prog->float_substr, len);
1774
1775 if (SvTAIL(prog->float_substr)) {
1776 if (memEQ(strend - len + 1, little, len - 1))
1777 last = strend - len + 1;
1778 else if (!PL_multiline)
9041c2e3 1779 last = memEQ(strend - len, little, len)
d6a28714 1780 ? strend - len : Nullch;
b8c5462f 1781 else
d6a28714
JH
1782 goto find_last;
1783 } else {
1784 find_last:
9041c2e3 1785 if (len)
d6a28714 1786 last = rninstr(s, strend, little, little + len);
b8c5462f 1787 else
d6a28714 1788 last = strend; /* matching `$' */
b8c5462f 1789 }
b8c5462f 1790 }
bf93d4cc
GS
1791 if (last == NULL) {
1792 DEBUG_r(PerlIO_printf(Perl_debug_log,
1793 "%sCan't trim the tail, match fails (should not happen)%s\n",
1794 PL_colors[4],PL_colors[5]));
1795 goto phooey; /* Should not happen! */
1796 }
d6a28714
JH
1797 dontbother = strend - last + prog->float_min_offset;
1798 }
1799 if (minlen && (dontbother < minlen))
1800 dontbother = minlen - 1;
1801 strend -= dontbother; /* this one's always in bytes! */
1802 /* We don't know much -- general case. */
1aa99e6b 1803 if (do_utf8) {
d6a28714
JH
1804 for (;;) {
1805 if (regtry(prog, s))
1806 goto got_it;
1807 if (s >= strend)
1808 break;
b8c5462f 1809 s += UTF8SKIP(s);
d6a28714
JH
1810 };
1811 }
1812 else {
1813 do {
1814 if (regtry(prog, s))
1815 goto got_it;
1816 } while (s++ < strend);
1817 }
1818 }
1819
1820 /* Failure. */
1821 goto phooey;
1822
1823got_it:
1824 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1825
1826 if (PL_reg_eval_set) {
1827 /* Preserve the current value of $^R */
1828 if (oreplsv != GvSV(PL_replgv))
1829 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1830 restored, the value remains
1831 the same. */
acfe0abc 1832 restore_pos(aTHX_ 0);
d6a28714
JH
1833 }
1834
1835 /* make sure $`, $&, $', and $digit will work later */
1836 if ( !(flags & REXEC_NOT_FIRST) ) {
1837 if (RX_MATCH_COPIED(prog)) {
1838 Safefree(prog->subbeg);
1839 RX_MATCH_COPIED_off(prog);
1840 }
1841 if (flags & REXEC_COPY_STR) {
1842 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1843
1844 s = savepvn(strbeg, i);
1845 prog->subbeg = s;
1846 prog->sublen = i;
1847 RX_MATCH_COPIED_on(prog);
1848 }
1849 else {
1850 prog->subbeg = strbeg;
1851 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1852 }
1853 }
9041c2e3 1854
d6a28714
JH
1855 return 1;
1856
1857phooey:
bf93d4cc
GS
1858 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1859 PL_colors[4],PL_colors[5]));
d6a28714 1860 if (PL_reg_eval_set)
acfe0abc 1861 restore_pos(aTHX_ 0);
d6a28714
JH
1862 return 0;
1863}
1864
1865/*
1866 - regtry - try match at specific point
1867 */
1868STATIC I32 /* 0 failure, 1 success */
1869S_regtry(pTHX_ regexp *prog, char *startpos)
1870{
d6a28714
JH
1871 register I32 i;
1872 register I32 *sp;
1873 register I32 *ep;
1874 CHECKPOINT lastcp;
1875
02db2b7b
IZ
1876#ifdef DEBUGGING
1877 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1878#endif
d6a28714
JH
1879 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1880 MAGIC *mg;
1881
1882 PL_reg_eval_set = RS_init;
1883 DEBUG_r(DEBUG_s(
b900a521
JH
1884 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1885 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1886 ));
e8347627 1887 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1888 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1889 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1890 SAVETMPS;
1891 /* Apparently this is not needed, judging by wantarray. */
e8347627 1892 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1893 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1894
1895 if (PL_reg_sv) {
1896 /* Make $_ available to executed code. */
1897 if (PL_reg_sv != DEFSV) {
4d1ff10f 1898 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
d6a28714
JH
1899 SAVESPTR(DEFSV);
1900 DEFSV = PL_reg_sv;
b8c5462f 1901 }
d6a28714 1902
9041c2e3 1903 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1904 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1905 /* prepare for quick setting of pos */
14befaf4
DM
1906 sv_magic(PL_reg_sv, (SV*)0,
1907 PERL_MAGIC_regex_global, Nullch, 0);
1908 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1909 mg->mg_len = -1;
b8c5462f 1910 }
d6a28714
JH
1911 PL_reg_magic = mg;
1912 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1913 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 1914 }
09687e5a 1915 if (!PL_reg_curpm) {
0f79a09d 1916 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
1917#ifdef USE_ITHREADS
1918 {
1919 SV* repointer = newSViv(0);
577e12cc 1920 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 1921 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
1922 av_push(PL_regex_padav,repointer);
1923 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
1924 PL_regex_pad = AvARRAY(PL_regex_padav);
1925 }
1926#endif
1927 }
aaa362c4 1928 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
1929 PL_reg_oldcurpm = PL_curpm;
1930 PL_curpm = PL_reg_curpm;
1931 if (RX_MATCH_COPIED(prog)) {
1932 /* Here is a serious problem: we cannot rewrite subbeg,
1933 since it may be needed if this match fails. Thus
1934 $` inside (?{}) could fail... */
1935 PL_reg_oldsaved = prog->subbeg;
1936 PL_reg_oldsavedlen = prog->sublen;
1937 RX_MATCH_COPIED_off(prog);
1938 }
1939 else
1940 PL_reg_oldsaved = Nullch;
1941 prog->subbeg = PL_bostr;
1942 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1943 }
1944 prog->startp[0] = startpos - PL_bostr;
1945 PL_reginput = startpos;
1946 PL_regstartp = prog->startp;
1947 PL_regendp = prog->endp;
1948 PL_reglastparen = &prog->lastparen;
a01268b5 1949 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
1950 prog->lastparen = 0;
1951 PL_regsize = 0;
1952 DEBUG_r(PL_reg_starttry = startpos);
1953 if (PL_reg_start_tmpl <= prog->nparens) {
1954 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1955 if(PL_reg_start_tmp)
1956 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1957 else
1958 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1959 }
1960
128e8167
JH
1961#ifdef DEBUGGING
1962 sv_setpvn(PERL_DEBUG_PAD(0), "", 0);
1963 sv_setpvn(PERL_DEBUG_PAD(1), "", 0);
1964 sv_setpvn(PERL_DEBUG_PAD(2), "", 0);
1965#endif
1966
d6a28714
JH
1967 /* XXXX What this code is doing here?!!! There should be no need
1968 to do this again and again, PL_reglastparen should take care of
3dd2943c 1969 this! --ilya*/
dafc8851
JH
1970
1971 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1972 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1973 * PL_reglastparen), is not needed at all by the test suite
1974 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1975 * enough, for building DynaLoader, or otherwise this
1976 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1977 * will happen. Meanwhile, this code *is* needed for the
1978 * above-mentioned test suite tests to succeed. The common theme
1979 * on those tests seems to be returning null fields from matches.
1980 * --jhi */
dafc8851 1981#if 1
d6a28714
JH
1982 sp = prog->startp;
1983 ep = prog->endp;
1984 if (prog->nparens) {
09e8ae3b 1985 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1986 *++sp = -1;
1987 *++ep = -1;
1988 }
1989 }
dafc8851 1990#endif
02db2b7b 1991 REGCP_SET(lastcp);
d6a28714
JH
1992 if (regmatch(prog->program + 1)) {
1993 prog->endp[0] = PL_reginput - PL_bostr;
1994 return 1;
1995 }
02db2b7b 1996 REGCP_UNWIND(lastcp);
d6a28714
JH
1997 return 0;
1998}
1999
02db2b7b
IZ
2000#define RE_UNWIND_BRANCH 1
2001#define RE_UNWIND_BRANCHJ 2
2002
2003union re_unwind_t;
2004
2005typedef struct { /* XX: makes sense to enlarge it... */
2006 I32 type;
2007 I32 prev;
2008 CHECKPOINT lastcp;
2009} re_unwind_generic_t;
2010
2011typedef struct {
2012 I32 type;
2013 I32 prev;
2014 CHECKPOINT lastcp;
2015 I32 lastparen;
2016 regnode *next;
2017 char *locinput;
2018 I32 nextchr;
2019#ifdef DEBUGGING
2020 int regindent;
2021#endif
2022} re_unwind_branch_t;
2023
2024typedef union re_unwind_t {
2025 I32 type;
2026 re_unwind_generic_t generic;
2027 re_unwind_branch_t branch;
2028} re_unwind_t;
2029
8ba1375e
MJD
2030#define sayYES goto yes
2031#define sayNO goto no
2032#define sayYES_FINAL goto yes_final
2033#define sayYES_LOUD goto yes_loud
2034#define sayNO_FINAL goto no_final
2035#define sayNO_SILENT goto do_no
2036#define saySAME(x) if (x) goto yes; else goto no
2037
2038#define REPORT_CODE_OFF 24
2039
d6a28714
JH
2040/*
2041 - regmatch - main matching routine
2042 *
2043 * Conceptually the strategy is simple: check to see whether the current
2044 * node matches, call self recursively to see whether the rest matches,
2045 * and then act accordingly. In practice we make some effort to avoid
2046 * recursion, in particular by going through "ordinary" nodes (that don't
2047 * need to know whether the rest of the match failed) by a loop instead of
2048 * by recursion.
2049 */
2050/* [lwall] I've hoisted the register declarations to the outer block in order to
2051 * maybe save a little bit of pushing and popping on the stack. It also takes
2052 * advantage of machines that use a register save mask on subroutine entry.
2053 */
2054STATIC I32 /* 0 failure, 1 success */
2055S_regmatch(pTHX_ regnode *prog)
2056{
d6a28714
JH
2057 register regnode *scan; /* Current node. */
2058 regnode *next; /* Next node. */
2059 regnode *inner; /* Next node in internal branch. */
2060 register I32 nextchr; /* renamed nextchr - nextchar colides with
2061 function of same name */
2062 register I32 n; /* no or next */
b7953727
JH
2063 register I32 ln = 0; /* len or last */
2064 register char *s = Nullch; /* operand or save */
d6a28714 2065 register char *locinput = PL_reginput;
b7953727 2066 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2067 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2068 I32 unwind = 0;
b7953727 2069#if 0
02db2b7b 2070 I32 firstcp = PL_savestack_ix;
b7953727 2071#endif
53c4c00c 2072 register bool do_utf8 = PL_reg_match_utf8;
2a782b5b 2073#ifdef DEBUGGING
ce333219
JH
2074 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2075 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2076 SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2a782b5b 2077#endif
02db2b7b 2078
d6a28714
JH
2079#ifdef DEBUGGING
2080 PL_regindent++;
2081#endif
2082
2083 /* Note that nextchr is a byte even in UTF */
2084 nextchr = UCHARAT(locinput);
2085 scan = prog;
2086 while (scan != NULL) {
8ba1375e 2087
2a782b5b 2088 DEBUG_r( {
d6a28714
JH
2089 SV *prop = sv_newmortal();
2090 int docolor = *PL_colors[0];
2091 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2092 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2093 /* The part of the string before starttry has one color
2094 (pref0_len chars), between starttry and current
2095 position another one (pref_len - pref0_len chars),
2096 after the current position the third one.
2097 We assume that pref0_len <= pref_len, otherwise we
2098 decrease pref0_len. */
9041c2e3 2099 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2100 ? (5 + taill) - l : locinput - PL_bostr;
2101 int pref0_len;
d6a28714 2102
df1ffd02 2103 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2104 pref_len++;
2105 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2106 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2107 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2108 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2109 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2110 l--;
d6a28714
JH
2111 if (pref0_len < 0)
2112 pref0_len = 0;
2113 if (pref0_len > pref_len)
2114 pref0_len = pref_len;
2115 regprop(prop, scan);
2a782b5b
JH
2116 {
2117 char *s0 =
df1ffd02 2118 do_utf8 ?
2a782b5b
JH
2119 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
2120 pref0_len, 60, 0) :
2121 locinput - pref_len;
df1ffd02
JH
2122 int len0 = do_utf8 ? strlen(s0) : pref0_len;
2123 char *s1 = do_utf8 ?
2a782b5b
JH
2124 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
2125 pref_len - pref0_len, 60, 0) :
2126 locinput - pref_len + pref0_len;
df1ffd02
JH
2127 int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2128 char *s2 = do_utf8 ?
2a782b5b
JH
2129 pv_uni_display(dsv2, (U8*)locinput,
2130 PL_regeol - locinput, 60, 0) :
2131 locinput;
df1ffd02 2132 int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2133 PerlIO_printf(Perl_debug_log,
2134 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2135 (IV)(locinput - PL_bostr),
2136 PL_colors[4],
2137 len0, s0,
2138 PL_colors[5],
2139 PL_colors[2],
2140 len1, s1,
2141 PL_colors[3],
2142 (docolor ? "" : "> <"),
2143 PL_colors[0],
2144 len2, s2,
2145 PL_colors[1],
2146 15 - l - pref_len + 1,
2147 "",
2148 (IV)(scan - PL_regprogram), PL_regindent*2, "",
2149 SvPVX(prop));
2150 }
2151 });
d6a28714
JH
2152
2153 next = scan + NEXT_OFF(scan);
2154 if (next == scan)
2155 next = NULL;
2156
2157 switch (OP(scan)) {
2158 case BOL:
12d33761
HS
2159 if (locinput == PL_bostr || (PL_multiline &&
2160 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2161 {
2162 /* regtill = regbol; */
b8c5462f
JH
2163 break;
2164 }
d6a28714
JH
2165 sayNO;
2166 case MBOL:
12d33761
HS
2167 if (locinput == PL_bostr ||
2168 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2169 {
b8c5462f
JH
2170 break;
2171 }
d6a28714
JH
2172 sayNO;
2173 case SBOL:
c2a73568 2174 if (locinput == PL_bostr)
b8c5462f 2175 break;
d6a28714
JH
2176 sayNO;
2177 case GPOS:
2178 if (locinput == PL_reg_ganch)
2179 break;
2180 sayNO;
2181 case EOL:
2182 if (PL_multiline)
2183 goto meol;
2184 else
2185 goto seol;
2186 case MEOL:
2187 meol:
2188 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2189 sayNO;
b8c5462f 2190 break;
d6a28714
JH
2191 case SEOL:
2192 seol:
2193 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2194 sayNO;
d6a28714 2195 if (PL_regeol - locinput > 1)
b8c5462f 2196 sayNO;
b8c5462f 2197 break;
d6a28714
JH
2198 case EOS:
2199 if (PL_regeol != locinput)
b8c5462f 2200 sayNO;
d6a28714 2201 break;
ffc61ed2 2202 case SANY:
d6a28714 2203 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2204 sayNO;
f33976b4
DB
2205 if (do_utf8) {
2206 locinput += PL_utf8skip[nextchr];
2207 if (locinput > PL_regeol)
2208 sayNO;
2209 nextchr = UCHARAT(locinput);
2210 }
2211 else
2212 nextchr = UCHARAT(++locinput);
2213 break;
2214 case CANY:
2215 if (!nextchr && locinput >= PL_regeol)
2216 sayNO;
b8c5462f 2217 nextchr = UCHARAT(++locinput);
a0d0e21e 2218 break;
ffc61ed2 2219 case REG_ANY:
1aa99e6b
IH
2220 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2221 sayNO;
2222 if (do_utf8) {
b8c5462f 2223 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2224 if (locinput > PL_regeol)
2225 sayNO;
a0ed51b3 2226 nextchr = UCHARAT(locinput);
a0ed51b3 2227 }
1aa99e6b
IH
2228 else
2229 nextchr = UCHARAT(++locinput);
a0ed51b3 2230 break;
d6a28714 2231 case EXACT:
cd439c50
IZ
2232 s = STRING(scan);
2233 ln = STR_LEN(scan);
1aa99e6b 2234 if (do_utf8 != (UTF!=0)) {
5ff6fc6d 2235 /* The target and the pattern have differing "utf8ness". */
1aa99e6b
IH
2236 char *l = locinput;
2237 char *e = s + ln;
2238 STRLEN len;
a72c7584 2239
5ff6fc6d
JH
2240 if (do_utf8) {
2241 /* The target is utf8, the pattern is not utf8. */
1aa99e6b
IH
2242 while (s < e) {
2243 if (l >= PL_regeol)
5ff6fc6d
JH
2244 sayNO;
2245 if (NATIVE_TO_UNI(*(U8*)s) !=
2246 utf8_to_uvchr((U8*)l, &len))
2247 sayNO;
2248 l += len;
2249 s ++;
1aa99e6b 2250 }
5ff6fc6d
JH
2251 }
2252 else {
2253 /* The target is not utf8, the pattern is utf8. */
1aa99e6b
IH
2254 while (s < e) {
2255 if (l >= PL_regeol)
2256 sayNO;
5ff6fc6d
JH
2257 if (NATIVE_TO_UNI(*((U8*)l)) !=
2258 utf8_to_uvchr((U8*)s, &len))
1aa99e6b 2259 sayNO;
a72c7584
JH
2260 s += len;
2261 l ++;
1aa99e6b 2262 }
5ff6fc6d 2263 }
1aa99e6b
IH
2264 locinput = l;
2265 nextchr = UCHARAT(locinput);
2266 break;
2267 }
5ff6fc6d 2268 /* The target and the pattern have the same "utf8ness". */
d6a28714
JH
2269 /* Inline the first character, for speed. */
2270 if (UCHARAT(s) != nextchr)
2271 sayNO;
2272 if (PL_regeol - locinput < ln)
2273 sayNO;
2274 if (ln > 1 && memNE(s, locinput, ln))
2275 sayNO;
2276 locinput += ln;
2277 nextchr = UCHARAT(locinput);
2278 break;
2279 case EXACTFL:
b8c5462f
JH
2280 PL_reg_flags |= RF_tainted;
2281 /* FALL THROUGH */
d6a28714 2282 case EXACTF:
cd439c50
IZ
2283 s = STRING(scan);
2284 ln = STR_LEN(scan);
d6a28714 2285
1aa99e6b 2286 if (do_utf8) {
d6a28714 2287 char *l = locinput;
1aa99e6b 2288 char *e;
a2a2844f 2289 STRLEN ulen;
e7ae6809 2290 U8 tmpbuf[UTF8_MAXLEN_UCLC+1];
1aa99e6b 2291 e = s + ln;
d6a28714 2292 while (s < e) {
a2a2844f 2293 if (l >= PL_regeol)
d6a28714 2294 sayNO;
a2a2844f 2295 toLOWER_utf8((U8*)l, tmpbuf, &ulen);
7114a2d2 2296 if (memNE(s, (char*)tmpbuf, ulen))
a2a2844f
JH
2297 sayNO;
2298 s += UTF8SKIP(s);
2299 l += ulen;
b8c5462f 2300 }
d6a28714 2301 locinput = l;
a0ed51b3
LW
2302 nextchr = UCHARAT(locinput);
2303 break;
2304 }
d6a28714
JH
2305
2306 /* Inline the first character, for speed. */
2307 if (UCHARAT(s) != nextchr &&
2308 UCHARAT(s) != ((OP(scan) == EXACTF)
2309 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2310 sayNO;
d6a28714 2311 if (PL_regeol - locinput < ln)
b8c5462f 2312 sayNO;
d6a28714
JH
2313 if (ln > 1 && (OP(scan) == EXACTF
2314 ? ibcmp(s, locinput, ln)
2315 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2316 sayNO;
d6a28714
JH
2317 locinput += ln;
2318 nextchr = UCHARAT(locinput);
a0d0e21e 2319 break;
d6a28714 2320 case ANYOF:
ffc61ed2
JH
2321 if (do_utf8) {
2322 if (!reginclass(scan, (U8*)locinput, do_utf8))
2323 sayNO;
2324 if (locinput >= PL_regeol)
2325 sayNO;
2326 locinput += PL_utf8skip[nextchr];
b8c5462f 2327 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2328 }
2329 else {
2330 if (nextchr < 0)
2331 nextchr = UCHARAT(locinput);
2332 if (!reginclass(scan, (U8*)locinput, do_utf8))
2333 sayNO;
2334 if (!nextchr && locinput >= PL_regeol)
2335 sayNO;
2336 nextchr = UCHARAT(++locinput);
2337 }
b8c5462f 2338 break;
d6a28714 2339 case ALNUML:
b8c5462f
JH
2340 PL_reg_flags |= RF_tainted;
2341 /* FALL THROUGH */
d6a28714 2342 case ALNUM:
b8c5462f 2343 if (!nextchr)
4633a7c4 2344 sayNO;
ffc61ed2 2345 if (do_utf8) {
ad24be35 2346 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2347 if (!(OP(scan) == ALNUM
3568d838 2348 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2349 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2350 {
2351 sayNO;
a0ed51b3 2352 }
b8c5462f 2353 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2354 nextchr = UCHARAT(locinput);
2355 break;
2356 }
ffc61ed2 2357 if (!(OP(scan) == ALNUM
d6a28714 2358 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2359 sayNO;
b8c5462f 2360 nextchr = UCHARAT(++locinput);
a0d0e21e 2361 break;
d6a28714 2362 case NALNUML:
b8c5462f
JH
2363 PL_reg_flags |= RF_tainted;
2364 /* FALL THROUGH */
d6a28714
JH
2365 case NALNUM:
2366 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2367 sayNO;
ffc61ed2 2368 if (do_utf8) {
8269fa76 2369 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2370 if (OP(scan) == NALNUM
3568d838 2371 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2372 : isALNUM_LC_utf8((U8*)locinput))
2373 {
b8c5462f 2374 sayNO;
d6a28714 2375 }
b8c5462f
JH
2376 locinput += PL_utf8skip[nextchr];
2377 nextchr = UCHARAT(locinput);
2378 break;
2379 }
ffc61ed2 2380 if (OP(scan) == NALNUM
d6a28714 2381 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2382 sayNO;
76e3520e 2383 nextchr = UCHARAT(++locinput);
a0d0e21e 2384 break;
d6a28714
JH
2385 case BOUNDL:
2386 case NBOUNDL:
3280af22 2387 PL_reg_flags |= RF_tainted;
bbce6d69 2388 /* FALL THROUGH */
d6a28714
JH
2389 case BOUND:
2390 case NBOUND:
2391 /* was last char in word? */
ffc61ed2 2392 if (do_utf8) {
12d33761
HS
2393 if (locinput == PL_bostr)
2394 ln = '\n';
ffc61ed2
JH
2395 else {
2396 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2397
2b9d42f0 2398 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2399 }
2400 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2401 ln = isALNUM_uni(ln);
8269fa76 2402 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2403 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2404 }
2405 else {
9041c2e3 2406 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2407 n = isALNUM_LC_utf8((U8*)locinput);
2408 }
a0ed51b3 2409 }
d6a28714 2410 else {
12d33761
HS
2411 ln = (locinput != PL_bostr) ?
2412 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2413 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2414 ln = isALNUM(ln);
2415 n = isALNUM(nextchr);
2416 }
2417 else {
2418 ln = isALNUM_LC(ln);
2419 n = isALNUM_LC(nextchr);
2420 }
d6a28714 2421 }
ffc61ed2
JH
2422 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2423 OP(scan) == BOUNDL))
2424 sayNO;
a0ed51b3 2425 break;
d6a28714 2426 case SPACEL:
3280af22 2427 PL_reg_flags |= RF_tainted;
bbce6d69 2428 /* FALL THROUGH */
d6a28714 2429 case SPACE:
9442cb0e 2430 if (!nextchr)
4633a7c4 2431 sayNO;
1aa99e6b 2432 if (do_utf8) {
fd400ab9 2433 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2434 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2435 if (!(OP(scan) == SPACE
3568d838 2436 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2437 : isSPACE_LC_utf8((U8*)locinput)))
2438 {
2439 sayNO;
2440 }
2441 locinput += PL_utf8skip[nextchr];
2442 nextchr = UCHARAT(locinput);
2443 break;
d6a28714 2444 }
ffc61ed2
JH
2445 if (!(OP(scan) == SPACE
2446 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2447 sayNO;
2448 nextchr = UCHARAT(++locinput);
2449 }
2450 else {
2451 if (!(OP(scan) == SPACE
2452 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2453 sayNO;
2454 nextchr = UCHARAT(++locinput);
a0ed51b3 2455 }
a0ed51b3 2456 break;
d6a28714 2457 case NSPACEL:
3280af22 2458 PL_reg_flags |= RF_tainted;
bbce6d69 2459 /* FALL THROUGH */
d6a28714 2460 case NSPACE:
9442cb0e 2461 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2462 sayNO;
1aa99e6b 2463 if (do_utf8) {
8269fa76 2464 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2465 if (OP(scan) == NSPACE
3568d838 2466 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2467 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2468 {
2469 sayNO;
2470 }
2471 locinput += PL_utf8skip[nextchr];
2472 nextchr = UCHARAT(locinput);
2473 break;
a0ed51b3 2474 }
ffc61ed2 2475 if (OP(scan) == NSPACE
d6a28714 2476 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2477 sayNO;
76e3520e 2478 nextchr = UCHARAT(++locinput);
a0d0e21e 2479 break;
d6a28714 2480 case DIGITL:
a0ed51b3
LW
2481 PL_reg_flags |= RF_tainted;
2482 /* FALL THROUGH */
d6a28714 2483 case DIGIT:
9442cb0e 2484 if (!nextchr)
a0ed51b3 2485 sayNO;
1aa99e6b 2486 if (do_utf8) {
8269fa76 2487 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2488 if (!(OP(scan) == DIGIT
3568d838 2489 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2490 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2491 {
a0ed51b3 2492 sayNO;
dfe13c55 2493 }
6f06b55f 2494 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2495 nextchr = UCHARAT(locinput);
2496 break;
2497 }
ffc61ed2 2498 if (!(OP(scan) == DIGIT
9442cb0e 2499 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2500 sayNO;
2501 nextchr = UCHARAT(++locinput);
2502 break;
d6a28714 2503 case NDIGITL:
b8c5462f
JH
2504 PL_reg_flags |= RF_tainted;
2505 /* FALL THROUGH */
d6a28714 2506 case NDIGIT:
9442cb0e 2507 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2508 sayNO;
1aa99e6b 2509 if (do_utf8) {
8269fa76 2510 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2511 if (OP(scan) == NDIGIT
3568d838 2512 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2513 : isDIGIT_LC_utf8((U8*)locinput))
2514 {
a0ed51b3 2515 sayNO;
9442cb0e 2516 }
6f06b55f 2517 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2518 nextchr = UCHARAT(locinput);
2519 break;
2520 }
ffc61ed2 2521 if (OP(scan) == NDIGIT
9442cb0e 2522 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2523 sayNO;
2524 nextchr = UCHARAT(++locinput);
2525 break;
2526 case CLUMP:
8269fa76 2527 LOAD_UTF8_CHARCLASS(mark,"~");
3568d838
JH
2528 if (locinput >= PL_regeol ||
2529 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3 2530 sayNO;
6f06b55f 2531 locinput += PL_utf8skip[nextchr];
3568d838
JH
2532 while (locinput < PL_regeol &&
2533 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3
LW
2534 locinput += UTF8SKIP(locinput);
2535 if (locinput > PL_regeol)
2536 sayNO;
2537 nextchr = UCHARAT(locinput);
2538 break;
c8756f30 2539 case REFFL:
3280af22 2540 PL_reg_flags |= RF_tainted;
c8756f30 2541 /* FALL THROUGH */
c277df42 2542 case REF:
c8756f30 2543 case REFF:
c277df42 2544 n = ARG(scan); /* which paren pair */
cf93c79d 2545 ln = PL_regstartp[n];
2c2d71f5 2546 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2547 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2548 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2549 if (ln == PL_regendp[n])
a0d0e21e 2550 break;
a0ed51b3 2551
cf93c79d 2552 s = PL_bostr + ln;
1aa99e6b 2553 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2554 char *l = locinput;
cf93c79d 2555 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2556 /*
2557 * Note that we can't do the "other character" lookup trick as
2558 * in the 8-bit case (no pun intended) because in Unicode we
2559 * have to map both upper and title case to lower case.
2560 */
2561 if (OP(scan) == REFF) {
a2a2844f 2562 STRLEN ulen1, ulen2;
e7ae6809
JH
2563 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
2564 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a0ed51b3
LW
2565 while (s < e) {
2566 if (l >= PL_regeol)
2567 sayNO;
a2a2844f
JH
2568 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2569 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 2570 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 2571 sayNO;
a2a2844f
JH
2572 s += ulen1;
2573 l += ulen2;
a0ed51b3
LW
2574 }
2575 }
2576 locinput = l;
2577 nextchr = UCHARAT(locinput);
2578 break;
2579 }
2580
a0d0e21e 2581 /* Inline the first character, for speed. */
76e3520e 2582 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2583 (OP(scan) == REF ||
2584 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2585 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2586 sayNO;
cf93c79d 2587 ln = PL_regendp[n] - ln;
3280af22 2588 if (locinput + ln > PL_regeol)
4633a7c4 2589 sayNO;
c8756f30
AK
2590 if (ln > 1 && (OP(scan) == REF
2591 ? memNE(s, locinput, ln)
2592 : (OP(scan) == REFF
2593 ? ibcmp(s, locinput, ln)
2594 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2595 sayNO;
a0d0e21e 2596 locinput += ln;
76e3520e 2597 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2598 break;
2599
2600 case NOTHING:
c277df42 2601 case TAIL:
a0d0e21e
LW
2602 break;
2603 case BACK:
2604 break;
c277df42
IZ
2605 case EVAL:
2606 {
2607 dSP;
533c011a 2608 OP_4tree *oop = PL_op;
3280af22
NIS
2609 COP *ocurcop = PL_curcop;
2610 SV **ocurpad = PL_curpad;
c277df42 2611 SV *ret;
9041c2e3 2612
c277df42 2613 n = ARG(scan);
533c011a 2614 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2615 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2616 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2617 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2618
8e5e9ebe
RGS
2619 {
2620 SV **before = SP;
2621 CALLRUNOPS(aTHX); /* Scalar context. */
2622 SPAGAIN;
2623 if (SP == before)
2624 ret = Nullsv; /* protect against empty (?{}) blocks. */
2625 else {
2626 ret = POPs;
2627 PUTBACK;
2628 }
2629 }
2630
0f5d15d6
IZ
2631 PL_op = oop;
2632 PL_curpad = ocurpad;
2633 PL_curcop = ocurcop;
c277df42 2634 if (logical) {
0f5d15d6
IZ
2635 if (logical == 2) { /* Postponed subexpression. */
2636 regexp *re;
22c35a8c 2637 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2638 re_cc_state state;
0f5d15d6
IZ
2639 CHECKPOINT cp, lastcp;
2640
2641 if(SvROK(ret) || SvRMAGICAL(ret)) {
2642 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2643
2644 if(SvMAGICAL(sv))
14befaf4 2645 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2646 }
2647 if (mg) {
2648 re = (regexp *)mg->mg_obj;
df0003d4 2649 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2650 }
2651 else {
2652 STRLEN len;
2653 char *t = SvPV(ret, len);
2654 PMOP pm;
2655 char *oprecomp = PL_regprecomp;
2656 I32 osize = PL_regsize;
2657 I32 onpar = PL_regnpar;
2658
5fcd1c1b 2659 Zero(&pm, 1, PMOP);
cea2e8a9 2660 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2661 if (!(SvFLAGS(ret)
0f5d15d6 2662 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2663 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2664 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2665 PL_regprecomp = oprecomp;
2666 PL_regsize = osize;
2667 PL_regnpar = onpar;
2668 }
2669 DEBUG_r(
9041c2e3 2670 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2671 "Entering embedded `%s%.60s%s%s'\n",
2672 PL_colors[0],
2673 re->precomp,
2674 PL_colors[1],
2675 (strlen(re->precomp) > 60 ? "..." : ""))
2676 );
2677 state.node = next;
2678 state.prev = PL_reg_call_cc;
2679 state.cc = PL_regcc;
2680 state.re = PL_reg_re;
2681
2ab05381 2682 PL_regcc = 0;
9041c2e3 2683
0f5d15d6 2684 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2685 REGCP_SET(lastcp);
0f5d15d6
IZ
2686 cache_re(re);
2687 state.ss = PL_savestack_ix;
2688 *PL_reglastparen = 0;
a01268b5 2689 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2690 PL_reg_call_cc = &state;
2691 PL_reginput = locinput;
2c2d71f5
JH
2692
2693 /* XXXX This is too dramatic a measure... */
2694 PL_reg_maxiter = 0;
2695
0f5d15d6 2696 if (regmatch(re->program + 1)) {
2c914db6
IZ
2697 /* Even though we succeeded, we need to restore
2698 global variables, since we may be wrapped inside
2699 SUSPEND, thus the match may be not finished yet. */
2700
2701 /* XXXX Do this only if SUSPENDed? */
2702 PL_reg_call_cc = state.prev;
2703 PL_regcc = state.cc;
2704 PL_reg_re = state.re;
2705 cache_re(PL_reg_re);
2706
2707 /* XXXX This is too dramatic a measure... */
2708 PL_reg_maxiter = 0;
2709
2710 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2711 ReREFCNT_dec(re);
2712 regcpblow(cp);
2713 sayYES;
2714 }
0f5d15d6 2715 ReREFCNT_dec(re);
02db2b7b 2716 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2717 regcppop();
2718 PL_reg_call_cc = state.prev;
2719 PL_regcc = state.cc;
2720 PL_reg_re = state.re;
d3790889 2721 cache_re(PL_reg_re);
2c2d71f5
JH
2722
2723 /* XXXX This is too dramatic a measure... */
2724 PL_reg_maxiter = 0;
2725
8e514ae6 2726 logical = 0;
0f5d15d6
IZ
2727 sayNO;
2728 }
c277df42 2729 sw = SvTRUE(ret);
0f5d15d6 2730 logical = 0;
a0ed51b3
LW
2731 }
2732 else
3280af22 2733 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2734 break;
2735 }
a0d0e21e 2736 case OPEN:
c277df42 2737 n = ARG(scan); /* which paren pair */
3280af22
NIS
2738 PL_reg_start_tmp[n] = locinput;
2739 if (n > PL_regsize)
2740 PL_regsize = n;
a0d0e21e
LW
2741 break;
2742 case CLOSE:
c277df42 2743 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2744 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2745 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2746 if (n > *PL_reglastparen)
2747 *PL_reglastparen = n;
a01268b5 2748 *PL_reglastcloseparen = n;
a0d0e21e 2749 break;
c277df42
IZ
2750 case GROUPP:
2751 n = ARG(scan); /* which paren pair */
cf93c79d 2752 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2753 break;
2754 case IFTHEN:
2c2d71f5 2755 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2756 if (sw)
2757 next = NEXTOPER(NEXTOPER(scan));
2758 else {
2759 next = scan + ARG(scan);
2760 if (OP(next) == IFTHEN) /* Fake one. */
2761 next = NEXTOPER(NEXTOPER(next));
2762 }
2763 break;
2764 case LOGICAL:
0f5d15d6 2765 logical = scan->flags;
c277df42 2766 break;
2ab05381
IZ
2767/*******************************************************************
2768 PL_regcc contains infoblock about the innermost (...)* loop, and
2769 a pointer to the next outer infoblock.
2770
2771 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2772
2773 1) After matching X, regnode for CURLYX is processed;
2774
9041c2e3 2775 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2776 regmatch() recursively with the starting point at WHILEM node;
2777
2778 3) Each hit of WHILEM node tries to match A and Z (in the order
2779 depending on the current iteration, min/max of {min,max} and
2780 greediness). The information about where are nodes for "A"
2781 and "Z" is read from the infoblock, as is info on how many times "A"
2782 was already matched, and greediness.
2783
2784 4) After A matches, the same WHILEM node is hit again.
2785
2786 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2787 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2788 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2789 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2790 of the external loop.
2791
2792 Currently present infoblocks form a tree with a stem formed by PL_curcc
2793 and whatever it mentions via ->next, and additional attached trees
2794 corresponding to temporarily unset infoblocks as in "5" above.
2795
9041c2e3 2796 In the following picture infoblocks for outer loop of
2ab05381
IZ
2797 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2798 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2799 infoblocks are drawn below the "reset" infoblock.
2800
2801 In fact in the picture below we do not show failed matches for Z and T
2802 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2803 more obvious *why* one needs to *temporary* unset infoblocks.]
2804
2805 Matched REx position InfoBlocks Comment
2806 (Y(A)*?Z)*?T x
2807 Y(A)*?Z)*?T x <- O
2808 Y (A)*?Z)*?T x <- O
2809 Y A)*?Z)*?T x <- O <- I
2810 YA )*?Z)*?T x <- O <- I
2811 YA A)*?Z)*?T x <- O <- I
2812 YAA )*?Z)*?T x <- O <- I
2813 YAA Z)*?T x <- O # Temporary unset I
2814 I
2815
2816 YAAZ Y(A)*?Z)*?T x <- O
2817 I
2818
2819 YAAZY (A)*?Z)*?T x <- O
2820 I
2821
2822 YAAZY A)*?Z)*?T x <- O <- I
2823 I
2824
2825 YAAZYA )*?Z)*?T x <- O <- I
2826 I
2827
2828 YAAZYA Z)*?T x <- O # Temporary unset I
2829 I,I
2830
2831 YAAZYAZ )*?T x <- O
2832 I,I
2833
2834 YAAZYAZ T x # Temporary unset O
2835 O
2836 I,I
2837
2838 YAAZYAZT x
2839 O
2840 I,I
2841 *******************************************************************/
a0d0e21e
LW
2842 case CURLYX: {
2843 CURCUR cc;
3280af22 2844 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2845 /* No need to save/restore up to this paren */
2846 I32 parenfloor = scan->flags;
c277df42
IZ
2847
2848 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2849 next += ARG(next);
3280af22
NIS
2850 cc.oldcc = PL_regcc;
2851 PL_regcc = &cc;
cb434fcc
IZ
2852 /* XXXX Probably it is better to teach regpush to support
2853 parenfloor > PL_regsize... */
2854 if (parenfloor > *PL_reglastparen)
2855 parenfloor = *PL_reglastparen; /* Pessimization... */
2856 cc.parenfloor = parenfloor;
a0d0e21e
LW
2857 cc.cur = -1;
2858 cc.min = ARG1(scan);
2859 cc.max = ARG2(scan);
c277df42 2860 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2861 cc.next = next;
2862 cc.minmod = minmod;
2863 cc.lastloc = 0;
3280af22 2864 PL_reginput = locinput;
a0d0e21e
LW
2865 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2866 regcpblow(cp);
3280af22 2867 PL_regcc = cc.oldcc;
4633a7c4 2868 saySAME(n);
a0d0e21e
LW
2869 }
2870 /* NOT REACHED */
2871 case WHILEM: {
2872 /*
2873 * This is really hard to understand, because after we match
2874 * what we're trying to match, we must make sure the rest of
2c2d71f5 2875 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2876 * to go back UP the parse tree by recursing ever deeper. And
2877 * if it fails, we have to reset our parent's current state
2878 * that we can try again after backing off.
2879 */
2880
c277df42 2881 CHECKPOINT cp, lastcp;
3280af22 2882 CURCUR* cc = PL_regcc;
c277df42
IZ
2883 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2884
4633a7c4 2885 n = cc->cur + 1; /* how many we know we matched */
3280af22 2886 PL_reginput = locinput;
a0d0e21e 2887
c277df42 2888 DEBUG_r(
9041c2e3
NIS
2889 PerlIO_printf(Perl_debug_log,
2890 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2891 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2892 (long)n, (long)cc->min,
c277df42
IZ
2893 (long)cc->max, (long)cc)
2894 );
4633a7c4 2895
a0d0e21e
LW
2896 /* If degenerate scan matches "", assume scan done. */
2897
579cf2c3 2898 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2899 PL_regcc = cc->oldcc;
2ab05381
IZ
2900 if (PL_regcc)
2901 ln = PL_regcc->cur;
c277df42 2902 DEBUG_r(
c3464db5
DD
2903 PerlIO_printf(Perl_debug_log,
2904 "%*s empty match detected, try continuation...\n",
3280af22 2905 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2906 );
a0d0e21e 2907 if (regmatch(cc->next))
4633a7c4 2908 sayYES;
2ab05381
IZ
2909 if (PL_regcc)
2910 PL_regcc->cur = ln;
3280af22 2911 PL_regcc = cc;
4633a7c4 2912 sayNO;
a0d0e21e
LW
2913 }
2914
2915 /* First just match a string of min scans. */
2916
2917 if (n < cc->min) {
2918 cc->cur = n;
2919 cc->lastloc = locinput;
4633a7c4
LW
2920 if (regmatch(cc->scan))
2921 sayYES;
2922 cc->cur = n - 1;
c277df42 2923 cc->lastloc = lastloc;
4633a7c4 2924 sayNO;
a0d0e21e
LW
2925 }
2926
2c2d71f5
JH
2927 if (scan->flags) {
2928 /* Check whether we already were at this position.
2929 Postpone detection until we know the match is not
2930 *that* much linear. */
2931 if (!PL_reg_maxiter) {
2932 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2933 PL_reg_leftiter = PL_reg_maxiter;
2934 }
2935 if (PL_reg_leftiter-- == 0) {
2936 I32 size = (PL_reg_maxiter + 7)/8;
2937 if (PL_reg_poscache) {
2938 if (PL_reg_poscache_size < size) {
2939 Renew(PL_reg_poscache, size, char);
2940 PL_reg_poscache_size = size;
2941 }
2942 Zero(PL_reg_poscache, size, char);
2943 }
2944 else {
2945 PL_reg_poscache_size = size;
2946 Newz(29, PL_reg_poscache, size, char);
2947 }
2948 DEBUG_r(
2949 PerlIO_printf(Perl_debug_log,
2950 "%sDetected a super-linear match, switching on caching%s...\n",
2951 PL_colors[4], PL_colors[5])
2952 );
2953 }
2954 if (PL_reg_leftiter < 0) {
2955 I32 o = locinput - PL_bostr, b;
2956
2957 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2958 b = o % 8;
2959 o /= 8;
2960 if (PL_reg_poscache[o] & (1<<b)) {
2961 DEBUG_r(
2962 PerlIO_printf(Perl_debug_log,
2963 "%*s already tried at this position...\n",
2964 REPORT_CODE_OFF+PL_regindent*2, "")
2965 );
7821416a 2966 sayNO_SILENT;
2c2d71f5
JH
2967 }
2968 PL_reg_poscache[o] |= (1<<b);
2969 }
2970 }
2971
a0d0e21e
LW
2972 /* Prefer next over scan for minimal matching. */
2973
2974 if (cc->minmod) {
3280af22 2975 PL_regcc = cc->oldcc;
2ab05381
IZ
2976 if (PL_regcc)
2977 ln = PL_regcc->cur;
5f05dabc 2978 cp = regcppush(cc->parenfloor);
02db2b7b 2979 REGCP_SET(lastcp);
5f05dabc 2980 if (regmatch(cc->next)) {
c277df42 2981 regcpblow(cp);
4633a7c4 2982 sayYES; /* All done. */
5f05dabc 2983 }
02db2b7b 2984 REGCP_UNWIND(lastcp);
5f05dabc 2985 regcppop();
2ab05381
IZ
2986 if (PL_regcc)
2987 PL_regcc->cur = ln;
3280af22 2988 PL_regcc = cc;
a0d0e21e 2989
c277df42 2990 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 2991 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2992 && !(PL_reg_flags & RF_warned)) {
2993 PL_reg_flags |= RF_warned;
e476b1b5 2994 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2995 "Complex regular subexpression recursion",
2996 REG_INFTY - 1);
c277df42 2997 }
4633a7c4 2998 sayNO;
c277df42 2999 }
a687059c 3000
c277df42 3001 DEBUG_r(
c3464db5
DD
3002 PerlIO_printf(Perl_debug_log,
3003 "%*s trying longer...\n",
3280af22 3004 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3005 );
a0d0e21e 3006 /* Try scanning more and see if it helps. */
3280af22 3007 PL_reginput = locinput;
a0d0e21e
LW
3008 cc->cur = n;
3009 cc->lastloc = locinput;
5f05dabc 3010 cp = regcppush(cc->parenfloor);
02db2b7b 3011 REGCP_SET(lastcp);
5f05dabc 3012 if (regmatch(cc->scan)) {
c277df42 3013 regcpblow(cp);
4633a7c4 3014 sayYES;
5f05dabc 3015 }
02db2b7b 3016 REGCP_UNWIND(lastcp);
5f05dabc 3017 regcppop();
4633a7c4 3018 cc->cur = n - 1;
c277df42 3019 cc->lastloc = lastloc;
4633a7c4 3020 sayNO;
a0d0e21e
LW
3021 }
3022
3023 /* Prefer scan over next for maximal matching. */
3024
3025 if (n < cc->max) { /* More greed allowed? */
5f05dabc 3026 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
3027 cc->cur = n;
3028 cc->lastloc = locinput;
02db2b7b 3029 REGCP_SET(lastcp);
5f05dabc 3030 if (regmatch(cc->scan)) {
c277df42 3031 regcpblow(cp);
4633a7c4 3032 sayYES;
5f05dabc 3033 }
02db2b7b 3034 REGCP_UNWIND(lastcp);
a0d0e21e 3035 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3036 PL_reginput = locinput;
c277df42 3037 DEBUG_r(
c3464db5
DD
3038 PerlIO_printf(Perl_debug_log,
3039 "%*s failed, try continuation...\n",
3280af22 3040 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3041 );
3042 }
9041c2e3 3043 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3044 && !(PL_reg_flags & RF_warned)) {
3280af22 3045 PL_reg_flags |= RF_warned;
e476b1b5 3046 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
3047 "Complex regular subexpression recursion",
3048 REG_INFTY - 1);
a0d0e21e
LW
3049 }
3050
3051 /* Failed deeper matches of scan, so see if this one works. */
3280af22 3052 PL_regcc = cc->oldcc;
2ab05381
IZ
3053 if (PL_regcc)
3054 ln = PL_regcc->cur;
a0d0e21e 3055 if (regmatch(cc->next))
4633a7c4 3056 sayYES;
2ab05381
IZ
3057 if (PL_regcc)
3058 PL_regcc->cur = ln;
3280af22 3059 PL_regcc = cc;
4633a7c4 3060 cc->cur = n - 1;
c277df42 3061 cc->lastloc = lastloc;
4633a7c4 3062 sayNO;
a0d0e21e
LW
3063 }
3064 /* NOT REACHED */
9041c2e3 3065 case BRANCHJ:
c277df42
IZ
3066 next = scan + ARG(scan);
3067 if (next == scan)
3068 next = NULL;
3069 inner = NEXTOPER(NEXTOPER(scan));
3070 goto do_branch;
9041c2e3 3071 case BRANCH:
c277df42
IZ
3072 inner = NEXTOPER(scan);
3073 do_branch:
3074 {
c277df42
IZ
3075 c1 = OP(scan);
3076 if (OP(next) != c1) /* No choice. */
3077 next = inner; /* Avoid recursion. */
a0d0e21e 3078 else {
02db2b7b
IZ
3079 I32 lastparen = *PL_reglastparen;
3080 I32 unwind1;
3081 re_unwind_branch_t *uw;
3082
3083 /* Put unwinding data on stack */
3084 unwind1 = SSNEWt(1,re_unwind_branch_t);
3085 uw = SSPTRt(unwind1,re_unwind_branch_t);
3086 uw->prev = unwind;
3087 unwind = unwind1;
3088 uw->type = ((c1 == BRANCH)
3089 ? RE_UNWIND_BRANCH
3090 : RE_UNWIND_BRANCHJ);
3091 uw->lastparen = lastparen;
3092 uw->next = next;
3093 uw->locinput = locinput;
3094 uw->nextchr = nextchr;
3095#ifdef DEBUGGING
3096 uw->regindent = ++PL_regindent;
3097#endif
c277df42 3098
02db2b7b
IZ
3099 REGCP_SET(uw->lastcp);
3100
3101 /* Now go into the first branch */
3102 next = inner;
a687059c 3103 }
a0d0e21e
LW
3104 }
3105 break;
3106 case MINMOD:
3107 minmod = 1;
3108 break;
c277df42
IZ
3109 case CURLYM:
3110 {
00db4c45 3111 I32 l = 0;
c277df42 3112 CHECKPOINT lastcp;
9041c2e3 3113
c277df42
IZ
3114 /* We suppose that the next guy does not need
3115 backtracking: in particular, it is of constant length,
3116 and has no parenths to influence future backrefs. */
3117 ln = ARG1(scan); /* min to match */
3118 n = ARG2(scan); /* max to match */
c277df42
IZ
3119 paren = scan->flags;
3120 if (paren) {
3280af22
NIS
3121 if (paren > PL_regsize)
3122 PL_regsize = paren;
3123 if (paren > *PL_reglastparen)
3124 *PL_reglastparen = paren;
c277df42 3125 }
dc45a647 3126 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3127 if (paren)
3128 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3129 PL_reginput = locinput;
c277df42
IZ
3130 if (minmod) {
3131 minmod = 0;
3132 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3133 sayNO;
f31a99c8
HS
3134 /* if we matched something zero-length we don't need to
3135 backtrack - capturing parens are already defined, so
3136 the caveat in the maximal case doesn't apply
3137
3138 XXXX if ln == 0, we can redo this check first time
3139 through the following loop
3140 */
3141 if (ln && l == 0)
3142 n = ln; /* don't backtrack */
3280af22 3143 locinput = PL_reginput;
cca55fe3 3144 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3145 regnode *text_node = next;
3146
cca55fe3 3147 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3148
cca55fe3 3149 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3150 else {
cca55fe3
JP
3151 if (PL_regkind[(U8)OP(text_node)] == REF) {
3152 I32 n, ln;
3153 n = ARG(text_node); /* which paren pair */
3154 ln = PL_regstartp[n];
3155 /* assume yes if we haven't seen CLOSEn */
3156 if (
3157 *PL_reglastparen < n ||
3158 ln == -1 ||
3159 ln == PL_regendp[n]
3160 ) {
3161 c1 = c2 = -1000;
3162 goto assume_ok_MM;
3163 }
3164 c1 = *(PL_bostr + ln);
3165 }
3166 else { c1 = (U8)*STRING(text_node); }
af5decee 3167 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3168 c2 = PL_fold[c1];
af5decee 3169 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3170 c2 = PL_fold_locale[c1];
3171 else
3172 c2 = c1;
3173 }
a0ed51b3
LW
3174 }
3175 else
c277df42 3176 c1 = c2 = -1000;
cca55fe3 3177 assume_ok_MM:
02db2b7b 3178 REGCP_SET(lastcp);
5f4b28b2 3179 /* This may be improved if l == 0. */
c277df42
IZ
3180 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3181 /* If it could work, try it. */
3182 if (c1 == -1000 ||
3280af22
NIS
3183 UCHARAT(PL_reginput) == c1 ||
3184 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3185 {
3186 if (paren) {
f31a99c8 3187 if (ln) {
cf93c79d
IZ
3188 PL_regstartp[paren] =
3189 HOPc(PL_reginput, -l) - PL_bostr;
3190 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3191 }
3192 else
cf93c79d 3193 PL_regendp[paren] = -1;
c277df42
IZ
3194 }
3195 if (regmatch(next))
3196 sayYES;
02db2b7b 3197 REGCP_UNWIND(lastcp);
c277df42
IZ
3198 }
3199 /* Couldn't or didn't -- move forward. */
3280af22 3200 PL_reginput = locinput;
c277df42
IZ
3201 if (regrepeat_hard(scan, 1, &l)) {
3202 ln++;
3280af22 3203 locinput = PL_reginput;
c277df42
IZ
3204 }
3205 else
3206 sayNO;
3207 }
a0ed51b3
LW
3208 }
3209 else {
c277df42 3210 n = regrepeat_hard(scan, n, &l);
f31a99c8
HS
3211 /* if we matched something zero-length we don't need to
3212 backtrack, unless the minimum count is zero and we
3213 are capturing the result - in that case the capture
3214 being defined or not may affect later execution
3215 */
3216 if (n != 0 && l == 0 && !(paren && ln == 0))
3217 ln = n; /* don't backtrack */
3280af22 3218 locinput = PL_reginput;
c277df42 3219 DEBUG_r(
5c0ca799 3220 PerlIO_printf(Perl_debug_log,
faccc32b 3221 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3222 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3223 (IV) n, (IV)l)
c277df42
IZ
3224 );
3225 if (n >= ln) {
cca55fe3 3226 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3227 regnode *text_node = next;
3228
cca55fe3 3229 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3230
cca55fe3 3231 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3232 else {
cca55fe3
JP
3233 if (PL_regkind[(U8)OP(text_node)] == REF) {
3234 I32 n, ln;
3235 n = ARG(text_node); /* which paren pair */
3236 ln = PL_regstartp[n];
3237 /* assume yes if we haven't seen CLOSEn */
3238 if (
3239 *PL_reglastparen < n ||
3240 ln == -1 ||
3241 ln == PL_regendp[n]
3242 ) {
3243 c1 = c2 = -1000;
3244 goto assume_ok_REG;
3245 }
3246 c1 = *(PL_bostr + ln);
3247 }
3248 else { c1 = (U8)*STRING(text_node); }
3249
af5decee 3250 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3251 c2 = PL_fold[c1];
af5decee 3252 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3253 c2 = PL_fold_locale[c1];
3254 else
3255 c2 = c1;
3256 }
a0ed51b3
LW
3257 }
3258 else
c277df42
IZ
3259 c1 = c2 = -1000;
3260 }
cca55fe3 3261 assume_ok_REG:
02db2b7b 3262 REGCP_SET(lastcp);
c277df42
IZ
3263 while (n >= ln) {
3264 /* If it could work, try it. */
3265 if (c1 == -1000 ||
3280af22
NIS
3266 UCHARAT(PL_reginput) == c1 ||
3267 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3268 {
3269 DEBUG_r(
c3464db5 3270 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3271 "%*s trying tail with n=%"IVdf"...\n",
3272 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3273 );
3274 if (paren) {
3275 if (n) {
cf93c79d
IZ
3276 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3277 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3278 }
a0ed51b3 3279 else
cf93c79d 3280 PL_regendp[paren] = -1;
c277df42 3281 }
a0ed51b3
LW
3282 if (regmatch(next))
3283 sayYES;
02db2b7b 3284 REGCP_UNWIND(lastcp);
a0ed51b3 3285 }
c277df42
IZ
3286 /* Couldn't or didn't -- back up. */
3287 n--;
dfe13c55 3288 locinput = HOPc(locinput, -l);
3280af22 3289 PL_reginput = locinput;
c277df42
IZ
3290 }
3291 }
3292 sayNO;
3293 break;
3294 }
3295 case CURLYN:
3296 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3297 if (paren > PL_regsize)
3298 PL_regsize = paren;
3299 if (paren > *PL_reglastparen)
3300 *PL_reglastparen = paren;
c277df42
IZ
3301 ln = ARG1(scan); /* min to match */
3302 n = ARG2(scan); /* max to match */
dc45a647 3303 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3304 goto repeat;
a0d0e21e 3305 case CURLY:
c277df42 3306 paren = 0;
a0d0e21e
LW
3307 ln = ARG1(scan); /* min to match */
3308 n = ARG2(scan); /* max to match */
dc45a647 3309 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3310 goto repeat;
3311 case STAR:
3312 ln = 0;
c277df42 3313 n = REG_INFTY;
a0d0e21e 3314 scan = NEXTOPER(scan);
c277df42 3315 paren = 0;
a0d0e21e
LW
3316 goto repeat;
3317 case PLUS:
c277df42
IZ
3318 ln = 1;
3319 n = REG_INFTY;
3320 scan = NEXTOPER(scan);
3321 paren = 0;
3322 repeat:
a0d0e21e
LW
3323 /*
3324 * Lookahead to avoid useless match attempts
3325 * when we know what character comes next.
3326 */
5f80c4cf
JP
3327
3328 /*
3329 * Used to only do .*x and .*?x, but now it allows
3330 * for )'s, ('s and (?{ ... })'s to be in the way
3331 * of the quantifier and the EXACT-like node. -- japhy
3332 */
3333
cca55fe3 3334 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3335 U8 *s;
3336 regnode *text_node = next;
3337
cca55fe3 3338 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3339
cca55fe3 3340 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3341 else {
cca55fe3
JP
3342 if (PL_regkind[(U8)OP(text_node)] == REF) {
3343 I32 n, ln;
3344 n = ARG(text_node); /* which paren pair */
3345 ln = PL_regstartp[n];
3346 /* assume yes if we haven't seen CLOSEn */
3347 if (
3348 *PL_reglastparen < n ||
3349 ln == -1 ||
3350 ln == PL_regendp[n]
3351 ) {
3352 c1 = c2 = -1000;
3353 goto assume_ok_easy;
3354 }
9246c65e 3355 s = (U8*)PL_bostr + ln;
cca55fe3
JP
3356 }
3357 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
3358
3359 if (!UTF) {
3360 c2 = c1 = *s;
f65d3ee7 3361 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3362 c2 = PL_fold[c1];
f65d3ee7 3363 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf 3364 c2 = PL_fold_locale[c1];
1aa99e6b 3365 }
5f80c4cf 3366 else { /* UTF */
f65d3ee7 3367 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 3368 STRLEN ulen1, ulen2;
e7ae6809
JH
3369 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
3370 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
3371
3372 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3373 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3374
3375 c1 = utf8_to_uvuni(tmpbuf1, 0);
3376 c2 = utf8_to_uvuni(tmpbuf2, 0);
5f80c4cf
JP
3377 }
3378 else {
3379 c2 = c1 = utf8_to_uvchr(s, NULL);
3380 }
1aa99e6b
IH
3381 }
3382 }
bbce6d69 3383 }
a0d0e21e 3384 else
bbce6d69 3385 c1 = c2 = -1000;
cca55fe3 3386 assume_ok_easy:
3280af22 3387 PL_reginput = locinput;
a0d0e21e 3388 if (minmod) {
c277df42 3389 CHECKPOINT lastcp;
a0d0e21e
LW
3390 minmod = 0;
3391 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3392 sayNO;
a0ed51b3 3393 locinput = PL_reginput;
02db2b7b 3394 REGCP_SET(lastcp);
0fe9bf95 3395 if (c1 != -1000) {
1aa99e6b 3396 char *e; /* Should not check after this */
0fe9bf95
IZ
3397 char *old = locinput;
3398
1aa99e6b 3399 if (n == REG_INFTY) {
0fe9bf95 3400 e = PL_regeol - 1;
1aa99e6b
IH
3401 if (do_utf8)
3402 while (UTF8_IS_CONTINUATION(*(U8*)e))
3403 e--;
3404 }
3405 else if (do_utf8) {
3406 int m = n - ln;
3407 for (e = locinput;
3408 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3409 e += UTF8SKIP(e);
3410 }
3411 else {
3412 e = locinput + n - ln;
3413 if (e >= PL_regeol)
3414 e = PL_regeol - 1;
3415 }
0fe9bf95 3416 while (1) {
1aa99e6b 3417 int count;
0fe9bf95 3418 /* Find place 'next' could work */
1aa99e6b
IH
3419 if (!do_utf8) {
3420 if (c1 == c2) {
a8e8ab15
JH
3421 while (locinput <= e &&
3422 UCHARAT(locinput) != c1)
1aa99e6b
IH
3423 locinput++;
3424 } else {
9041c2e3 3425 while (locinput <= e
a8e8ab15
JH
3426 && UCHARAT(locinput) != c1
3427 && UCHARAT(locinput) != c2)
1aa99e6b
IH
3428 locinput++;
3429 }
3430 count = locinput - old;
3431 }
3432 else {
3433 STRLEN len;
3434 if (c1 == c2) {
3435 for (count = 0;
3436 locinput <= e &&
9041c2e3 3437 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3438 count++)
3439 locinput += len;
3440
3441 } else {
3442 for (count = 0; locinput <= e; count++) {
9041c2e3 3443 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3444 if (c == c1 || c == c2)
3445 break;
9041c2e3 3446 locinput += len;
1aa99e6b
IH
3447 }
3448 }
0fe9bf95 3449 }
9041c2e3 3450 if (locinput > e)
0fe9bf95
IZ
3451 sayNO;
3452 /* PL_reginput == old now */
3453 if (locinput != old) {
3454 ln = 1; /* Did some */
1aa99e6b 3455 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3456 sayNO;
3457 }
3458 /* PL_reginput == locinput now */
29d1e993 3459 TRYPAREN(paren, ln, locinput);
0fe9bf95 3460 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3461 REGCP_UNWIND(lastcp);
0fe9bf95 3462 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3463 old = locinput;
3464 if (do_utf8)
3465 locinput += UTF8SKIP(locinput);
3466 else
3467 locinput++;
0fe9bf95
IZ
3468 }
3469 }
3470 else
c277df42 3471 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3472 UV c;
3473 if (c1 != -1000) {
3474 if (do_utf8)
9041c2e3 3475 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3476 else
9041c2e3 3477 c = UCHARAT(PL_reginput);
2390ecbc
PP
3478 /* If it could work, try it. */
3479 if (c == c1 || c == c2)
3480 {
3481 TRYPAREN(paren, n, PL_reginput);
3482 REGCP_UNWIND(lastcp);
3483 }
1aa99e6b 3484 }
a0d0e21e 3485 /* If it could work, try it. */
2390ecbc 3486 else if (c1 == -1000)
bbce6d69 3487 {
29d1e993 3488 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3489 REGCP_UNWIND(lastcp);
bbce6d69 3490 }
c277df42 3491 /* Couldn't or didn't -- move forward. */
a0ed51b3 3492 PL_reginput = locinput;
a0d0e21e
LW
3493 if (regrepeat(scan, 1)) {
3494 ln++;
a0ed51b3
LW
3495 locinput = PL_reginput;
3496 }
3497 else
4633a7c4 3498 sayNO;
a0d0e21e
LW
3499 }
3500 }
3501 else {
c277df42 3502 CHECKPOINT lastcp;
a0d0e21e 3503 n = regrepeat(scan, n);
a0ed51b3 3504 locinput = PL_reginput;
22c35a8c 3505 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3506 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3507 ln = n; /* why back off? */
1aeab75a
GS
3508 /* ...because $ and \Z can match before *and* after
3509 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3510 We should back off by one in this case. */
3511 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3512 ln--;
3513 }
02db2b7b 3514 REGCP_SET(lastcp);
c277df42 3515 if (paren) {
8fa7f367 3516 UV c = 0;
c277df42 3517 while (n >= ln) {
1aa99e6b
IH
3518 if (c1 != -1000) {
3519 if (do_utf8)
9041c2e3 3520 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3521 else
9041c2e3 3522 c = UCHARAT(PL_reginput);
1aa99e6b 3523 }
c277df42 3524 /* If it could work, try it. */
1aa99e6b 3525 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3526 {
29d1e993 3527 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3528 REGCP_UNWIND(lastcp);
c277df42
IZ
3529 }
3530 /* Couldn't or didn't -- back up. */
3531 n--;
dfe13c55 3532 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3533 }
a0ed51b3
LW
3534 }
3535 else {
8fa7f367 3536 UV c = 0;
c277df42 3537 while (n >= ln) {
1aa99e6b
IH
3538 if (c1 != -1000) {
3539 if (do_utf8)
9041c2e3 3540 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3541 else
9041c2e3 3542 c = UCHARAT(PL_reginput);
1aa99e6b 3543 }
c277df42 3544 /* If it could work, try it. */
1aa99e6b 3545 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3546 {
29d1e993 3547 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3548 REGCP_UNWIND(lastcp);
c277df42
IZ
3549 }
3550 /* Couldn't or didn't -- back up. */
3551 n--;
dfe13c55 3552 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3553 }
a0d0e21e
LW
3554 }
3555 }
4633a7c4 3556 sayNO;
c277df42 3557 break;
a0d0e21e 3558 case END:
0f5d15d6
IZ
3559 if (PL_reg_call_cc) {
3560 re_cc_state *cur_call_cc = PL_reg_call_cc;
3561 CURCUR *cctmp = PL_regcc;
3562 regexp *re = PL_reg_re;
3563 CHECKPOINT cp, lastcp;
3564
3565 cp = regcppush(0); /* Save *all* the pos