This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
gcc -Wall dewhine.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
a687059c
LW
83#include "regcomp.h"
84
c277df42
IZ
85#define RF_tainted 1 /* tainted information used? */
86#define RF_warned 2 /* warned about big count? */
ce862d02 87#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
88#define RF_utf8 8 /* String contains multibyte chars? */
89
90#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
91
92#define RS_init 1 /* eval environment created */
93#define RS_set 2 /* replsv value is set */
c277df42 94
a687059c
LW
95#ifndef STATIC
96#define STATIC static
97#endif
98
c277df42
IZ
99/*
100 * Forwards.
101 */
102
a0ed51b3 103#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 104#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 105
dfe13c55
GS
106#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
108#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
109#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
110#define HOPc(pos,off) ((char*)HOP(pos,off))
111#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 112
efb30f32 113#define HOPBACK(pos, off) ( \
53c4c00c 114 (UTF && PL_reg_match_utf8) \
efb30f32
HS
115 ? reghopmaybe((U8*)pos, -off) \
116 : (pos - off >= PL_bostr) \
117 ? (U8*)(pos - off) \
118 : (U8*)NULL \
119)
120#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
121
1aa99e6b
IH
122#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
123#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
124#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
125#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
126#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
127#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
128
8269fa76 129#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 130
5f80c4cf 131/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
132#define JUMPABLE(rn) ( \
133 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
cca55fe3
JP
134 OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
135 OP(rn) == PLUS || OP(rn) == MINMOD || \
136 (PL_regkind[(U8)OP(rn)] == CURLY && ARG1(rn) > 0) \
e2d8ce26
JP
137)
138
cca55fe3
JP
139#define HAS_TEXT(rn) ( \
140 PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \
141)
e2d8ce26 142
cca55fe3 143#define FIND_NEXT_IMPT(rn) STMT_START { \
e2d8ce26 144 while (JUMPABLE(rn)) \
cca55fe3
JP
145 if (OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
146 PL_regkind[(U8)OP(rn)] == CURLY) \
e2d8ce26 147 rn = NEXTOPER(NEXTOPER(rn)); \
cca55fe3
JP
148 else if (OP(rn) == PLUS) \
149 rn = NEXTOPER(rn); \
e2d8ce26 150 else rn += NEXT_OFF(rn); \
5f80c4cf 151} STMT_END
74750237 152
acfe0abc 153static void restore_pos(pTHX_ void *arg);
51371543 154
76e3520e 155STATIC CHECKPOINT
cea2e8a9 156S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 157{
3280af22 158 int retval = PL_savestack_ix;
b1ce53c5
JH
159#define REGCP_PAREN_ELEMS 4
160 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
161 int p;
162
e49a9654
IH
163 if (paren_elems_to_push < 0)
164 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
165
a01268b5 166#define REGCP_OTHER_ELEMS 6
b1ce53c5 167 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 168 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 169/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
170 SSPUSHINT(PL_regendp[p]);
171 SSPUSHINT(PL_regstartp[p]);
3280af22 172 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
173 SSPUSHINT(p);
174 }
b1ce53c5 175/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
176 SSPUSHINT(PL_regsize);
177 SSPUSHINT(*PL_reglastparen);
a01268b5 178 SSPUSHINT(*PL_reglastcloseparen);
3280af22 179 SSPUSHPTR(PL_reginput);
41123dfd
JH
180#define REGCP_FRAME_ELEMS 2
181/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
182 * are needed for the regexp context stack bookkeeping. */
183 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 184 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 185
a0d0e21e
LW
186 return retval;
187}
188
c277df42 189/* These are needed since we do not localize EVAL nodes: */
02db2b7b 190# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 191 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 192 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 193
02db2b7b 194# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 195 PerlIO_printf(Perl_debug_log, \
faccc32b 196 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 197 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 198
76e3520e 199STATIC char *
cea2e8a9 200S_regcppop(pTHX)
a0d0e21e 201{
b1ce53c5 202 I32 i;
a0d0e21e
LW
203 U32 paren = 0;
204 char *input;
cf93c79d 205 I32 tmps;
b1ce53c5
JH
206
207 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 208 i = SSPOPINT;
b1ce53c5
JH
209 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
210 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 211 input = (char *) SSPOPPTR;
a01268b5 212 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
213 *PL_reglastparen = SSPOPINT;
214 PL_regsize = SSPOPINT;
b1ce53c5
JH
215
216 /* Now restore the parentheses context. */
41123dfd
JH
217 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
218 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 219 paren = (U32)SSPOPINT;
3280af22 220 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
221 PL_regstartp[paren] = SSPOPINT;
222 tmps = SSPOPINT;
3280af22
NIS
223 if (paren <= *PL_reglastparen)
224 PL_regendp[paren] = tmps;
c277df42 225 DEBUG_r(
c3464db5 226 PerlIO_printf(Perl_debug_log,
b900a521 227 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 228 (UV)paren, (IV)PL_regstartp[paren],
b900a521 229 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 230 (IV)PL_regendp[paren],
3280af22 231 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 232 );
a0d0e21e 233 }
c277df42 234 DEBUG_r(
3280af22 235 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 236 PerlIO_printf(Perl_debug_log,
faccc32b
JH
237 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
238 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
239 }
240 );
daf18116 241#if 1
dafc8851
JH
242 /* It would seem that the similar code in regtry()
243 * already takes care of this, and in fact it is in
244 * a better location to since this code can #if 0-ed out
245 * but the code in regtry() is needed or otherwise tests
246 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
247 * (as of patchlevel 7877) will fail. Then again,
248 * this code seems to be necessary or otherwise
249 * building DynaLoader will fail:
250 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
251 * --jhi */
3280af22
NIS
252 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
253 if (paren > PL_regsize)
cf93c79d
IZ
254 PL_regstartp[paren] = -1;
255 PL_regendp[paren] = -1;
a0d0e21e 256 }
dafc8851 257#endif
a0d0e21e
LW
258 return input;
259}
260
0f5d15d6 261STATIC char *
cea2e8a9 262S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
263{
264 I32 tmp = PL_savestack_ix;
265
266 PL_savestack_ix = ss;
267 regcppop();
268 PL_savestack_ix = tmp;
942e002e 269 return Nullch;
0f5d15d6
IZ
270}
271
272typedef struct re_cc_state
273{
274 I32 ss;
275 regnode *node;
276 struct re_cc_state *prev;
277 CURCUR *cc;
278 regexp *re;
279} re_cc_state;
280
02db2b7b 281#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 282
29d1e993
HS
283#define TRYPAREN(paren, n, input) { \
284 if (paren) { \
285 if (n) { \
286 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
287 PL_regendp[paren] = input - PL_bostr; \
288 } \
289 else \
290 PL_regendp[paren] = -1; \
291 } \
292 if (regmatch(next)) \
293 sayYES; \
294 if (paren && n) \
295 PL_regendp[paren] = -1; \
296}
297
298
a687059c 299/*
e50aee73 300 * pregexec and friends
a687059c
LW
301 */
302
303/*
c277df42 304 - pregexec - match a regexp against a string
a687059c 305 */
c277df42 306I32
864dbfa3 307Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 308 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
309/* strend: pointer to null at end of string */
310/* strbeg: real beginning of string */
311/* minend: end of match must be >=minend after stringarg. */
312/* nosave: For optimizations. */
313{
314 return
9041c2e3 315 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
316 nosave ? 0 : REXEC_COPY_STR);
317}
0f5d15d6
IZ
318
319STATIC void
cea2e8a9 320S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
321{
322 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
323#ifdef DEBUGGING
324 PL_regprogram = prog->program;
325#endif
326 PL_regnpar = prog->nparens;
9041c2e3
NIS
327 PL_regdata = prog->data;
328 PL_reg_re = prog;
0f5d15d6 329}
22e551b9 330
9041c2e3 331/*
cad2e5aa
JH
332 * Need to implement the following flags for reg_anch:
333 *
334 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
335 * USE_INTUIT_ML
336 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
337 * INTUIT_AUTORITATIVE_ML
338 * INTUIT_ONCE_NOML - Intuit can match in one location only.
339 * INTUIT_ONCE_ML
340 *
341 * Another flag for this function: SECOND_TIME (so that float substrs
342 * with giant delta may be not rechecked).
343 */
344
345/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
346
2c2d71f5 347/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
348 Otherwise, only SvCUR(sv) is used to get strbeg. */
349
350/* XXXX We assume that strpos is strbeg unless sv. */
351
6eb5f6b9
JH
352/* XXXX Some places assume that there is a fixed substring.
353 An update may be needed if optimizer marks as "INTUITable"
354 RExen without fixed substrings. Similarly, it is assumed that
355 lengths of all the strings are no more than minlen, thus they
356 cannot come from lookahead.
357 (Or minlen should take into account lookahead.) */
358
2c2d71f5
JH
359/* A failure to find a constant substring means that there is no need to make
360 an expensive call to REx engine, thus we celebrate a failure. Similarly,
361 finding a substring too deep into the string means that less calls to
30944b6d
IZ
362 regtry() should be needed.
363
364 REx compiler's optimizer found 4 possible hints:
365 a) Anchored substring;
366 b) Fixed substring;
367 c) Whether we are anchored (beginning-of-line or \G);
368 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 369 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
370 string which does not contradict any of them.
371 */
2c2d71f5 372
6eb5f6b9
JH
373/* Most of decisions we do here should have been done at compile time.
374 The nodes of the REx which we used for the search should have been
375 deleted from the finite automaton. */
376
cad2e5aa
JH
377char *
378Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
379 char *strend, U32 flags, re_scream_pos_data *data)
380{
b7953727 381 register I32 start_shift = 0;
cad2e5aa 382 /* Should be nonnegative! */
b7953727 383 register I32 end_shift = 0;
2c2d71f5
JH
384 register char *s;
385 register SV *check;
a1933d95 386 char *strbeg;
cad2e5aa
JH
387 char *t;
388 I32 ml_anch;
6eb5f6b9 389 register char *other_last = Nullch; /* other substr checked before this */
b7953727 390 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
391#ifdef DEBUGGING
392 char *i_strpos = strpos;
ce333219 393 SV *dsv = PERL_DEBUG_PAD_ZERO(0);
30944b6d 394#endif
cad2e5aa 395
b8d68ded
JH
396 if (prog->reganch & ROPT_UTF8) {
397 DEBUG_r(PerlIO_printf(Perl_debug_log,
398 "UTF-8 regex...\n"));
399 PL_reg_flags |= RF_utf8;
400 }
401
2a782b5b 402 DEBUG_r({
b8d68ded
JH
403 char *s = PL_reg_match_utf8 ?
404 sv_uni_display(dsv, sv, 60, 0) : strpos;
405 int len = PL_reg_match_utf8 ?
406 strlen(s) : strend - strpos;
2a782b5b
JH
407 if (!PL_colorset)
408 reginitcolors();
b8d68ded
JH
409 if (PL_reg_match_utf8)
410 DEBUG_r(PerlIO_printf(Perl_debug_log,
411 "UTF-8 target...\n"));
2a782b5b
JH
412 PerlIO_printf(Perl_debug_log,
413 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
414 PL_colors[4],PL_colors[5],PL_colors[0],
415 prog->precomp,
416 PL_colors[1],
417 (strlen(prog->precomp) > 60 ? "..." : ""),
418 PL_colors[0],
419 (int)(len > 60 ? 60 : len),
420 s, PL_colors[1],
421 (len > 60 ? "..." : "")
422 );
423 });
cad2e5aa 424
1aa99e6b 425 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
a72c7584
JH
426 DEBUG_r(PerlIO_printf(Perl_debug_log,
427 "String too short... [re_intuit_start]\n"));
cad2e5aa 428 goto fail;
2c2d71f5 429 }
a1933d95 430 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 431 PL_regeol = strend;
653099ff 432 check = prog->check_substr;
2c2d71f5 433 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
434 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
435 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 436 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 437
7e25d62c
JH
438 if (!ml_anch) {
439 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
440 | ROPT_IMPLICIT)) /* not a real BOL */
441 /* SvCUR is not set on references: SvRV and SvPVX overlap */
442 && sv && !SvROK(sv)
443 && (strpos != strbeg)) {
444 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
445 goto fail;
446 }
447 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 448 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 449 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
450 I32 slen;
451
1aa99e6b 452 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
453 if (SvTAIL(check)) {
454 slen = SvCUR(check); /* >= 1 */
cad2e5aa 455
9041c2e3 456 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
457 || (strend - s == slen && strend[-1] != '\n')) {
458 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
459 goto fail_finish;
cad2e5aa
JH
460 }
461 /* Now should match s[0..slen-2] */
462 slen--;
653099ff 463 if (slen && (*SvPVX(check) != *s
cad2e5aa 464 || (slen > 1
653099ff 465 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
466 report_neq:
467 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
468 goto fail_finish;
469 }
cad2e5aa 470 }
653099ff
GS
471 else if (*SvPVX(check) != *s
472 || ((slen = SvCUR(check)) > 1
473 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
474 goto report_neq;
475 goto success_at_start;
7e25d62c 476 }
cad2e5aa 477 }
2c2d71f5 478 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 479 s = strpos;
2c2d71f5 480 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 481 end_shift = prog->minlen - start_shift -
653099ff 482 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 483 if (!ml_anch) {
653099ff
GS
484 I32 end = prog->check_offset_max + CHR_SVLEN(check)
485 - (SvTAIL(check) != 0);
1aa99e6b 486 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
487
488 if (end_shift < eshift)
489 end_shift = eshift;
490 }
cad2e5aa 491 }
2c2d71f5 492 else { /* Can match at random position */
cad2e5aa
JH
493 ml_anch = 0;
494 s = strpos;
2c2d71f5
JH
495 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
496 /* Should be nonnegative! */
497 end_shift = prog->minlen - start_shift -
653099ff 498 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
499 }
500
2c2d71f5 501#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 502 if (end_shift < 0)
6bbae5e6 503 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
504#endif
505
2c2d71f5
JH
506 restart:
507 /* Find a possible match in the region s..strend by looking for
508 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 509 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
510 I32 p = -1; /* Internal iterator of scream. */
511 I32 *pp = data ? data->scream_pos : &p;
512
2c2d71f5
JH
513 if (PL_screamfirst[BmRARE(check)] >= 0
514 || ( BmRARE(check) == '\n'
515 && (BmPREVIOUS(check) == SvCUR(check) - 1)
516 && SvTAIL(check) ))
9041c2e3 517 s = screaminstr(sv, check,
2c2d71f5 518 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 519 else
2c2d71f5 520 goto fail_finish;
cad2e5aa
JH
521 if (data)
522 *data->scream_olds = s;
523 }
f33976b4 524 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
525 s = fbm_instr((U8*)(s + start_shift),
526 (U8*)(strend - end_shift),
527 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 528 else
1aa99e6b
IH
529 s = fbm_instr(HOP3(s, start_shift, strend),
530 HOP3(strend, -end_shift, strbeg),
2c2d71f5 531 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
532
533 /* Update the count-of-usability, remove useless subpatterns,
534 unshift s. */
2c2d71f5
JH
535
536 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
537 (s ? "Found" : "Did not find"),
538 ((check == prog->anchored_substr) ? "anchored" : "floating"),
539 PL_colors[0],
7b0972df
JH
540 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
541 SvPVX(check),
2c2d71f5
JH
542 PL_colors[1], (SvTAIL(check) ? "$" : ""),
543 (s ? " at offset " : "...\n") ) );
544
545 if (!s)
546 goto fail_finish;
547
6eb5f6b9
JH
548 check_at = s;
549
2c2d71f5 550 /* Finish the diagnostic message */
30944b6d 551 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
552
553 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
554 Start with the other substr.
555 XXXX no SCREAM optimization yet - and a very coarse implementation
556 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
557 *always* match. Probably should be marked during compile...
558 Probably it is right to do no SCREAM here...
559 */
560
561 if (prog->float_substr && prog->anchored_substr) {
30944b6d 562 /* Take into account the "other" substring. */
2c2d71f5
JH
563 /* XXXX May be hopelessly wrong for UTF... */
564 if (!other_last)
6eb5f6b9 565 other_last = strpos;
2c2d71f5 566 if (check == prog->float_substr) {
30944b6d
IZ
567 do_other_anchored:
568 {
1aa99e6b 569 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
570 char *s1 = s;
571
2c2d71f5
JH
572 t = s - prog->check_offset_max;
573 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
574 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 575 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 576 && t > strpos)))
30944b6d 577 /* EMPTY */;
2c2d71f5
JH
578 else
579 t = strpos;
1aa99e6b 580 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
581 if (t < other_last) /* These positions already checked */
582 t = other_last;
1aa99e6b 583 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
584 if (last < last1)
585 last1 = last;
586 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
587 /* On end-of-str: see comment below. */
588 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
589 HOP3(HOP3(last1, prog->anchored_offset, strend)
590 + SvCUR(prog->anchored_substr),
591 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
592 prog->anchored_substr,
593 PL_multiline ? FBMrf_MULTILINE : 0);
594 DEBUG_r(PerlIO_printf(Perl_debug_log,
595 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
596 (s ? "Found" : "Contradicts"),
597 PL_colors[0],
7b0972df
JH
598 (int)(SvCUR(prog->anchored_substr)
599 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
600 SvPVX(prog->anchored_substr),
601 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
602 if (!s) {
603 if (last1 >= last2) {
604 DEBUG_r(PerlIO_printf(Perl_debug_log,
605 ", giving up...\n"));
606 goto fail_finish;
607 }
608 DEBUG_r(PerlIO_printf(Perl_debug_log,
609 ", trying floating at offset %ld...\n",
1aa99e6b
IH
610 (long)(HOP3c(s1, 1, strend) - i_strpos)));
611 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
612 s = HOP3c(last, 1, strend);
2c2d71f5
JH
613 goto restart;
614 }
615 else {
616 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 617 (long)(s - i_strpos)));
1aa99e6b
IH
618 t = HOP3c(s, -prog->anchored_offset, strbeg);
619 other_last = HOP3c(s, 1, strend);
30944b6d 620 s = s1;
2c2d71f5
JH
621 if (t == strpos)
622 goto try_at_start;
2c2d71f5
JH
623 goto try_at_offset;
624 }
30944b6d 625 }
2c2d71f5
JH
626 }
627 else { /* Take into account the floating substring. */
628 char *last, *last1;
629 char *s1 = s;
630
1aa99e6b
IH
631 t = HOP3c(s, -start_shift, strbeg);
632 last1 = last =
633 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
634 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
635 last = HOP3c(t, prog->float_max_offset, strend);
636 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
637 if (s < other_last)
638 s = other_last;
2c2d71f5
JH
639 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
640 /* fbm_instr() takes into account exact value of end-of-str
641 if the check is SvTAIL(ed). Since false positives are OK,
642 and end-of-str is not later than strend we are OK. */
643 s = fbm_instr((unsigned char*)s,
644 (unsigned char*)last + SvCUR(prog->float_substr)
645 - (SvTAIL(prog->float_substr)!=0),
646 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
647 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
648 (s ? "Found" : "Contradicts"),
649 PL_colors[0],
7b0972df
JH
650 (int)(SvCUR(prog->float_substr)
651 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
652 SvPVX(prog->float_substr),
653 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
654 if (!s) {
655 if (last1 == last) {
656 DEBUG_r(PerlIO_printf(Perl_debug_log,
657 ", giving up...\n"));
658 goto fail_finish;
659 }
660 DEBUG_r(PerlIO_printf(Perl_debug_log,
661 ", trying anchored starting at offset %ld...\n",
30944b6d 662 (long)(s1 + 1 - i_strpos)));
803ff556 663 other_last = last;
1aa99e6b 664 s = HOP3c(t, 1, strend);
2c2d71f5
JH
665 goto restart;
666 }
667 else {
668 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 669 (long)(s - i_strpos)));
803ff556 670 other_last = s; /* Fix this later. --Hugo */
30944b6d 671 s = s1;
2c2d71f5
JH
672 if (t == strpos)
673 goto try_at_start;
2c2d71f5
JH
674 goto try_at_offset;
675 }
676 }
cad2e5aa 677 }
2c2d71f5
JH
678
679 t = s - prog->check_offset_max;
2c2d71f5
JH
680 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
681 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
682 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
683 && t > strpos))) {
2c2d71f5
JH
684 /* Fixed substring is found far enough so that the match
685 cannot start at strpos. */
686 try_at_offset:
cad2e5aa 687 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
688 /* Eventually fbm_*() should handle this, but often
689 anchored_offset is not 0, so this check will not be wasted. */
690 /* XXXX In the code below we prefer to look for "^" even in
691 presence of anchored substrings. And we search even
692 beyond the found float position. These pessimizations
693 are historical artefacts only. */
694 find_anchor:
2c2d71f5 695 while (t < strend - prog->minlen) {
cad2e5aa 696 if (*t == '\n') {
4ee3650e 697 if (t < check_at - prog->check_offset_min) {
30944b6d 698 if (prog->anchored_substr) {
4ee3650e
GS
699 /* Since we moved from the found position,
700 we definitely contradict the found anchored
30944b6d
IZ
701 substr. Due to the above check we do not
702 contradict "check" substr.
703 Thus we can arrive here only if check substr
704 is float. Redo checking for "other"=="fixed".
705 */
9041c2e3 706 strpos = t + 1;
30944b6d
IZ
707 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
708 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
709 goto do_other_anchored;
710 }
4ee3650e
GS
711 /* We don't contradict the found floating substring. */
712 /* XXXX Why not check for STCLASS? */
cad2e5aa 713 s = t + 1;
2c2d71f5 714 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 715 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
716 goto set_useful;
717 }
4ee3650e
GS
718 /* Position contradicts check-string */
719 /* XXXX probably better to look for check-string
720 than for "\n", so one should lower the limit for t? */
721 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 722 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 723 other_last = strpos = s = t + 1;
cad2e5aa
JH
724 goto restart;
725 }
726 t++;
727 }
2c2d71f5
JH
728 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
729 PL_colors[0],PL_colors[1]));
730 goto fail_finish;
cad2e5aa 731 }
f5952150
GS
732 else {
733 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
734 PL_colors[0],PL_colors[1]));
735 }
cad2e5aa
JH
736 s = t;
737 set_useful:
2c2d71f5 738 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
739 }
740 else {
f5952150 741 /* The found string does not prohibit matching at strpos,
2c2d71f5 742 - no optimization of calling REx engine can be performed,
f5952150
GS
743 unless it was an MBOL and we are not after MBOL,
744 or a future STCLASS check will fail this. */
2c2d71f5
JH
745 try_at_start:
746 /* Even in this situation we may use MBOL flag if strpos is offset
747 wrt the start of the string. */
05b4157f 748 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 749 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
750 /* May be due to an implicit anchor of m{.*foo} */
751 && !(prog->reganch & ROPT_IMPLICIT))
752 {
cad2e5aa
JH
753 t = strpos;
754 goto find_anchor;
755 }
30944b6d 756 DEBUG_r( if (ml_anch)
f5952150
GS
757 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
758 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 759 );
2c2d71f5 760 success_at_start:
30944b6d 761 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 762 && prog->check_substr /* Could be deleted already */
cad2e5aa 763 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
764 && prog->check_substr == prog->float_substr)
765 {
cad2e5aa 766 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 767 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
768 SvREFCNT_dec(prog->check_substr);
769 prog->check_substr = Nullsv; /* disable */
770 prog->float_substr = Nullsv; /* clear */
5e39e1e5 771 check = Nullsv; /* abort */
cad2e5aa 772 s = strpos;
3cf5c195
IZ
773 /* XXXX This is a remnant of the old implementation. It
774 looks wasteful, since now INTUIT can use many
6eb5f6b9 775 other heuristics. */
cad2e5aa
JH
776 prog->reganch &= ~RE_USE_INTUIT;
777 }
778 else
779 s = strpos;
780 }
781
6eb5f6b9
JH
782 /* Last resort... */
783 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
784 if (prog->regstclass) {
785 /* minlen == 0 is possible if regstclass is \b or \B,
786 and the fixed substr is ''$.
787 Since minlen is already taken into account, s+1 is before strend;
788 accidentally, minlen >= 1 guaranties no false positives at s + 1
789 even for \b or \B. But (minlen? 1 : 0) below assumes that
790 regstclass does not come from lookahead... */
791 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
792 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 793 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 794 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 795 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 796 : 1);
6eb5f6b9 797 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
798 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
799 : (prog->float_substr
800 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
801 cl_l, strend)
802 : strend);
a1933d95 803 char *startpos = strbeg;
6eb5f6b9
JH
804
805 t = s;
76384e4a 806 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 807 PL_regdata = prog->data;
76384e4a
GS
808 PL_bostr = startpos;
809 }
f33976b4 810 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
6eb5f6b9
JH
811 if (!s) {
812#ifdef DEBUGGING
b7953727 813 char *what = 0;
6eb5f6b9
JH
814#endif
815 if (endpos == strend) {
816 DEBUG_r( PerlIO_printf(Perl_debug_log,
817 "Could not match STCLASS...\n") );
818 goto fail;
819 }
66e933ab
GS
820 DEBUG_r( PerlIO_printf(Perl_debug_log,
821 "This position contradicts STCLASS...\n") );
653099ff
GS
822 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
823 goto fail;
6eb5f6b9
JH
824 /* Contradict one of substrings */
825 if (prog->anchored_substr) {
6eb5f6b9
JH
826 if (prog->anchored_substr == check) {
827 DEBUG_r( what = "anchored" );
828 hop_and_restart:
1aa99e6b 829 s = HOP3c(t, 1, strend);
66e933ab
GS
830 if (s + start_shift + end_shift > strend) {
831 /* XXXX Should be taken into account earlier? */
832 DEBUG_r( PerlIO_printf(Perl_debug_log,
833 "Could not match STCLASS...\n") );
834 goto fail;
835 }
5e39e1e5
HS
836 if (!check)
837 goto giveup;
6eb5f6b9 838 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 839 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
840 what, (long)(s + start_shift - i_strpos)) );
841 goto restart;
842 }
66e933ab 843 /* Have both, check_string is floating */
6eb5f6b9
JH
844 if (t + start_shift >= check_at) /* Contradicts floating=check */
845 goto retry_floating_check;
846 /* Recheck anchored substring, but not floating... */
9041c2e3 847 s = check_at;
5e39e1e5
HS
848 if (!check)
849 goto giveup;
6eb5f6b9 850 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 851 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
852 (long)(other_last - i_strpos)) );
853 goto do_other_anchored;
854 }
60e71179
GS
855 /* Another way we could have checked stclass at the
856 current position only: */
857 if (ml_anch) {
858 s = t = t + 1;
5e39e1e5
HS
859 if (!check)
860 goto giveup;
60e71179 861 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
862 "Looking for /%s^%s/m starting at offset %ld...\n",
863 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 864 goto try_at_offset;
66e933ab 865 }
60e71179
GS
866 if (!prog->float_substr) /* Could have been deleted */
867 goto fail;
6eb5f6b9
JH
868 /* Check is floating subtring. */
869 retry_floating_check:
870 t = check_at - start_shift;
871 DEBUG_r( what = "floating" );
872 goto hop_and_restart;
873 }
b7953727
JH
874 if (t != s) {
875 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 876 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
877 (long)(t - i_strpos), (long)(s - i_strpos))
878 );
879 }
880 else {
881 DEBUG_r(PerlIO_printf(Perl_debug_log,
882 "Does not contradict STCLASS...\n");
883 );
884 }
6eb5f6b9 885 }
5e39e1e5
HS
886 giveup:
887 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
888 PL_colors[4], (check ? "Guessed" : "Giving up"),
889 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 890 return s;
2c2d71f5
JH
891
892 fail_finish: /* Substring not found */
66e933ab
GS
893 if (prog->check_substr) /* could be removed already */
894 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 895 fail:
2c2d71f5 896 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
897 PL_colors[4],PL_colors[5]));
898 return Nullch;
899}
9661b544 900
6eb5f6b9 901/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
902STATIC char *
903S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 904{
6eb5f6b9
JH
905 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
906 char *m;
d8093b23 907 STRLEN ln;
908 unsigned int c1;
909 unsigned int c2;
6eb5f6b9
JH
910 char *e;
911 register I32 tmp = 1; /* Scratch variable? */
53c4c00c 912 register bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 913
6eb5f6b9
JH
914 /* We know what class it must start with. */
915 switch (OP(c)) {
6eb5f6b9 916 case ANYOF:
a0ed51b3 917 while (s < strend) {
ffc61ed2 918 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
919 if (tmp && (norun || regtry(prog, s)))
920 goto got_it;
921 else
922 tmp = doevery;
a0ed51b3 923 }
6eb5f6b9
JH
924 else
925 tmp = 1;
ffc61ed2 926 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 927 }
6eb5f6b9 928 break;
f33976b4
DB
929 case CANY:
930 while (s < strend) {
931 if (tmp && (norun || regtry(prog, s)))
932 goto got_it;
933 else
934 tmp = doevery;
935 s++;
936 }
937 break;
6eb5f6b9
JH
938 case EXACTF:
939 m = STRING(c);
940 ln = STR_LEN(c);
1aa99e6b 941 if (UTF) {
a2a2844f 942 STRLEN ulen1, ulen2;
e7ae6809
JH
943 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
944 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
945
946 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
947 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
948
949 c1 = utf8_to_uvuni(tmpbuf1, 0);
950 c2 = utf8_to_uvuni(tmpbuf2, 0);
1aa99e6b
IH
951 }
952 else {
953 c1 = *(U8*)m;
954 c2 = PL_fold[c1];
955 }
6eb5f6b9
JH
956 goto do_exactf;
957 case EXACTFL:
958 m = STRING(c);
959 ln = STR_LEN(c);
d8093b23 960 c1 = *(U8*)m;
6eb5f6b9
JH
961 c2 = PL_fold_locale[c1];
962 do_exactf:
963 e = strend - ln;
b3c9acc1 964
6eb5f6b9
JH
965 if (norun && e < s)
966 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
967
968 if (do_utf8) {
575cac57
JH
969 UV c, f;
970 U8 tmpbuf [UTF8_MAXLEN+1];
971 U8 foldbuf[UTF8_MAXLEN_FOLD+1];
972 STRLEN len, foldlen;
973
cadb39a9
JH
974 /* The ibcmp_utf8() uses to_uni_fold() which is more
975 * correct folding for Unicode than using lowercase.
976 * However, it doesn't work quite fully since the folding
977 * is a one-to-many mapping and the regex optimizer is
978 * unaware of this, so it may throw out good matches.
979 * Fortunately, not getting this right is allowed
980 * for Unicode Regular Expression Support level 1,
981 * only one-to-one matching is required. --jhi */
09091399 982 if (c1 == c2) {
1aa99e6b 983 while (s <= e) {
575cac57
JH
984 c = utf8_to_uvchr((U8*)s, &len);
985 uvchr_to_utf8(tmpbuf, c);
986 to_utf8_fold(tmpbuf, foldbuf, &foldlen);
987 f = utf8_to_uvchr(foldbuf, 0);
988
001dd6ea
JH
989 if ( ((c == c1 && ln == len) ||
990 (f == c1 && ln == foldlen) ||
991 !ibcmp_utf8(s, do_utf8, (I32)(strend - s),
992 m, UTF, (I32)ln))
55da9344 993 && (norun || regtry(prog, s)) )
1aa99e6b
IH
994 goto got_it;
995 s += len;
996 }
09091399
JH
997 }
998 else {
1aa99e6b 999 while (s <= e) {
575cac57 1000 c = utf8_to_uvchr((U8*)s, &len);
254ba52a 1001 uvchr_to_utf8(tmpbuf, c);
2ddfca77
JH
1002 to_utf8_fold(tmpbuf, foldbuf, &foldlen);
1003 f = utf8_to_uvchr(foldbuf, 0);
1004
880bd946
JH
1005 if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1006 c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1007 c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
001dd6ea
JH
1008 if ( (((c == c1 || c == c2) && ln == len) ||
1009 ((f == c1 || f == c2) && ln == foldlen) ||
1010 !ibcmp_utf8(s, do_utf8, (I32)(strend - s),
1011 m, UTF, (I32)ln))
55da9344 1012 && (norun || regtry(prog, s)) )
1aa99e6b
IH
1013 goto got_it;
1014 s += len;
1015 }
09091399 1016 }
1aa99e6b
IH
1017 }
1018 else {
1019 if (c1 == c2)
1020 while (s <= e) {
1021 if ( *(U8*)s == c1
1022 && (ln == 1 || !(OP(c) == EXACTF
1023 ? ibcmp(s, m, ln)
1024 : ibcmp_locale(s, m, ln)))
1025 && (norun || regtry(prog, s)) )
1026 goto got_it;
1027 s++;
1028 }
1029 else
1030 while (s <= e) {
1031 if ( (*(U8*)s == c1 || *(U8*)s == c2)
1032 && (ln == 1 || !(OP(c) == EXACTF
1033 ? ibcmp(s, m, ln)
1034 : ibcmp_locale(s, m, ln)))
1035 && (norun || regtry(prog, s)) )
1036 goto got_it;
1037 s++;
1038 }
b3c9acc1
IZ
1039 }
1040 break;
bbce6d69 1041 case BOUNDL:
3280af22 1042 PL_reg_flags |= RF_tainted;
bbce6d69 1043 /* FALL THROUGH */
a0d0e21e 1044 case BOUND:
ffc61ed2 1045 if (do_utf8) {
12d33761 1046 if (s == PL_bostr)
ffc61ed2
JH
1047 tmp = '\n';
1048 else {
1aa99e6b 1049 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1050
0064a8a9
JH
1051 if (s > (char*)r)
1052 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1053 }
1054 tmp = ((OP(c) == BOUND ?
9041c2e3 1055 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1056 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1057 while (s < strend) {
1058 if (tmp == !(OP(c) == BOUND ?
3568d838 1059 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1060 isALNUM_LC_utf8((U8*)s)))
1061 {
1062 tmp = !tmp;
1063 if ((norun || regtry(prog, s)))
1064 goto got_it;
1065 }
1066 s += UTF8SKIP(s);
a687059c 1067 }
a0d0e21e 1068 }
667bb95a 1069 else {
12d33761 1070 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1071 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1072 while (s < strend) {
1073 if (tmp ==
1074 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1075 tmp = !tmp;
1076 if ((norun || regtry(prog, s)))
1077 goto got_it;
1078 }
1079 s++;
a0ed51b3 1080 }
a0ed51b3 1081 }
6eb5f6b9 1082 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1083 goto got_it;
1084 break;
bbce6d69 1085 case NBOUNDL:
3280af22 1086 PL_reg_flags |= RF_tainted;
bbce6d69 1087 /* FALL THROUGH */
a0d0e21e 1088 case NBOUND:
ffc61ed2 1089 if (do_utf8) {
12d33761 1090 if (s == PL_bostr)
ffc61ed2
JH
1091 tmp = '\n';
1092 else {
1aa99e6b 1093 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1094
0064a8a9
JH
1095 if (s > (char*)r)
1096 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1097 }
1098 tmp = ((OP(c) == NBOUND ?
9041c2e3 1099 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1100 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1101 while (s < strend) {
1102 if (tmp == !(OP(c) == NBOUND ?
3568d838 1103 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1104 isALNUM_LC_utf8((U8*)s)))
1105 tmp = !tmp;
1106 else if ((norun || regtry(prog, s)))
1107 goto got_it;
1108 s += UTF8SKIP(s);
1109 }
a0d0e21e 1110 }
667bb95a 1111 else {
12d33761 1112 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1113 tmp = ((OP(c) == NBOUND ?
1114 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1115 while (s < strend) {
1116 if (tmp ==
1117 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1118 tmp = !tmp;
1119 else if ((norun || regtry(prog, s)))
1120 goto got_it;
1121 s++;
1122 }
a0ed51b3 1123 }
6eb5f6b9 1124 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1125 goto got_it;
1126 break;
a0d0e21e 1127 case ALNUM:
ffc61ed2 1128 if (do_utf8) {
8269fa76 1129 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1130 while (s < strend) {
3568d838 1131 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1132 if (tmp && (norun || regtry(prog, s)))
1133 goto got_it;
1134 else
1135 tmp = doevery;
1136 }
bbce6d69 1137 else
ffc61ed2
JH
1138 tmp = 1;
1139 s += UTF8SKIP(s);
bbce6d69 1140 }
bbce6d69 1141 }
ffc61ed2
JH
1142 else {
1143 while (s < strend) {
1144 if (isALNUM(*s)) {
1145 if (tmp && (norun || regtry(prog, s)))
1146 goto got_it;
1147 else
1148 tmp = doevery;
1149 }
a0ed51b3 1150 else
ffc61ed2
JH
1151 tmp = 1;
1152 s++;
a0ed51b3 1153 }
a0ed51b3
LW
1154 }
1155 break;
bbce6d69 1156 case ALNUML:
3280af22 1157 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1158 if (do_utf8) {
1159 while (s < strend) {
1160 if (isALNUM_LC_utf8((U8*)s)) {
1161 if (tmp && (norun || regtry(prog, s)))
1162 goto got_it;
1163 else
1164 tmp = doevery;
1165 }
a687059c 1166 else
ffc61ed2
JH
1167 tmp = 1;
1168 s += UTF8SKIP(s);
a0d0e21e 1169 }
a0d0e21e 1170 }
ffc61ed2
JH
1171 else {
1172 while (s < strend) {
1173 if (isALNUM_LC(*s)) {
1174 if (tmp && (norun || regtry(prog, s)))
1175 goto got_it;
1176 else
1177 tmp = doevery;
1178 }
a0ed51b3 1179 else
ffc61ed2
JH
1180 tmp = 1;
1181 s++;
a0ed51b3 1182 }
a0ed51b3
LW
1183 }
1184 break;
a0d0e21e 1185 case NALNUM:
ffc61ed2 1186 if (do_utf8) {
8269fa76 1187 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1188 while (s < strend) {
3568d838 1189 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1190 if (tmp && (norun || regtry(prog, s)))
1191 goto got_it;
1192 else
1193 tmp = doevery;
1194 }
bbce6d69 1195 else
ffc61ed2
JH
1196 tmp = 1;
1197 s += UTF8SKIP(s);
bbce6d69 1198 }
bbce6d69 1199 }
ffc61ed2
JH
1200 else {
1201 while (s < strend) {
1202 if (!isALNUM(*s)) {
1203 if (tmp && (norun || regtry(prog, s)))
1204 goto got_it;
1205 else
1206 tmp = doevery;
1207 }
a0ed51b3 1208 else
ffc61ed2
JH
1209 tmp = 1;
1210 s++;
a0ed51b3 1211 }
a0ed51b3
LW
1212 }
1213 break;
bbce6d69 1214 case NALNUML:
3280af22 1215 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1216 if (do_utf8) {
1217 while (s < strend) {
1218 if (!isALNUM_LC_utf8((U8*)s)) {
1219 if (tmp && (norun || regtry(prog, s)))
1220 goto got_it;
1221 else
1222 tmp = doevery;
1223 }
a687059c 1224 else
ffc61ed2
JH
1225 tmp = 1;
1226 s += UTF8SKIP(s);
a687059c 1227 }
a0d0e21e 1228 }
ffc61ed2
JH
1229 else {
1230 while (s < strend) {
1231 if (!isALNUM_LC(*s)) {
1232 if (tmp && (norun || regtry(prog, s)))
1233 goto got_it;
1234 else
1235 tmp = doevery;
1236 }
a0ed51b3 1237 else
ffc61ed2
JH
1238 tmp = 1;
1239 s++;
a0ed51b3 1240 }
a0ed51b3
LW
1241 }
1242 break;
a0d0e21e 1243 case SPACE:
ffc61ed2 1244 if (do_utf8) {
8269fa76 1245 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1246 while (s < strend) {
3568d838 1247 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1248 if (tmp && (norun || regtry(prog, s)))
1249 goto got_it;
1250 else
1251 tmp = doevery;
1252 }
a0d0e21e 1253 else
ffc61ed2
JH
1254 tmp = 1;
1255 s += UTF8SKIP(s);
2304df62 1256 }
a0d0e21e 1257 }
ffc61ed2
JH
1258 else {
1259 while (s < strend) {
1260 if (isSPACE(*s)) {
1261 if (tmp && (norun || regtry(prog, s)))
1262 goto got_it;
1263 else
1264 tmp = doevery;
1265 }
a0ed51b3 1266 else
ffc61ed2
JH
1267 tmp = 1;
1268 s++;
a0ed51b3 1269 }
a0ed51b3
LW
1270 }
1271 break;
bbce6d69 1272 case SPACEL:
3280af22 1273 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1274 if (do_utf8) {
1275 while (s < strend) {
1276 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1277 if (tmp && (norun || regtry(prog, s)))
1278 goto got_it;
1279 else
1280 tmp = doevery;
1281 }
bbce6d69 1282 else
ffc61ed2
JH
1283 tmp = 1;
1284 s += UTF8SKIP(s);
bbce6d69 1285 }
bbce6d69 1286 }
ffc61ed2
JH
1287 else {
1288 while (s < strend) {
1289 if (isSPACE_LC(*s)) {
1290 if (tmp && (norun || regtry(prog, s)))
1291 goto got_it;
1292 else
1293 tmp = doevery;
1294 }
a0ed51b3 1295 else
ffc61ed2
JH
1296 tmp = 1;
1297 s++;
a0ed51b3 1298 }
a0ed51b3
LW
1299 }
1300 break;
a0d0e21e 1301 case NSPACE:
ffc61ed2 1302 if (do_utf8) {
8269fa76 1303 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1304 while (s < strend) {
3568d838 1305 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1306 if (tmp && (norun || regtry(prog, s)))
1307 goto got_it;
1308 else
1309 tmp = doevery;
1310 }
a0d0e21e 1311 else
ffc61ed2
JH
1312 tmp = 1;
1313 s += UTF8SKIP(s);
a687059c 1314 }
a0d0e21e 1315 }
ffc61ed2
JH
1316 else {
1317 while (s < strend) {
1318 if (!isSPACE(*s)) {
1319 if (tmp && (norun || regtry(prog, s)))
1320 goto got_it;
1321 else
1322 tmp = doevery;
1323 }
a0ed51b3 1324 else
ffc61ed2
JH
1325 tmp = 1;
1326 s++;
a0ed51b3 1327 }
a0ed51b3
LW
1328 }
1329 break;
bbce6d69 1330 case NSPACEL:
3280af22 1331 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1332 if (do_utf8) {
1333 while (s < strend) {
1334 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1335 if (tmp && (norun || regtry(prog, s)))
1336 goto got_it;
1337 else
1338 tmp = doevery;
1339 }
bbce6d69 1340 else
ffc61ed2
JH
1341 tmp = 1;
1342 s += UTF8SKIP(s);
bbce6d69 1343 }
bbce6d69 1344 }
ffc61ed2
JH
1345 else {
1346 while (s < strend) {
1347 if (!isSPACE_LC(*s)) {
1348 if (tmp && (norun || regtry(prog, s)))
1349 goto got_it;
1350 else
1351 tmp = doevery;
1352 }
a0ed51b3 1353 else
ffc61ed2
JH
1354 tmp = 1;
1355 s++;
a0ed51b3 1356 }
a0ed51b3
LW
1357 }
1358 break;
a0d0e21e 1359 case DIGIT:
ffc61ed2 1360 if (do_utf8) {
8269fa76 1361 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1362 while (s < strend) {
3568d838 1363 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1364 if (tmp && (norun || regtry(prog, s)))
1365 goto got_it;
1366 else
1367 tmp = doevery;
1368 }
a0d0e21e 1369 else
ffc61ed2
JH
1370 tmp = 1;
1371 s += UTF8SKIP(s);
2b69d0c2 1372 }
a0d0e21e 1373 }
ffc61ed2
JH
1374 else {
1375 while (s < strend) {
1376 if (isDIGIT(*s)) {
1377 if (tmp && (norun || regtry(prog, s)))
1378 goto got_it;
1379 else
1380 tmp = doevery;
1381 }
a0ed51b3 1382 else
ffc61ed2
JH
1383 tmp = 1;
1384 s++;
a0ed51b3 1385 }
a0ed51b3
LW
1386 }
1387 break;
b8c5462f
JH
1388 case DIGITL:
1389 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1390 if (do_utf8) {
1391 while (s < strend) {
1392 if (isDIGIT_LC_utf8((U8*)s)) {
1393 if (tmp && (norun || regtry(prog, s)))
1394 goto got_it;
1395 else
1396 tmp = doevery;
1397 }
b8c5462f 1398 else
ffc61ed2
JH
1399 tmp = 1;
1400 s += UTF8SKIP(s);
b8c5462f 1401 }
b8c5462f 1402 }
ffc61ed2
JH
1403 else {
1404 while (s < strend) {
1405 if (isDIGIT_LC(*s)) {
1406 if (tmp && (norun || regtry(prog, s)))
1407 goto got_it;
1408 else
1409 tmp = doevery;
1410 }
b8c5462f 1411 else
ffc61ed2
JH
1412 tmp = 1;
1413 s++;
b8c5462f 1414 }
b8c5462f
JH
1415 }
1416 break;
a0d0e21e 1417 case NDIGIT:
ffc61ed2 1418 if (do_utf8) {
8269fa76 1419 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1420 while (s < strend) {
3568d838 1421 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1422 if (tmp && (norun || regtry(prog, s)))
1423 goto got_it;
1424 else
1425 tmp = doevery;
1426 }
a0d0e21e 1427 else
ffc61ed2
JH
1428 tmp = 1;
1429 s += UTF8SKIP(s);
a687059c 1430 }
a0d0e21e 1431 }
ffc61ed2
JH
1432 else {
1433 while (s < strend) {
1434 if (!isDIGIT(*s)) {
1435 if (tmp && (norun || regtry(prog, s)))
1436 goto got_it;
1437 else
1438 tmp = doevery;
1439 }
a0ed51b3 1440 else
ffc61ed2
JH
1441 tmp = 1;
1442 s++;
a0ed51b3 1443 }
a0ed51b3
LW
1444 }
1445 break;
b8c5462f
JH
1446 case NDIGITL:
1447 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1448 if (do_utf8) {
1449 while (s < strend) {
1450 if (!isDIGIT_LC_utf8((U8*)s)) {
1451 if (tmp && (norun || regtry(prog, s)))
1452 goto got_it;
1453 else
1454 tmp = doevery;
1455 }
b8c5462f 1456 else
ffc61ed2
JH
1457 tmp = 1;
1458 s += UTF8SKIP(s);
b8c5462f 1459 }
a0ed51b3 1460 }
ffc61ed2
JH
1461 else {
1462 while (s < strend) {
1463 if (!isDIGIT_LC(*s)) {
1464 if (tmp && (norun || regtry(prog, s)))
1465 goto got_it;
1466 else
1467 tmp = doevery;
1468 }
cf93c79d 1469 else
ffc61ed2
JH
1470 tmp = 1;
1471 s++;
b8c5462f 1472 }
b8c5462f
JH
1473 }
1474 break;
b3c9acc1 1475 default:
3c3eec57
GS
1476 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1477 break;
d6a28714 1478 }
6eb5f6b9
JH
1479 return 0;
1480 got_it:
1481 return s;
1482}
1483
1484/*
1485 - regexec_flags - match a regexp against a string
1486 */
1487I32
1488Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1489 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1490/* strend: pointer to null at end of string */
1491/* strbeg: real beginning of string */
1492/* minend: end of match must be >=minend after stringarg. */
1493/* data: May be used for some additional optimizations. */
1494/* nosave: For optimizations. */
1495{
6eb5f6b9
JH
1496 register char *s;
1497 register regnode *c;
1498 register char *startpos = stringarg;
6eb5f6b9
JH
1499 I32 minlen; /* must match at least this many chars */
1500 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1501 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1502 constant substr. */ /* CC */
1503 I32 end_shift = 0; /* Same for the end. */ /* CC */
1504 I32 scream_pos = -1; /* Internal iterator of scream. */
1505 char *scream_olds;
1506 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1507 bool do_utf8 = DO_UTF8(sv);
2a782b5b 1508#ifdef DEBUGGING
ce333219 1509 SV *dsv = PERL_DEBUG_PAD_ZERO(0);
2a782b5b 1510#endif
6eb5f6b9
JH
1511
1512 PL_regcc = 0;
1513
1514 cache_re(prog);
1515#ifdef DEBUGGING
aea4f609 1516 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1517#endif
1518
1519 /* Be paranoid... */
1520 if (prog == NULL || startpos == NULL) {
1521 Perl_croak(aTHX_ "NULL regexp parameter");
1522 return 0;
1523 }
1524
1525 minlen = prog->minlen;
a72c7584
JH
1526 if (strend - startpos < minlen) {
1527 DEBUG_r(PerlIO_printf(Perl_debug_log,
1528 "String too short [regexec_flags]...\n"));
1529 goto phooey;
1aa99e6b 1530 }
6eb5f6b9 1531
6eb5f6b9
JH
1532 /* Check validity of program. */
1533 if (UCHARAT(prog->program) != REG_MAGIC) {
1534 Perl_croak(aTHX_ "corrupted regexp program");
1535 }
1536
1537 PL_reg_flags = 0;
1538 PL_reg_eval_set = 0;
1539 PL_reg_maxiter = 0;
1540
1541 if (prog->reganch & ROPT_UTF8)
1542 PL_reg_flags |= RF_utf8;
1543
1544 /* Mark beginning of line for ^ and lookbehind. */
1545 PL_regbol = startpos;
1546 PL_bostr = strbeg;
1547 PL_reg_sv = sv;
1548
1549 /* Mark end of line for $ (and such) */
1550 PL_regeol = strend;
1551
1552 /* see how far we have to get to not match where we matched before */
1553 PL_regtill = startpos+minend;
1554
1555 /* We start without call_cc context. */
1556 PL_reg_call_cc = 0;
1557
1558 /* If there is a "must appear" string, look for it. */
1559 s = startpos;
1560
1561 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1562 MAGIC *mg;
1563
1564 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1565 PL_reg_ganch = startpos;
1566 else if (sv && SvTYPE(sv) >= SVt_PVMG
1567 && SvMAGIC(sv)
14befaf4
DM
1568 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1569 && mg->mg_len >= 0) {
6eb5f6b9
JH
1570 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1571 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1572 if (s > PL_reg_ganch)
6eb5f6b9
JH
1573 goto phooey;
1574 s = PL_reg_ganch;
1575 }
1576 }
1577 else /* pos() not defined */
1578 PL_reg_ganch = strbeg;
1579 }
1580
699c3c34
JH
1581 if (do_utf8 == (UTF!=0) &&
1582 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1583 re_scream_pos_data d;
1584
1585 d.scream_olds = &scream_olds;
1586 d.scream_pos = &scream_pos;
1587 s = re_intuit_start(prog, sv, s, strend, flags, &d);
3fa9c3d7
JH
1588 if (!s) {
1589 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
6eb5f6b9 1590 goto phooey; /* not present */
3fa9c3d7 1591 }
6eb5f6b9
JH
1592 }
1593
2a782b5b 1594 DEBUG_r({
df1ffd02
JH
1595 char *s = do_utf8 ? sv_uni_display(dsv, sv, 60, 0) : startpos;
1596 int len = do_utf8 ? strlen(s) : strend - startpos;
2a782b5b
JH
1597 if (!PL_colorset)
1598 reginitcolors();
1599 PerlIO_printf(Perl_debug_log,
1600 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1601 PL_colors[4],PL_colors[5],PL_colors[0],
1602 prog->precomp,
1603 PL_colors[1],
1604 (strlen(prog->precomp) > 60 ? "..." : ""),
1605 PL_colors[0],
1606 (int)(len > 60 ? 60 : len),
1607 s, PL_colors[1],
1608 (len > 60 ? "..." : "")
1609 );
1610 });
6eb5f6b9
JH
1611
1612 /* Simplest case: anchored match need be tried only once. */
1613 /* [unless only anchor is BOL and multiline is set] */
1614 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1615 if (s == startpos && regtry(prog, startpos))
1616 goto got_it;
1617 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1618 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1619 {
1620 char *end;
1621
1622 if (minlen)
1623 dontbother = minlen - 1;
1aa99e6b 1624 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1625 /* for multiline we only have to try after newlines */
1626 if (prog->check_substr) {
1627 if (s == startpos)
1628 goto after_try;
1629 while (1) {
1630 if (regtry(prog, s))
1631 goto got_it;
1632 after_try:
1633 if (s >= end)
1634 goto phooey;
1635 if (prog->reganch & RE_USE_INTUIT) {
1636 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1637 if (!s)
1638 goto phooey;
1639 }
1640 else
1641 s++;
1642 }
1643 } else {
1644 if (s > startpos)
1645 s--;
1646 while (s < end) {
1647 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1648 if (regtry(prog, s))
1649 goto got_it;
1650 }
1651 }
1652 }
1653 }
1654 goto phooey;
1655 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1656 if (regtry(prog, PL_reg_ganch))
1657 goto got_it;
1658 goto phooey;
1659 }
1660
1661 /* Messy cases: unanchored match. */
9041c2e3 1662 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1663 /* we have /x+whatever/ */
1664 /* it must be a one character string (XXXX Except UTF?) */
1665 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1666#ifdef DEBUGGING
1667 int did_match = 0;
1668#endif
1669
1aa99e6b 1670 if (do_utf8) {
6eb5f6b9
JH
1671 while (s < strend) {
1672 if (*s == ch) {
bf93d4cc 1673 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1674 if (regtry(prog, s)) goto got_it;
1675 s += UTF8SKIP(s);
1676 while (s < strend && *s == ch)
1677 s += UTF8SKIP(s);
1678 }
1679 s += UTF8SKIP(s);
1680 }
1681 }
1682 else {
1683 while (s < strend) {
1684 if (*s == ch) {
bf93d4cc 1685 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1686 if (regtry(prog, s)) goto got_it;
1687 s++;
1688 while (s < strend && *s == ch)
1689 s++;
1690 }
1691 s++;
1692 }
1693 }
b7953727 1694 DEBUG_r(if (!did_match)
bf93d4cc 1695 PerlIO_printf(Perl_debug_log,
b7953727
JH
1696 "Did not find anchored character...\n")
1697 );
6eb5f6b9
JH
1698 }
1699 /*SUPPRESS 560*/
1aa99e6b
IH
1700 else if (do_utf8 == (UTF!=0) &&
1701 (prog->anchored_substr != Nullsv
9041c2e3 1702 || (prog->float_substr != Nullsv
1aa99e6b 1703 && prog->float_max_offset < strend - s))) {
9041c2e3 1704 SV *must = prog->anchored_substr
6eb5f6b9 1705 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1706 I32 back_max =
6eb5f6b9 1707 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1708 I32 back_min =
6eb5f6b9 1709 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1710 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1711 -(I32)(CHR_SVLEN(must)
1aa99e6b 1712 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1713 char *last1; /* Last position checked before */
bf93d4cc
GS
1714#ifdef DEBUGGING
1715 int did_match = 0;
1716#endif
6eb5f6b9
JH
1717
1718 if (s > PL_bostr)
1719 last1 = HOPc(s, -1);
1720 else
1721 last1 = s - 1; /* bogus */
1722
1723 /* XXXX check_substr already used to find `s', can optimize if
1724 check_substr==must. */
1725 scream_pos = -1;
1726 dontbother = end_shift;
1727 strend = HOPc(strend, -dontbother);
1728 while ( (s <= last) &&
9041c2e3 1729 ((flags & REXEC_SCREAM)
1aa99e6b 1730 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1731 end_shift, &scream_pos, 0))
1aa99e6b 1732 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1733 (unsigned char*)strend, must,
6eb5f6b9 1734 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1735 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1736 if (HOPc(s, -back_max) > last1) {
1737 last1 = HOPc(s, -back_min);
1738 s = HOPc(s, -back_max);
1739 }
1740 else {
1741 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1742
1743 last1 = HOPc(s, -back_min);
1744 s = t;
1745 }
1aa99e6b 1746 if (do_utf8) {
6eb5f6b9
JH
1747 while (s <= last1) {
1748 if (regtry(prog, s))
1749 goto got_it;
1750 s += UTF8SKIP(s);
1751 }
1752 }
1753 else {
1754 while (s <= last1) {
1755 if (regtry(prog, s))
1756 goto got_it;
1757 s++;
1758 }
1759 }
1760 }
b7953727
JH
1761 DEBUG_r(if (!did_match)
1762 PerlIO_printf(Perl_debug_log,
1763 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1764 ((must == prog->anchored_substr)
1765 ? "anchored" : "floating"),
1766 PL_colors[0],
1767 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1768 SvPVX(must),
b7953727
JH
1769 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1770 );
6eb5f6b9
JH
1771 goto phooey;
1772 }
155aba94 1773 else if ((c = prog->regstclass)) {
66e933ab
GS
1774 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1775 /* don't bother with what can't match */
6eb5f6b9 1776 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1777 DEBUG_r({
1778 SV *prop = sv_newmortal();
1779 regprop(prop, c);
2a782b5b 1780 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), UTF ? sv_uni_display(dsv, sv, 60, 0) : s);
ffc61ed2 1781 });
6eb5f6b9
JH
1782 if (find_byclass(prog, c, s, strend, startpos, 0))
1783 goto got_it;
bf93d4cc 1784 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1785 }
1786 else {
1787 dontbother = 0;
1788 if (prog->float_substr != Nullsv) { /* Trim the end. */
1789 char *last;
d6a28714
JH
1790
1791 if (flags & REXEC_SCREAM) {
1792 last = screaminstr(sv, prog->float_substr, s - strbeg,
1793 end_shift, &scream_pos, 1); /* last one */
1794 if (!last)
ffc61ed2 1795 last = scream_olds; /* Only one occurrence. */
b8c5462f 1796 }
d6a28714
JH
1797 else {
1798 STRLEN len;
1799 char *little = SvPV(prog->float_substr, len);
1800
1801 if (SvTAIL(prog->float_substr)) {
1802 if (memEQ(strend - len + 1, little, len - 1))
1803 last = strend - len + 1;
1804 else if (!PL_multiline)
9041c2e3 1805 last = memEQ(strend - len, little, len)
d6a28714 1806 ? strend - len : Nullch;
b8c5462f 1807 else
d6a28714
JH
1808 goto find_last;
1809 } else {
1810 find_last:
9041c2e3 1811 if (len)
d6a28714 1812 last = rninstr(s, strend, little, little + len);
b8c5462f 1813 else
d6a28714 1814 last = strend; /* matching `$' */
b8c5462f 1815 }
b8c5462f 1816 }
bf93d4cc
GS
1817 if (last == NULL) {
1818 DEBUG_r(PerlIO_printf(Perl_debug_log,
1819 "%sCan't trim the tail, match fails (should not happen)%s\n",
1820 PL_colors[4],PL_colors[5]));
1821 goto phooey; /* Should not happen! */
1822 }
d6a28714
JH
1823 dontbother = strend - last + prog->float_min_offset;
1824 }
1825 if (minlen && (dontbother < minlen))
1826 dontbother = minlen - 1;
1827 strend -= dontbother; /* this one's always in bytes! */
1828 /* We don't know much -- general case. */
1aa99e6b 1829 if (do_utf8) {
d6a28714
JH
1830 for (;;) {
1831 if (regtry(prog, s))
1832 goto got_it;
1833 if (s >= strend)
1834 break;
b8c5462f 1835 s += UTF8SKIP(s);
d6a28714
JH
1836 };
1837 }
1838 else {
1839 do {
1840 if (regtry(prog, s))
1841 goto got_it;
1842 } while (s++ < strend);
1843 }
1844 }
1845
1846 /* Failure. */
1847 goto phooey;
1848
1849got_it:
1850 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1851
1852 if (PL_reg_eval_set) {
1853 /* Preserve the current value of $^R */
1854 if (oreplsv != GvSV(PL_replgv))
1855 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1856 restored, the value remains
1857 the same. */
acfe0abc 1858 restore_pos(aTHX_ 0);
d6a28714
JH
1859 }
1860
1861 /* make sure $`, $&, $', and $digit will work later */
1862 if ( !(flags & REXEC_NOT_FIRST) ) {
1863 if (RX_MATCH_COPIED(prog)) {
1864 Safefree(prog->subbeg);
1865 RX_MATCH_COPIED_off(prog);
1866 }
1867 if (flags & REXEC_COPY_STR) {
1868 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1869
1870 s = savepvn(strbeg, i);
1871 prog->subbeg = s;
1872 prog->sublen = i;
1873 RX_MATCH_COPIED_on(prog);
1874 }
1875 else {
1876 prog->subbeg = strbeg;
1877 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1878 }
1879 }
9041c2e3 1880
d6a28714
JH
1881 return 1;
1882
1883phooey:
bf93d4cc
GS
1884 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1885 PL_colors[4],PL_colors[5]));
d6a28714 1886 if (PL_reg_eval_set)
acfe0abc 1887 restore_pos(aTHX_ 0);
d6a28714
JH
1888 return 0;
1889}
1890
1891/*
1892 - regtry - try match at specific point
1893 */
1894STATIC I32 /* 0 failure, 1 success */
1895S_regtry(pTHX_ regexp *prog, char *startpos)
1896{
d6a28714
JH
1897 register I32 i;
1898 register I32 *sp;
1899 register I32 *ep;
1900 CHECKPOINT lastcp;
1901
02db2b7b
IZ
1902#ifdef DEBUGGING
1903 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1904#endif
d6a28714
JH
1905 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1906 MAGIC *mg;
1907
1908 PL_reg_eval_set = RS_init;
1909 DEBUG_r(DEBUG_s(
b900a521
JH
1910 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1911 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1912 ));
e8347627 1913 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1914 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1915 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1916 SAVETMPS;
1917 /* Apparently this is not needed, judging by wantarray. */
e8347627 1918 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1919 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1920
1921 if (PL_reg_sv) {
1922 /* Make $_ available to executed code. */
1923 if (PL_reg_sv != DEFSV) {
4d1ff10f 1924 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
d6a28714
JH
1925 SAVESPTR(DEFSV);
1926 DEFSV = PL_reg_sv;
b8c5462f 1927 }
d6a28714 1928
9041c2e3 1929 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1930 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1931 /* prepare for quick setting of pos */
14befaf4
DM
1932 sv_magic(PL_reg_sv, (SV*)0,
1933 PERL_MAGIC_regex_global, Nullch, 0);
1934 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1935 mg->mg_len = -1;
b8c5462f 1936 }
d6a28714
JH
1937 PL_reg_magic = mg;
1938 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1939 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 1940 }
09687e5a 1941 if (!PL_reg_curpm) {
0f79a09d 1942 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
1943#ifdef USE_ITHREADS
1944 {
1945 SV* repointer = newSViv(0);
577e12cc 1946 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 1947 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
1948 av_push(PL_regex_padav,repointer);
1949 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
1950 PL_regex_pad = AvARRAY(PL_regex_padav);
1951 }
1952#endif
1953 }
aaa362c4 1954 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
1955 PL_reg_oldcurpm = PL_curpm;
1956 PL_curpm = PL_reg_curpm;
1957 if (RX_MATCH_COPIED(prog)) {
1958 /* Here is a serious problem: we cannot rewrite subbeg,
1959 since it may be needed if this match fails. Thus
1960 $` inside (?{}) could fail... */
1961 PL_reg_oldsaved = prog->subbeg;
1962 PL_reg_oldsavedlen = prog->sublen;
1963 RX_MATCH_COPIED_off(prog);
1964 }
1965 else
1966 PL_reg_oldsaved = Nullch;
1967 prog->subbeg = PL_bostr;
1968 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1969 }
1970 prog->startp[0] = startpos - PL_bostr;
1971 PL_reginput = startpos;
1972 PL_regstartp = prog->startp;
1973 PL_regendp = prog->endp;
1974 PL_reglastparen = &prog->lastparen;
a01268b5 1975 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
1976 prog->lastparen = 0;
1977 PL_regsize = 0;
1978 DEBUG_r(PL_reg_starttry = startpos);
1979 if (PL_reg_start_tmpl <= prog->nparens) {
1980 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1981 if(PL_reg_start_tmp)
1982 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1983 else
1984 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1985 }
1986
128e8167
JH
1987#ifdef DEBUGGING
1988 sv_setpvn(PERL_DEBUG_PAD(0), "", 0);
1989 sv_setpvn(PERL_DEBUG_PAD(1), "", 0);
1990 sv_setpvn(PERL_DEBUG_PAD(2), "", 0);
1991#endif
1992
d6a28714
JH
1993 /* XXXX What this code is doing here?!!! There should be no need
1994 to do this again and again, PL_reglastparen should take care of
3dd2943c 1995 this! --ilya*/
dafc8851
JH
1996
1997 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1998 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1999 * PL_reglastparen), is not needed at all by the test suite
2000 * (op/regexp, op/pat, op/split), but that code is needed, oddly
2001 * enough, for building DynaLoader, or otherwise this
2002 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
2003 * will happen. Meanwhile, this code *is* needed for the
2004 * above-mentioned test suite tests to succeed. The common theme
2005 * on those tests seems to be returning null fields from matches.
2006 * --jhi */
dafc8851 2007#if 1
d6a28714
JH
2008 sp = prog->startp;
2009 ep = prog->endp;
2010 if (prog->nparens) {
09e8ae3b 2011 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
2012 *++sp = -1;
2013 *++ep = -1;
2014 }
2015 }
dafc8851 2016#endif
02db2b7b 2017 REGCP_SET(lastcp);
d6a28714
JH
2018 if (regmatch(prog->program + 1)) {
2019 prog->endp[0] = PL_reginput - PL_bostr;
2020 return 1;
2021 }
02db2b7b 2022 REGCP_UNWIND(lastcp);
d6a28714
JH
2023 return 0;
2024}
2025
02db2b7b
IZ
2026#define RE_UNWIND_BRANCH 1
2027#define RE_UNWIND_BRANCHJ 2
2028
2029union re_unwind_t;
2030
2031typedef struct { /* XX: makes sense to enlarge it... */
2032 I32 type;
2033 I32 prev;
2034 CHECKPOINT lastcp;
2035} re_unwind_generic_t;
2036
2037typedef struct {
2038 I32 type;
2039 I32 prev;
2040 CHECKPOINT lastcp;
2041 I32 lastparen;
2042 regnode *next;
2043 char *locinput;
2044 I32 nextchr;
2045#ifdef DEBUGGING
2046 int regindent;
2047#endif
2048} re_unwind_branch_t;
2049
2050typedef union re_unwind_t {
2051 I32 type;
2052 re_unwind_generic_t generic;
2053 re_unwind_branch_t branch;
2054} re_unwind_t;
2055
8ba1375e
MJD
2056#define sayYES goto yes
2057#define sayNO goto no
2058#define sayYES_FINAL goto yes_final
2059#define sayYES_LOUD goto yes_loud
2060#define sayNO_FINAL goto no_final
2061#define sayNO_SILENT goto do_no
2062#define saySAME(x) if (x) goto yes; else goto no
2063
2064#define REPORT_CODE_OFF 24
2065
d6a28714
JH
2066/*
2067 - regmatch - main matching routine
2068 *
2069 * Conceptually the strategy is simple: check to see whether the current
2070 * node matches, call self recursively to see whether the rest matches,
2071 * and then act accordingly. In practice we make some effort to avoid
2072 * recursion, in particular by going through "ordinary" nodes (that don't
2073 * need to know whether the rest of the match failed) by a loop instead of
2074 * by recursion.
2075 */
2076/* [lwall] I've hoisted the register declarations to the outer block in order to
2077 * maybe save a little bit of pushing and popping on the stack. It also takes
2078 * advantage of machines that use a register save mask on subroutine entry.
2079 */
2080STATIC I32 /* 0 failure, 1 success */
2081S_regmatch(pTHX_ regnode *prog)
2082{
d6a28714
JH
2083 register regnode *scan; /* Current node. */
2084 regnode *next; /* Next node. */
2085 regnode *inner; /* Next node in internal branch. */
2086 register I32 nextchr; /* renamed nextchr - nextchar colides with
2087 function of same name */
2088 register I32 n; /* no or next */
b7953727
JH
2089 register I32 ln = 0; /* len or last */
2090 register char *s = Nullch; /* operand or save */
d6a28714 2091 register char *locinput = PL_reginput;
b7953727 2092 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2093 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2094 I32 unwind = 0;
b7953727 2095#if 0
02db2b7b 2096 I32 firstcp = PL_savestack_ix;
b7953727 2097#endif
53c4c00c 2098 register bool do_utf8 = PL_reg_match_utf8;
2a782b5b 2099#ifdef DEBUGGING
ce333219
JH
2100 SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
2101 SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
2102 SV *dsv2 = PERL_DEBUG_PAD_ZERO(2);
2a782b5b 2103#endif
02db2b7b 2104
d6a28714
JH
2105#ifdef DEBUGGING
2106 PL_regindent++;
2107#endif
2108
2109 /* Note that nextchr is a byte even in UTF */
2110 nextchr = UCHARAT(locinput);
2111 scan = prog;
2112 while (scan != NULL) {
8ba1375e 2113
2a782b5b 2114 DEBUG_r( {
d6a28714
JH
2115 SV *prop = sv_newmortal();
2116 int docolor = *PL_colors[0];
2117 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2118 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2119 /* The part of the string before starttry has one color
2120 (pref0_len chars), between starttry and current
2121 position another one (pref_len - pref0_len chars),
2122 after the current position the third one.
2123 We assume that pref0_len <= pref_len, otherwise we
2124 decrease pref0_len. */
9041c2e3 2125 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2126 ? (5 + taill) - l : locinput - PL_bostr;
2127 int pref0_len;
d6a28714 2128
df1ffd02 2129 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
1aa99e6b
IH
2130 pref_len++;
2131 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2132 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2133 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2134 ? (5 + taill) - pref_len : PL_regeol - locinput);
df1ffd02 2135 while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
1aa99e6b 2136 l--;
d6a28714
JH
2137 if (pref0_len < 0)
2138 pref0_len = 0;
2139 if (pref0_len > pref_len)
2140 pref0_len = pref_len;
2141 regprop(prop, scan);
2a782b5b
JH
2142 {
2143 char *s0 =
df1ffd02 2144 do_utf8 ?
2a782b5b
JH
2145 pv_uni_display(dsv0, (U8*)(locinput - pref_len),
2146 pref0_len, 60, 0) :
2147 locinput - pref_len;
df1ffd02
JH
2148 int len0 = do_utf8 ? strlen(s0) : pref0_len;
2149 char *s1 = do_utf8 ?
2a782b5b
JH
2150 pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
2151 pref_len - pref0_len, 60, 0) :
2152 locinput - pref_len + pref0_len;
df1ffd02
JH
2153 int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
2154 char *s2 = do_utf8 ?
2a782b5b
JH
2155 pv_uni_display(dsv2, (U8*)locinput,
2156 PL_regeol - locinput, 60, 0) :
2157 locinput;
df1ffd02 2158 int len2 = do_utf8 ? strlen(s2) : l;
2a782b5b
JH
2159 PerlIO_printf(Perl_debug_log,
2160 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
2161 (IV)(locinput - PL_bostr),
2162 PL_colors[4],
2163 len0, s0,
2164 PL_colors[5],
2165 PL_colors[2],
2166 len1, s1,
2167 PL_colors[3],
2168 (docolor ? "" : "> <"),
2169 PL_colors[0],
2170 len2, s2,
2171 PL_colors[1],
2172 15 - l - pref_len + 1,
2173 "",
2174 (IV)(scan - PL_regprogram), PL_regindent*2, "",
2175 SvPVX(prop));
2176 }
2177 });
d6a28714
JH
2178
2179 next = scan + NEXT_OFF(scan);
2180 if (next == scan)
2181 next = NULL;
2182
2183 switch (OP(scan)) {
2184 case BOL:
12d33761
HS
2185 if (locinput == PL_bostr || (PL_multiline &&
2186 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2187 {
2188 /* regtill = regbol; */
b8c5462f
JH
2189 break;
2190 }
d6a28714
JH
2191 sayNO;
2192 case MBOL:
12d33761
HS
2193 if (locinput == PL_bostr ||
2194 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2195 {
b8c5462f
JH
2196 break;
2197 }
d6a28714
JH
2198 sayNO;
2199 case SBOL:
c2a73568 2200 if (locinput == PL_bostr)
b8c5462f 2201 break;
d6a28714
JH
2202 sayNO;
2203 case GPOS:
2204 if (locinput == PL_reg_ganch)
2205 break;
2206 sayNO;
2207 case EOL:
2208 if (PL_multiline)
2209 goto meol;
2210 else
2211 goto seol;
2212 case MEOL:
2213 meol:
2214 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2215 sayNO;
b8c5462f 2216 break;
d6a28714
JH
2217 case SEOL:
2218 seol:
2219 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2220 sayNO;
d6a28714 2221 if (PL_regeol - locinput > 1)
b8c5462f 2222 sayNO;
b8c5462f 2223 break;
d6a28714
JH
2224 case EOS:
2225 if (PL_regeol != locinput)
b8c5462f 2226 sayNO;
d6a28714 2227 break;
ffc61ed2 2228 case SANY:
d6a28714 2229 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2230 sayNO;
f33976b4
DB
2231 if (do_utf8) {
2232 locinput += PL_utf8skip[nextchr];
2233 if (locinput > PL_regeol)
2234 sayNO;
2235 nextchr = UCHARAT(locinput);
2236 }
2237 else
2238 nextchr = UCHARAT(++locinput);
2239 break;
2240 case CANY:
2241 if (!nextchr && locinput >= PL_regeol)
2242 sayNO;
b8c5462f 2243 nextchr = UCHARAT(++locinput);
a0d0e21e 2244 break;
ffc61ed2 2245 case REG_ANY:
1aa99e6b
IH
2246 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2247 sayNO;
2248 if (do_utf8) {
b8c5462f 2249 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2250 if (locinput > PL_regeol)
2251 sayNO;
a0ed51b3 2252 nextchr = UCHARAT(locinput);
a0ed51b3 2253 }
1aa99e6b
IH
2254 else
2255 nextchr = UCHARAT(++locinput);
a0ed51b3 2256 break;
d6a28714 2257 case EXACT:
cd439c50
IZ
2258 s = STRING(scan);
2259 ln = STR_LEN(scan);
1aa99e6b 2260 if (do_utf8 != (UTF!=0)) {
bc517b45 2261 /* The target and the pattern have differing utf8ness. */
1aa99e6b
IH
2262 char *l = locinput;
2263 char *e = s + ln;
bc517b45 2264 STRLEN ulen;
a72c7584 2265
5ff6fc6d
JH
2266 if (do_utf8) {
2267 /* The target is utf8, the pattern is not utf8. */
1aa99e6b
IH
2268 while (s < e) {
2269 if (l >= PL_regeol)
5ff6fc6d
JH
2270 sayNO;
2271 if (NATIVE_TO_UNI(*(U8*)s) !=
bc517b45 2272 utf8_to_uvchr((U8*)l, &ulen))
5ff6fc6d 2273 sayNO;
bc517b45 2274 l += ulen;
5ff6fc6d 2275 s ++;
1aa99e6b 2276 }
5ff6fc6d
JH
2277 }
2278 else {
2279 /* The target is not utf8, the pattern is utf8. */
1aa99e6b
IH
2280 while (s < e) {
2281 if (l >= PL_regeol)
2282 sayNO;
5ff6fc6d 2283 if (NATIVE_TO_UNI(*((U8*)l)) !=
bc517b45 2284 utf8_to_uvchr((U8*)s, &ulen))
1aa99e6b 2285 sayNO;
bc517b45 2286 s += ulen;
a72c7584 2287 l ++;
1aa99e6b 2288 }
5ff6fc6d 2289 }
1aa99e6b
IH
2290 locinput = l;
2291 nextchr = UCHARAT(locinput);
2292 break;
2293 }
bc517b45 2294 /* The target and the pattern have the same utf8ness. */
d6a28714
JH
2295 /* Inline the first character, for speed. */
2296 if (UCHARAT(s) != nextchr)
2297 sayNO;
2298 if (PL_regeol - locinput < ln)
2299 sayNO;
2300 if (ln > 1 && memNE(s, locinput, ln))
2301 sayNO;
2302 locinput += ln;
2303 nextchr = UCHARAT(locinput);
2304 break;
2305 case EXACTFL:
b8c5462f
JH
2306 PL_reg_flags |= RF_tainted;
2307 /* FALL THROUGH */
d6a28714 2308 case EXACTF:
cd439c50
IZ
2309 s = STRING(scan);
2310 ln = STR_LEN(scan);
d6a28714 2311
bc517b45 2312 {
d6a28714 2313 char *l = locinput;
bc517b45
JH
2314 char *e = s + ln;
2315 U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
2316
2317 if (do_utf8 != (UTF!=0)) {
2318 /* The target and the pattern have differing utf8ness. */
2319 STRLEN ulen1, ulen2;
2320 UV cs, cl;
2321
2322 if (do_utf8) {
2323 /* The target is utf8, the pattern is not utf8. */
2324 while (s < e) {
2325 if (l >= PL_regeol)
2326 sayNO;
2327
2328 cs = to_uni_fold(NATIVE_TO_UNI(*(U8*)s),
2329 (U8*)s, &ulen1);
2330 cl = utf8_to_uvchr((U8*)l, &ulen2);
2331
2332 if (cs != cl) {
2333 cl = to_uni_fold(cl, (U8*)l, &ulen2);
2334 if (ulen1 != ulen2 || cs != cl)
2335 sayNO;
2336 }
2337 l += ulen1;
2338 s ++;
2339 }
2340 }
2341 else {
2342 /* The target is not utf8, the pattern is utf8. */
2343 while (s < e) {
2344 if (l >= PL_regeol)
2345 sayNO;
2346
2347 cs = utf8_to_uvchr((U8*)s, &ulen1);
2348
2349 cl = to_uni_fold(NATIVE_TO_UNI(*(U8*)l),
2350 (U8*)l, &ulen2);
2351
2352 if (cs != cl) {
2353 cs = to_uni_fold(cs, (U8*)s, &ulen1);
2354 if (ulen1 != ulen2 || cs != cl)
2355 sayNO;
2356 }
2357 l ++;
2358 s += ulen1;
2359 }
2360 }
2361 locinput = l;
2362 nextchr = UCHARAT(locinput);
2363 break;
2364 }
2365
2366 if (do_utf8 && UTF) {
2367 /* Both the target and the pattern are utf8. */
2368 STRLEN ulen;
2369
2370 while (s < e) {
2371 if (l >= PL_regeol)
2372 sayNO;
2373 if (UTF8SKIP(s) != UTF8SKIP(l) ||
2374 memNE(s, (char*)l, UTF8SKIP(s))) {
2375 to_utf8_fold((U8*)l, tmpbuf, &ulen);
2376 if (UTF8SKIP(s) != ulen ||
2377 memNE(s, (char*)tmpbuf, ulen))
2378 sayNO;
2379 }
2380 l += UTF8SKIP(l);
2381 s += UTF8SKIP(s);
2382 }
2383 locinput = l;
2384 nextchr = UCHARAT(locinput);
2385 break;
b8c5462f 2386 }
a0ed51b3 2387 }
d6a28714 2388
bc517b45
JH
2389 /* Neither the target and the pattern are utf8. */
2390
d6a28714
JH
2391 /* Inline the first character, for speed. */
2392 if (UCHARAT(s) != nextchr &&
2393 UCHARAT(s) != ((OP(scan) == EXACTF)
2394 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2395 sayNO;
d6a28714 2396 if (PL_regeol - locinput < ln)
b8c5462f 2397 sayNO;
d6a28714
JH
2398 if (ln > 1 && (OP(scan) == EXACTF
2399 ? ibcmp(s, locinput, ln)
2400 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2401 sayNO;
d6a28714
JH
2402 locinput += ln;
2403 nextchr = UCHARAT(locinput);
a0d0e21e 2404 break;
d6a28714 2405 case ANYOF:
ffc61ed2
JH
2406 if (do_utf8) {
2407 if (!reginclass(scan, (U8*)locinput, do_utf8))
2408 sayNO;
2409 if (locinput >= PL_regeol)
2410 sayNO;
2411 locinput += PL_utf8skip[nextchr];
b8c5462f 2412 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2413 }
2414 else {
2415 if (nextchr < 0)
2416 nextchr = UCHARAT(locinput);
2417 if (!reginclass(scan, (U8*)locinput, do_utf8))
2418 sayNO;
2419 if (!nextchr && locinput >= PL_regeol)
2420 sayNO;
2421 nextchr = UCHARAT(++locinput);
2422 }
b8c5462f 2423 break;
d6a28714 2424 case ALNUML:
b8c5462f
JH
2425 PL_reg_flags |= RF_tainted;
2426 /* FALL THROUGH */
d6a28714 2427 case ALNUM:
b8c5462f 2428 if (!nextchr)
4633a7c4 2429 sayNO;
ffc61ed2 2430 if (do_utf8) {
ad24be35 2431 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2432 if (!(OP(scan) == ALNUM
3568d838 2433 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2434 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2435 {
2436 sayNO;
a0ed51b3 2437 }
b8c5462f 2438 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2439 nextchr = UCHARAT(locinput);
2440 break;
2441 }
ffc61ed2 2442 if (!(OP(scan) == ALNUM
d6a28714 2443 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2444 sayNO;
b8c5462f 2445 nextchr = UCHARAT(++locinput);
a0d0e21e 2446 break;
d6a28714 2447 case NALNUML:
b8c5462f
JH
2448 PL_reg_flags |= RF_tainted;
2449 /* FALL THROUGH */
d6a28714
JH
2450 case NALNUM:
2451 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2452 sayNO;
ffc61ed2 2453 if (do_utf8) {
8269fa76 2454 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2455 if (OP(scan) == NALNUM
3568d838 2456 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2457 : isALNUM_LC_utf8((U8*)locinput))
2458 {
b8c5462f 2459 sayNO;
d6a28714 2460 }
b8c5462f
JH
2461 locinput += PL_utf8skip[nextchr];
2462 nextchr = UCHARAT(locinput);
2463 break;
2464 }
ffc61ed2 2465 if (OP(scan) == NALNUM
d6a28714 2466 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2467 sayNO;
76e3520e 2468 nextchr = UCHARAT(++locinput);
a0d0e21e 2469 break;
d6a28714
JH
2470 case BOUNDL:
2471 case NBOUNDL:
3280af22 2472 PL_reg_flags |= RF_tainted;
bbce6d69 2473 /* FALL THROUGH */
d6a28714
JH
2474 case BOUND:
2475 case NBOUND:
2476 /* was last char in word? */
ffc61ed2 2477 if (do_utf8) {
12d33761
HS
2478 if (locinput == PL_bostr)
2479 ln = '\n';
ffc61ed2
JH
2480 else {
2481 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2482
2b9d42f0 2483 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2484 }
2485 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2486 ln = isALNUM_uni(ln);
8269fa76 2487 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2488 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2489 }
2490 else {
9041c2e3 2491 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2492 n = isALNUM_LC_utf8((U8*)locinput);
2493 }
a0ed51b3 2494 }
d6a28714 2495 else {
12d33761
HS
2496 ln = (locinput != PL_bostr) ?
2497 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2498 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2499 ln = isALNUM(ln);
2500 n = isALNUM(nextchr);
2501 }
2502 else {
2503 ln = isALNUM_LC(ln);
2504 n = isALNUM_LC(nextchr);
2505 }
d6a28714 2506 }
ffc61ed2
JH
2507 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2508 OP(scan) == BOUNDL))
2509 sayNO;
a0ed51b3 2510 break;
d6a28714 2511 case SPACEL:
3280af22 2512 PL_reg_flags |= RF_tainted;
bbce6d69 2513 /* FALL THROUGH */
d6a28714 2514 case SPACE:
9442cb0e 2515 if (!nextchr)
4633a7c4 2516 sayNO;
1aa99e6b 2517 if (do_utf8) {
fd400ab9 2518 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2519 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2520 if (!(OP(scan) == SPACE
3568d838 2521 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2522 : isSPACE_LC_utf8((U8*)locinput)))
2523 {
2524 sayNO;
2525 }
2526 locinput += PL_utf8skip[nextchr];
2527 nextchr = UCHARAT(locinput);
2528 break;
d6a28714 2529 }
ffc61ed2
JH
2530 if (!(OP(scan) == SPACE
2531 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2532 sayNO;
2533 nextchr = UCHARAT(++locinput);
2534 }
2535 else {
2536 if (!(OP(scan) == SPACE
2537 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2538 sayNO;
2539 nextchr = UCHARAT(++locinput);
a0ed51b3 2540 }
a0ed51b3 2541 break;
d6a28714 2542 case NSPACEL:
3280af22 2543 PL_reg_flags |= RF_tainted;
bbce6d69 2544 /* FALL THROUGH */
d6a28714 2545 case NSPACE:
9442cb0e 2546 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2547 sayNO;
1aa99e6b 2548 if (do_utf8) {
8269fa76 2549 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2550 if (OP(scan) == NSPACE
3568d838 2551 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2552 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2553 {
2554 sayNO;
2555 }
2556 locinput += PL_utf8skip[nextchr];
2557 nextchr = UCHARAT(locinput);
2558 break;
a0ed51b3 2559 }
ffc61ed2 2560 if (OP(scan) == NSPACE
d6a28714 2561 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2562 sayNO;
76e3520e 2563 nextchr = UCHARAT(++locinput);
a0d0e21e 2564 break;
d6a28714 2565 case DIGITL:
a0ed51b3
LW
2566 PL_reg_flags |= RF_tainted;
2567 /* FALL THROUGH */
d6a28714 2568 case DIGIT:
9442cb0e 2569 if (!nextchr)
a0ed51b3 2570 sayNO;
1aa99e6b 2571 if (do_utf8) {
8269fa76 2572 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2573 if (!(OP(scan) == DIGIT
3568d838 2574 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2575 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2576 {
a0ed51b3 2577 sayNO;
dfe13c55 2578 }
6f06b55f 2579 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2580 nextchr = UCHARAT(locinput);
2581 break;
2582 }
ffc61ed2 2583 if (!(OP(scan) == DIGIT
9442cb0e 2584 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2585 sayNO;
2586 nextchr = UCHARAT(++locinput);
2587 break;
d6a28714 2588 case NDIGITL:
b8c5462f
JH
2589 PL_reg_flags |= RF_tainted;
2590 /* FALL THROUGH */
d6a28714 2591 case NDIGIT:
9442cb0e 2592 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2593 sayNO;
1aa99e6b 2594 if (do_utf8) {
8269fa76 2595 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2596 if (OP(scan) == NDIGIT
3568d838 2597 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2598 : isDIGIT_LC_utf8((U8*)locinput))
2599 {
a0ed51b3 2600 sayNO;
9442cb0e 2601 }
6f06b55f 2602 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2603 nextchr = UCHARAT(locinput);
2604 break;
2605 }
ffc61ed2 2606 if (OP(scan) == NDIGIT
9442cb0e 2607 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2608 sayNO;
2609 nextchr = UCHARAT(++locinput);
2610 break;
2611 case CLUMP:
b7c83a7e 2612 if (locinput >= PL_regeol)
a0ed51b3 2613 sayNO;
b7c83a7e
JH
2614 if (do_utf8) {
2615 LOAD_UTF8_CHARCLASS(mark,"~");
2616 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2617 sayNO;
2618 locinput += PL_utf8skip[nextchr];
2619 while (locinput < PL_regeol &&
2620 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
2621 locinput += UTF8SKIP(locinput);
2622 if (locinput > PL_regeol)
2623 sayNO;
eb08e2da
JH
2624 }
2625 else
2626 locinput++;
a0ed51b3
LW
2627 nextchr = UCHARAT(locinput);
2628 break;
c8756f30 2629 case REFFL:
3280af22 2630 PL_reg_flags |= RF_tainted;
c8756f30 2631 /* FALL THROUGH */
c277df42 2632 case REF:
c8756f30 2633 case REFF:
c277df42 2634 n = ARG(scan); /* which paren pair */
cf93c79d 2635 ln = PL_regstartp[n];
2c2d71f5 2636 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2637 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2638 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2639 if (ln == PL_regendp[n])
a0d0e21e 2640 break;
a0ed51b3 2641
cf93c79d 2642 s = PL_bostr + ln;
1aa99e6b 2643 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2644 char *l = locinput;
cf93c79d 2645 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2646 /*
2647 * Note that we can't do the "other character" lookup trick as
2648 * in the 8-bit case (no pun intended) because in Unicode we
2649 * have to map both upper and title case to lower case.
2650 */
2651 if (OP(scan) == REFF) {
a2a2844f 2652 STRLEN ulen1, ulen2;
e7ae6809
JH
2653 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
2654 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a0ed51b3
LW
2655 while (s < e) {
2656 if (l >= PL_regeol)
2657 sayNO;
a2a2844f
JH
2658 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2659 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
7114a2d2 2660 if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
a0ed51b3 2661 sayNO;
a2a2844f
JH
2662 s += ulen1;
2663 l += ulen2;
a0ed51b3
LW
2664 }
2665 }
2666 locinput = l;
2667 nextchr = UCHARAT(locinput);
2668 break;
2669 }
2670
a0d0e21e 2671 /* Inline the first character, for speed. */
76e3520e 2672 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2673 (OP(scan) == REF ||
2674 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2675 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2676 sayNO;
cf93c79d 2677 ln = PL_regendp[n] - ln;
3280af22 2678 if (locinput + ln > PL_regeol)
4633a7c4 2679 sayNO;
c8756f30
AK
2680 if (ln > 1 && (OP(scan) == REF
2681 ? memNE(s, locinput, ln)
2682 : (OP(scan) == REFF
2683 ? ibcmp(s, locinput, ln)
2684 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2685 sayNO;
a0d0e21e 2686 locinput += ln;
76e3520e 2687 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2688 break;
2689
2690 case NOTHING:
c277df42 2691 case TAIL:
a0d0e21e
LW
2692 break;
2693 case BACK:
2694 break;
c277df42
IZ
2695 case EVAL:
2696 {
2697 dSP;
533c011a 2698 OP_4tree *oop = PL_op;
3280af22
NIS
2699 COP *ocurcop = PL_curcop;
2700 SV **ocurpad = PL_curpad;
c277df42 2701 SV *ret;
9041c2e3 2702
c277df42 2703 n = ARG(scan);
533c011a 2704 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2705 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2706 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2707 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2708
8e5e9ebe
RGS
2709 {
2710 SV **before = SP;
2711 CALLRUNOPS(aTHX); /* Scalar context. */
2712 SPAGAIN;
2713 if (SP == before)
2714 ret = Nullsv; /* protect against empty (?{}) blocks. */
2715 else {
2716 ret = POPs;
2717 PUTBACK;
2718 }
2719 }
2720
0f5d15d6
IZ
2721 PL_op = oop;
2722 PL_curpad = ocurpad;
2723 PL_curcop = ocurcop;
c277df42 2724 if (logical) {
0f5d15d6
IZ
2725 if (logical == 2) { /* Postponed subexpression. */
2726 regexp *re;
22c35a8c 2727 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2728 re_cc_state state;
0f5d15d6
IZ
2729 CHECKPOINT cp, lastcp;
2730
2731 if(SvROK(ret) || SvRMAGICAL(ret)) {
2732 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2733
2734 if(SvMAGICAL(sv))
14befaf4 2735 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2736 }
2737 if (mg) {
2738 re = (regexp *)mg->mg_obj;
df0003d4 2739 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2740 }
2741 else {
2742 STRLEN len;
2743 char *t = SvPV(ret, len);
2744 PMOP pm;
2745 char *oprecomp = PL_regprecomp;
2746 I32 osize = PL_regsize;
2747 I32 onpar = PL_regnpar;
2748
5fcd1c1b 2749 Zero(&pm, 1, PMOP);
cea2e8a9 2750 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2751 if (!(SvFLAGS(ret)
0f5d15d6 2752 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2753 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2754 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2755 PL_regprecomp = oprecomp;
2756 PL_regsize = osize;
2757 PL_regnpar = onpar;
2758 }
2759 DEBUG_r(
9041c2e3 2760 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2761 "Entering embedded `%s%.60s%s%s'\n",
2762 PL_colors[0],
2763 re->precomp,
2764 PL_colors[1],
2765 (strlen(re->precomp) > 60 ? "..." : ""))
2766 );
2767 state.node = next;
2768 state.prev = PL_reg_call_cc;
2769 state.cc = PL_regcc;
2770 state.re = PL_reg_re;
2771
2ab05381 2772 PL_regcc = 0;
9041c2e3 2773
0f5d15d6 2774 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2775 REGCP_SET(lastcp);
0f5d15d6
IZ
2776 cache_re(re);
2777 state.ss = PL_savestack_ix;
2778 *PL_reglastparen = 0;
a01268b5 2779 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2780 PL_reg_call_cc = &state;
2781 PL_reginput = locinput;
2c2d71f5
JH
2782
2783 /* XXXX This is too dramatic a measure... */
2784 PL_reg_maxiter = 0;
2785
0f5d15d6 2786 if (regmatch(re->program + 1)) {
2c914db6
IZ
2787 /* Even though we succeeded, we need to restore
2788 global variables, since we may be wrapped inside
2789 SUSPEND, thus the match may be not finished yet. */
2790
2791 /* XXXX Do this only if SUSPENDed? */
2792 PL_reg_call_cc = state.prev;
2793 PL_regcc = state.cc;
2794 PL_reg_re = state.re;
2795 cache_re(PL_reg_re);
2796
2797 /* XXXX This is too dramatic a measure... */
2798 PL_reg_maxiter = 0;
2799
2800 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2801 ReREFCNT_dec(re);
2802 regcpblow(cp);
2803 sayYES;
2804 }
0f5d15d6 2805 ReREFCNT_dec(re);
02db2b7b 2806 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2807 regcppop();
2808 PL_reg_call_cc = state.prev;
2809 PL_regcc = state.cc;
2810 PL_reg_re = state.re;
d3790889 2811 cache_re(PL_reg_re);
2c2d71f5
JH
2812
2813 /* XXXX This is too dramatic a measure... */
2814 PL_reg_maxiter = 0;
2815
8e514ae6 2816 logical = 0;
0f5d15d6
IZ
2817 sayNO;
2818 }
c277df42 2819 sw = SvTRUE(ret);
0f5d15d6 2820 logical = 0;
a0ed51b3
LW
2821 }
2822 else
3280af22 2823 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2824 break;
2825 }
a0d0e21e 2826 case OPEN:
c277df42 2827 n = ARG(scan); /* which paren pair */
3280af22
NIS
2828 PL_reg_start_tmp[n] = locinput;
2829 if (n > PL_regsize)
2830 PL_regsize = n;
a0d0e21e
LW
2831 break;
2832 case CLOSE:
c277df42 2833 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2834 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2835 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2836 if (n > *PL_reglastparen)
2837 *PL_reglastparen = n;
a01268b5 2838 *PL_reglastcloseparen = n;
a0d0e21e 2839 break;
c277df42
IZ
2840 case GROUPP:
2841 n = ARG(scan); /* which paren pair */
cf93c79d 2842 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2843 break;
2844 case IFTHEN:
2c2d71f5 2845 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2846 if (sw)
2847 next = NEXTOPER(NEXTOPER(scan));
2848 else {
2849 next = scan + ARG(scan);
2850 if (OP(next) == IFTHEN) /* Fake one. */
2851 next = NEXTOPER(NEXTOPER(next));
2852 }
2853 break;
2854 case LOGICAL:
0f5d15d6 2855 logical = scan->flags;
c277df42 2856 break;
2ab05381
IZ
2857/*******************************************************************
2858 PL_regcc contains infoblock about the innermost (...)* loop, and
2859 a pointer to the next outer infoblock.
2860
2861 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2862
2863 1) After matching X, regnode for CURLYX is processed;
2864
9041c2e3 2865 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2866 regmatch() recursively with the starting point at WHILEM node;
2867
2868 3) Each hit of WHILEM node tries to match A and Z (in the order
2869 depending on the current iteration, min/max of {min,max} and
2870 greediness). The information about where are nodes for "A"
2871 and "Z" is read from the infoblock, as is info on how many times "A"
2872 was already matched, and greediness.
2873
2874 4) After A matches, the same WHILEM node is hit again.
2875
2876 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2877 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2878 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2879 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2880 of the external loop.
2881
2882 Currently present infoblocks form a tree with a stem formed by PL_curcc
2883 and whatever it mentions via ->next, and additional attached trees
2884 corresponding to temporarily unset infoblocks as in "5" above.
2885
9041c2e3 2886 In the following picture infoblocks for outer loop of
2ab05381
IZ
2887 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2888 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2889 infoblocks are drawn below the "reset" infoblock.
2890
2891 In fact in the picture below we do not show failed matches for Z and T
2892 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2893 more obvious *why* one needs to *temporary* unset infoblocks.]
2894
2895 Matched REx position InfoBlocks Comment
2896 (Y(A)*?Z)*?T x
2897 Y(A)*?Z)*?T x <- O
2898 Y (A)*?Z)*?T x <- O
2899 Y A)*?Z)*?T x <- O <- I
2900 YA )*?Z)*?T x <- O <- I
2901 YA A)*?Z)*?T x <- O <- I
2902 YAA )*?Z)*?T x <- O <- I
2903 YAA Z)*?T x <- O # Temporary unset I
2904 I
2905
2906 YAAZ Y(A)*?Z)*?T x <- O
2907 I
2908
2909 YAAZY (A)*?Z)*?T x <- O
2910 I
2911
2912 YAAZY A)*?Z)*?T x <- O <- I
2913 I
2914
2915 YAAZYA )*?Z)*?T x <- O <- I
2916 I
2917
2918 YAAZYA Z)*?T x <- O # Temporary unset I
2919 I,I
2920
2921 YAAZYAZ )*?T x <- O
2922 I,I
2923
2924 YAAZYAZ T x # Temporary unset O
2925 O
2926 I,I
2927
2928 YAAZYAZT x
2929 O
2930 I,I
2931 *******************************************************************/
a0d0e21e
LW
2932 case CURLYX: {
2933 CURCUR cc;
3280af22 2934 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2935 /* No need to save/restore up to this paren */
2936 I32 parenfloor = scan->flags;
c277df42
IZ
2937
2938 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2939 next += ARG(next);
3280af22
NIS
2940 cc.oldcc = PL_regcc;
2941 PL_regcc = &cc;
cb434fcc
IZ
2942 /* XXXX Probably it is better to teach regpush to support
2943 parenfloor > PL_regsize... */
2944 if (parenfloor > *PL_reglastparen)
2945 parenfloor = *PL_reglastparen; /* Pessimization... */
2946 cc.parenfloor = parenfloor;
a0d0e21e
LW
2947 cc.cur = -1;
2948 cc.min = ARG1(scan);
2949 cc.max = ARG2(scan);
c277df42 2950 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2951 cc.next = next;
2952 cc.minmod = minmod;
2953 cc.lastloc = 0;
3280af22 2954 PL_reginput = locinput;
a0d0e21e
LW
2955 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2956 regcpblow(cp);
3280af22 2957 PL_regcc = cc.oldcc;
4633a7c4 2958 saySAME(n);
a0d0e21e
LW
2959 }
2960 /* NOT REACHED */
2961 case WHILEM: {
2962 /*
2963 * This is really hard to understand, because after we match
2964 * what we're trying to match, we must make sure the rest of
2c2d71f5 2965 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2966 * to go back UP the parse tree by recursing ever deeper. And
2967 * if it fails, we have to reset our parent's current state
2968 * that we can try again after backing off.
2969 */
2970
c277df42 2971 CHECKPOINT cp, lastcp;
3280af22 2972 CURCUR* cc = PL_regcc;
c277df42
IZ
2973 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2974
4633a7c4 2975 n = cc->cur + 1; /* how many we know we matched */
3280af22 2976 PL_reginput = locinput;
a0d0e21e 2977
c277df42 2978 DEBUG_r(
9041c2e3
NIS
2979 PerlIO_printf(Perl_debug_log,
2980 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2981 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2982 (long)n, (long)cc->min,
c277df42
IZ
2983 (long)cc->max, (long)cc)
2984 );
4633a7c4 2985
a0d0e21e
LW
2986 /* If degenerate scan matches "", assume scan done. */
2987
579cf2c3 2988 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2989 PL_regcc = cc->oldcc;
2ab05381
IZ
2990 if (PL_regcc)
2991 ln = PL_regcc->cur;
c277df42 2992 DEBUG_r(
c3464db5
DD
2993 PerlIO_printf(Perl_debug_log,
2994 "%*s empty match detected, try continuation...\n",
3280af22 2995 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2996 );
a0d0e21e 2997 if (regmatch(cc->next))
4633a7c4 2998 sayYES;
2ab05381
IZ
2999 if (PL_regcc)
3000 PL_regcc->cur = ln;
3280af22 3001 PL_regcc = cc;
4633a7c4 3002 sayNO;
a0d0e21e
LW
3003 }
3004
3005 /* First just match a string of min scans. */
3006
3007 if (n < cc->min) {
3008 cc->cur = n;
3009 cc->lastloc = locinput;
4633a7c4
LW
3010 if (regmatch(cc->scan))
3011 sayYES;
3012 cc->cur = n - 1;
c277df42 3013 cc->lastloc = lastloc;
4633a7c4 3014 sayNO;
a0d0e21e
LW
3015 }
3016
2c2d71f5
JH
3017 if (scan->flags) {
3018 /* Check whether we already were at this position.
3019 Postpone detection until we know the match is not
3020 *that* much linear. */
3021 if (!PL_reg_maxiter) {
3022 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
3023 PL_reg_leftiter = PL_reg_maxiter;
3024 }
3025 if (PL_reg_leftiter-- == 0) {
3026 I32 size = (PL_reg_maxiter + 7)/8;
3027 if (PL_reg_poscache) {
3028 if (PL_reg_poscache_size < size) {
3029 Renew(PL_reg_poscache, size, char);
3030 PL_reg_poscache_size = size;
3031 }
3032 Zero(PL_reg_poscache, size, char);
3033 }
3034 else {
3035 PL_reg_poscache_size = size;
3036 Newz(29, PL_reg_poscache, size, char);
3037 }
3038 DEBUG_r(
3039 PerlIO_printf(Perl_debug_log,
3040 "%sDetected a super-linear match, switching on caching%s...\n",
3041 PL_colors[4], PL_colors[5])
3042 );
3043 }
3044 if (PL_reg_leftiter < 0) {
3045 I32 o = locinput - PL_bostr, b;
3046
3047 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
3048 b = o % 8;
3049 o /= 8;
3050 if (PL_reg_poscache[o] & (1<<b)) {
3051 DEBUG_r(
3052 PerlIO_printf(Perl_debug_log,
3053 "%*s already tried at this position...\n",
3054 REPORT_CODE_OFF+PL_regindent*2, "")
3055 );
7821416a 3056 sayNO_SILENT;
2c2d71f5
JH
3057 }
3058 PL_reg_poscache[o] |= (1<<b);
3059 }
3060 }
3061
a0d0e21e
LW
3062 /* Prefer next over scan for minimal matching. */
3063
3064 if (cc->minmod) {
3280af22 3065 PL_regcc = cc->oldcc;
2ab05381
IZ
3066 if (PL_regcc)
3067 ln = PL_regcc->cur;
5f05dabc 3068 cp = regcppush(cc->parenfloor);
02db2b7b 3069 REGCP_SET(lastcp);
5f05dabc 3070 if (regmatch(cc->next)) {
c277df42 3071 regcpblow(cp);
4633a7c4 3072 sayYES; /* All done. */
5f05dabc 3073 }
02db2b7b 3074 REGCP_UNWIND(lastcp);
5f05dabc 3075 regcppop();
2ab05381
IZ
3076 if (PL_regcc)
3077 PL_regcc->cur = ln;
3280af22 3078 PL_regcc = cc;
a0d0e21e 3079
c277df42 3080 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 3081 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
3082 && !(PL_reg_flags & RF_warned)) {
3083 PL_reg_flags |= RF_warned;
e476b1b5 3084 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
3085 "Complex regular subexpression recursion",
3086 REG_INFTY - 1);
c277df42 3087 }
4633a7c4 3088 sayNO;
c277df42 3089 }
a687059c 3090
c277df42 3091 DEBUG_r(
c3464db5
DD
3092 PerlIO_printf(Perl_debug_log,
3093 "%*s trying longer...\n",
3280af22 3094 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 3095 );
a0d0e21e 3096 /* Try scanning more and see if it helps. */
3280af22 3097 PL_reginput = locinput;
a0d0e21e
LW
3098 cc->cur = n;
3099 cc->lastloc = locinput;
5f05dabc 3100 cp = regcppush(cc->parenfloor);
02db2b7b 3101 REGCP_SET(lastcp);
5f05dabc 3102 if (regmatch(cc->scan)) {
c277df42 3103 regcpblow(cp);
4633a7c4 3104 sayYES;
5f05dabc 3105 }
02db2b7b 3106 REGCP_UNWIND(lastcp);
5f05dabc 3107 regcppop();
4633a7c4 3108 cc->cur = n - 1;
c277df42 3109 cc->lastloc = lastloc;
4633a7c4 3110 sayNO;
a0d0e21e
LW
3111 }
3112
3113 /* Prefer scan over next for maximal matching. */
3114
3115 if (n < cc->max) { /* More greed allowed? */
5f05dabc 3116 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
3117 cc->cur = n;
3118 cc->lastloc = locinput;
02db2b7b 3119 REGCP_SET(lastcp);
5f05dabc 3120 if (regmatch(cc->scan)) {
c277df42 3121 regcpblow(cp);
4633a7c4 3122 sayYES;
5f05dabc 3123 }
02db2b7b 3124 REGCP_UNWIND(lastcp);
a0d0e21e 3125 regcppop(); /* Restore some previous $<digit>s? */
3280af22 3126 PL_reginput = locinput;
c277df42 3127 DEBUG_r(
c3464db5
DD
3128 PerlIO_printf(Perl_debug_log,
3129 "%*s failed, try continuation...\n",
3280af22 3130 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
3131 );
3132 }
9041c2e3 3133 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 3134 && !(PL_reg_flags & RF_warned)) {
3280af22 3135 PL_reg_flags |= RF_warned;
e476b1b5 3136 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
3137 "Complex regular subexpression recursion",
3138 REG_INFTY - 1);
a0d0e21e
LW
3139 }
3140
3141 /* Failed deeper matches of scan, so see if this one works. */
3280af22 3142 PL_regcc = cc->oldcc;
2ab05381
IZ
3143 if (PL_regcc)
3144 ln = PL_regcc->cur;
a0d0e21e 3145 if (regmatch(cc->next))
4633a7c4 3146 sayYES;
2ab05381
IZ
3147 if (PL_regcc)
3148 PL_regcc->cur = ln;
3280af22 3149 PL_regcc = cc;
4633a7c4 3150 cc->cur = n - 1;
c277df42 3151 cc->lastloc = lastloc;
4633a7c4 3152 sayNO;
a0d0e21e
LW
3153 }
3154 /* NOT REACHED */
9041c2e3 3155 case BRANCHJ:
c277df42
IZ
3156 next = scan + ARG(scan);
3157 if (next == scan)
3158 next = NULL;
3159 inner = NEXTOPER(NEXTOPER(scan));
3160 goto do_branch;
9041c2e3 3161 case BRANCH:
c277df42
IZ
3162 inner = NEXTOPER(scan);
3163 do_branch:
3164 {
c277df42
IZ
3165 c1 = OP(scan);
3166 if (OP(next) != c1) /* No choice. */
3167 next = inner; /* Avoid recursion. */
a0d0e21e 3168 else {
02db2b7b
IZ
3169 I32 lastparen = *PL_reglastparen;
3170 I32 unwind1;
3171 re_unwind_branch_t *uw;
3172
3173 /* Put unwinding data on stack */
3174 unwind1 = SSNEWt(1,re_unwind_branch_t);
3175 uw = SSPTRt(unwind1,re_unwind_branch_t);
3176 uw->prev = unwind;
3177 unwind = unwind1;
3178 uw->type = ((c1 == BRANCH)
3179 ? RE_UNWIND_BRANCH
3180 : RE_UNWIND_BRANCHJ);
3181 uw->lastparen = lastparen;
3182 uw->next = next;
3183 uw->locinput = locinput;
3184 uw->nextchr = nextchr;
3185#ifdef DEBUGGING
3186 uw->regindent = ++PL_regindent;
3187#endif
c277df42 3188
02db2b7b
IZ
3189 REGCP_SET(uw->lastcp);
3190
3191 /* Now go into the first branch */
3192 next = inner;
a687059c 3193 }
a0d0e21e
LW
3194 }
3195 break;
3196 case MINMOD:
3197 minmod = 1;
3198 break;
c277df42
IZ
3199 case CURLYM:
3200 {
00db4c45 3201 I32 l = 0;
c277df42 3202 CHECKPOINT lastcp;
9041c2e3 3203
c277df42
IZ
3204 /* We suppose that the next guy does not need
3205 backtracking: in particular, it is of constant length,
3206 and has no parenths to influence future backrefs. */
3207 ln = ARG1(scan); /* min to match */
3208 n = ARG2(scan); /* max to match */
c277df42
IZ
3209 paren = scan->flags;
3210 if (paren) {
3280af22
NIS
3211 if (paren > PL_regsize)
3212 PL_regsize = paren;
3213 if (paren > *PL_reglastparen)
3214 *PL_reglastparen = paren;
c277df42 3215 }
dc45a647 3216 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3217 if (paren)
3218 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3219 PL_reginput = locinput;
c277df42
IZ
3220 if (minmod) {
3221 minmod = 0;
3222 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3223 sayNO;
f31a99c8
HS
3224 /* if we matched something zero-length we don't need to
3225 backtrack - capturing parens are already defined, so
3226 the caveat in the maximal case doesn't apply
3227
3228 XXXX if ln == 0, we can redo this check first time
3229 through the following loop
3230 */
3231 if (ln && l == 0)
3232 n = ln; /* don't backtrack */
3280af22 3233 locinput = PL_reginput;
cca55fe3 3234 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3235 regnode *text_node = next;
3236
cca55fe3 3237 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3238
cca55fe3 3239 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3240 else {
cca55fe3
JP
3241 if (PL_regkind[(U8)OP(text_node)] == REF) {
3242 I32 n, ln;
3243 n = ARG(text_node); /* which paren pair */
3244 ln = PL_regstartp[n];
3245 /* assume yes if we haven't seen CLOSEn */
3246 if (
3247 *PL_reglastparen < n ||
3248 ln == -1 ||
3249 ln == PL_regendp[n]
3250 ) {
3251 c1 = c2 = -1000;
3252 goto assume_ok_MM;
3253 }
3254 c1 = *(PL_bostr + ln);
3255 }
3256 else { c1 = (U8)*STRING(text_node); }
af5decee 3257 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3258 c2 = PL_fold[c1];
af5decee 3259 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3260 c2 = PL_fold_locale[c1];
3261 else
3262 c2 = c1;
3263 }
a0ed51b3
LW
3264 }
3265 else
c277df42 3266 c1 = c2 = -1000;
cca55fe3 3267 assume_ok_MM:
02db2b7b 3268 REGCP_SET(lastcp);
5f4b28b2 3269 /* This may be improved if l == 0. */
c277df42
IZ
3270 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3271 /* If it could work, try it. */
3272 if (c1 == -1000 ||
3280af22
NIS
3273 UCHARAT(PL_reginput) == c1 ||
3274 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3275 {
3276 if (paren) {
f31a99c8 3277 if (ln) {
cf93c79d
IZ
3278 PL_regstartp[paren] =
3279 HOPc(PL_reginput, -l) - PL_bostr;
3280 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3281 }
3282 else
cf93c79d 3283 PL_regendp[paren] = -1;
c277df42
IZ
3284 }
3285 if (regmatch(next))
3286 sayYES;
02db2b7b 3287 REGCP_UNWIND(lastcp);
c277df42
IZ
3288 }
3289 /* Couldn't or didn't -- move forward. */
3280af22 3290 PL_reginput = locinput;
c277df42
IZ
3291 if (regrepeat_hard(scan, 1, &l)) {
3292 ln++;
3280af22 3293 locinput = PL_reginput;
c277df42
IZ
3294 }
3295 else
3296 sayNO;
3297 }
a0ed51b3
LW
3298 }
3299 else {
c277df42 3300 n = regrepeat_hard(scan, n, &l);
f31a99c8
HS
3301 /* if we matched something zero-length we don't need to
3302 backtrack, unless the minimum count is zero and we
3303 are capturing the result - in that case the capture
3304 being defined or not may affect later execution
3305 */
3306 if (n != 0 && l == 0 && !(paren && ln == 0))
3307 ln = n; /* don't backtrack */
3280af22 3308 locinput = PL_reginput;
c277df42 3309 DEBUG_r(
5c0ca799 3310 PerlIO_printf(Perl_debug_log,
faccc32b 3311 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3312 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3313 (IV) n, (IV)l)
c277df42
IZ
3314 );
3315 if (n >= ln) {
cca55fe3 3316 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3317 regnode *text_node = next;
3318
cca55fe3 3319 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3320
cca55fe3 3321 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3322 else {
cca55fe3
JP
3323 if (PL_regkind[(U8)OP(text_node)] == REF) {
3324 I32 n, ln;
3325 n = ARG(text_node); /* which paren pair */
3326 ln = PL_regstartp[n];
3327 /* assume yes if we haven't seen CLOSEn */
3328 if (
3329 *PL_reglastparen < n ||
3330 ln == -1 ||
3331 ln == PL_regendp[n]
3332 ) {
3333 c1 = c2 = -1000;
3334 goto assume_ok_REG;
3335 }
3336 c1 = *(PL_bostr + ln);
3337 }
3338 else { c1 = (U8)*STRING(text_node); }
3339
af5decee 3340 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3341 c2 = PL_fold[c1];
af5decee 3342 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf
JP
3343 c2 = PL_fold_locale[c1];
3344 else
3345 c2 = c1;
3346 }
a0ed51b3
LW
3347 }
3348 else
c277df42
IZ
3349 c1 = c2 = -1000;
3350 }
cca55fe3 3351 assume_ok_REG:
02db2b7b 3352 REGCP_SET(lastcp);
c277df42
IZ
3353 while (n >= ln) {
3354 /* If it could work, try it. */
3355 if (c1 == -1000 ||
3280af22
NIS
3356 UCHARAT(PL_reginput) == c1 ||
3357 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3358 {
3359 DEBUG_r(
c3464db5 3360 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3361 "%*s trying tail with n=%"IVdf"...\n",
3362 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3363 );
3364 if (paren) {
3365 if (n) {
cf93c79d
IZ
3366 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3367 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3368 }
a0ed51b3 3369 else
cf93c79d 3370 PL_regendp[paren] = -1;
c277df42 3371 }
a0ed51b3
LW
3372 if (regmatch(next))
3373 sayYES;
02db2b7b 3374 REGCP_UNWIND(lastcp);
a0ed51b3 3375 }
c277df42
IZ
3376 /* Couldn't or didn't -- back up. */
3377 n--;
dfe13c55 3378 locinput = HOPc(locinput, -l);
3280af22 3379 PL_reginput = locinput;
c277df42
IZ
3380 }
3381 }
3382 sayNO;
3383 break;
3384 }
3385 case CURLYN:
3386 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3387 if (paren > PL_regsize)
3388 PL_regsize = paren;
3389 if (paren > *PL_reglastparen)
3390 *PL_reglastparen = paren;
c277df42
IZ
3391 ln = ARG1(scan); /* min to match */
3392 n = ARG2(scan); /* max to match */
dc45a647 3393 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3394 goto repeat;
a0d0e21e 3395 case CURLY:
c277df42 3396 paren = 0;
a0d0e21e
LW
3397 ln = ARG1(scan); /* min to match */
3398 n = ARG2(scan); /* max to match */
dc45a647 3399 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3400 goto repeat;
3401 case STAR:
3402 ln = 0;
c277df42 3403 n = REG_INFTY;
a0d0e21e 3404 scan = NEXTOPER(scan);
c277df42 3405 paren = 0;
a0d0e21e
LW
3406 goto repeat;
3407 case PLUS:
c277df42
IZ
3408 ln = 1;
3409 n = REG_INFTY;
3410 scan = NEXTOPER(scan);
3411 paren = 0;
3412 repeat:
a0d0e21e
LW
3413 /*
3414 * Lookahead to avoid useless match attempts
3415 * when we know what character comes next.
3416 */
5f80c4cf
JP
3417
3418 /*
3419 * Used to only do .*x and .*?x, but now it allows
3420 * for )'s, ('s and (?{ ... })'s to be in the way
3421 * of the quantifier and the EXACT-like node. -- japhy
3422 */
3423
cca55fe3 3424 if (HAS_TEXT(next) || JUMPABLE(next)) {
5f80c4cf
JP
3425 U8 *s;
3426 regnode *text_node = next;
3427
cca55fe3 3428 if (! HAS_TEXT(text_node)) FIND_NEXT_IMPT(text_node);
5f80c4cf 3429
cca55fe3 3430 if (! HAS_TEXT(text_node)) c1 = c2 = -1000;
5f80c4cf 3431 else {
cca55fe3
JP
3432 if (PL_regkind[(U8)OP(text_node)] == REF) {
3433 I32 n, ln;
3434 n = ARG(text_node); /* which paren pair */
3435 ln = PL_regstartp[n];
3436 /* assume yes if we haven't seen CLOSEn */
3437 if (
3438 *PL_reglastparen < n ||
3439 ln == -1 ||
3440 ln == PL_regendp[n]
3441 ) {
3442 c1 = c2 = -1000;
3443 goto assume_ok_easy;
3444 }
9246c65e 3445 s = (U8*)PL_bostr + ln;
cca55fe3
JP
3446 }
3447 else { s = (U8*)STRING(text_node); }
5f80c4cf
JP
3448
3449 if (!UTF) {
3450 c2 = c1 = *s;
f65d3ee7 3451 if (OP(text_node) == EXACTF || OP(text_node) == REFF)
5f80c4cf 3452 c2 = PL_fold[c1];
f65d3ee7 3453 else if (OP(text_node) == EXACTFL || OP(text_node) == REFFL)
5f80c4cf 3454 c2 = PL_fold_locale[c1];
1aa99e6b 3455 }
5f80c4cf 3456 else { /* UTF */
f65d3ee7 3457 if (OP(text_node) == EXACTF || OP(text_node) == REFF) {
a2a2844f 3458 STRLEN ulen1, ulen2;
e7ae6809
JH
3459 U8 tmpbuf1[UTF8_MAXLEN_UCLC+1];
3460 U8 tmpbuf2[UTF8_MAXLEN_UCLC+1];
a2a2844f
JH
3461
3462 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3463 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3464
3465 c1 = utf8_to_uvuni(tmpbuf1, 0);
3466 c2 = utf8_to_uvuni(tmpbuf2, 0);
5f80c4cf
JP
3467 }
3468 else {
3469 c2 = c1 = utf8_to_uvchr(s, NULL);
3470 }
1aa99e6b
IH
3471 }
3472 }
bbce6d69 3473 }
a0d0e21e 3474 else
bbce6d69 3475 c1 = c2 = -1000;
cca55fe3 3476 assume_ok_easy:
3280af22 3477 PL_reginput = locinput;
a0d0e21e 3478 if (minmod) {
c277df42 3479 CHECKPOINT lastcp;
a0d0e21e
LW
3480 minmod = 0;
3481 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3482 sayNO;
a0ed51b3 3483 locinput = PL_reginput;
02db2b7b 3484 REGCP_SET(lastcp);
0fe9bf95 3485 if (c1 != -1000) {
1aa99e6b 3486 char *e; /* Should not check after this */
0fe9bf95
IZ
3487 char *old = locinput;
3488
1aa99e6b 3489 if (n == REG_INFTY) {
0fe9bf95 3490 e = PL_regeol - 1;
1aa99e6b
IH
3491 if (do_utf8)
3492 while (UTF8_IS_CONTINUATION(*(U8*)e))
3493 e--;
3494 }
3495 else if (do_utf8) {
3496 int m = n - ln;
3497 for (e = locinput;
3498 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3499 e += UTF8SKIP(e);
3500 }
3501 else {
3502 e = locinput + n - ln;
3503 if (e >= PL_regeol)
3504 e = PL_regeol - 1;
3505 }
0fe9bf95 3506 while (1) {
1aa99e6b 3507 int count;
0fe9bf95 3508 /* Find place 'next' could work */
1aa99e6b
IH
3509 if (!do_utf8) {
3510 if (c1 == c2) {
a8e8ab15
JH
3511 while (locinput <= e &&
3512 UCHARAT(locinput) != c1)
1aa99e6b
IH
3513 locinput++;
3514 } else {
9041c2e3 3515 while (locinput <= e
a8e8ab15
JH
3516 && UCHARAT(locinput) != c1
3517 && UCHARAT(locinput) != c2)
1aa99e6b
IH
3518 locinput++;
3519 }
3520 count = locinput - old;
3521 }
3522 else {
3523 STRLEN len;
3524 if (c1 == c2) {
3525 for (count = 0;
3526 locinput <= e &&
9041c2e3 3527 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3528 count++)
3529 locinput += len;
3530
3531 } else {
3532 for (count = 0; locinput <= e; count++) {
9041c2e3 3533 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3534 if (c == c1 || c == c2)
3535 break;
9041c2e3 3536 locinput += len;
1aa99e6b
IH
3537 }
3538 }
0fe9bf95 3539 }
9041c2e3 3540 if (locinput > e)
0fe9bf95
IZ
3541 sayNO;
3542 /* PL_reginput == old now */
3543 if (locinput != old) {
3544 ln = 1; /* Did some */
1aa99e6b 3545 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3546 sayNO;
3547 }
3548 /* PL_reginput == locinput now */
29d1e993 3549 TRYPAREN(paren, ln, locinput);
0fe9bf95 3550 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3551 REGCP_UNWIND(lastcp);
0fe9bf95 3552 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3553 old = locinput;
3554 if (do_utf8)
3555 locinput += UTF8SKIP(locinput);
3556 else
3557 locinput++;
0fe9bf95
IZ
3558 }
3559 }
3560 else
c277df42 3561 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3562 UV c;
3563 if (c1 != -1000) {
3564 if (do_utf8)
9041c2e3 3565 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3566 else
9041c2e3 3567 c = UCHARAT(PL_reginput);
2390ecbc
PP
3568 /* If it could work, try it. */
3569 if (c == c1 || c == c2)
3570 {
3571 TRYPAREN(paren, n, PL_reginput);
3572 REGCP_UNWIND(lastcp);
3573 }
1aa99e6b 3574 }
a0d0e21e 3575 /* If it could work, try it. */
2390ecbc 3576 else if (c1 == -1000)
bbce6d69 3577 {
29d1e993 3578 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3579 REGCP_UNWIND(lastcp);
bbce6d69 3580 }
c277df42 3581 /* Couldn't or didn't -- move forward. */
a0ed51b3 3582 PL_reginput = locinput;
a0d0e21e
LW
3583 if (regrepeat(scan, 1)) {
3584 ln++;
a0ed51b3
LW
3585 locinput = PL_reginput;
3586 }
3587 else
4633a7c4 3588 sayNO;
a0d0e21e
LW
3589 }
3590 }
3591 else {
c277df42 3592 CHECKPOINT lastcp;