This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Win32 update for t/io/fs.t (may effect NetWare)
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
c5be433b
GS
83#ifdef PERL_IN_XSUB_RE
84# if defined(PERL_CAPI) || defined(PERL_OBJECT)
85# include "XSUB.h"
86# endif
87#endif
88
a687059c
LW
89#include "regcomp.h"
90
c277df42
IZ
91#define RF_tainted 1 /* tainted information used? */
92#define RF_warned 2 /* warned about big count? */
ce862d02 93#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
94#define RF_utf8 8 /* String contains multibyte chars? */
95
96#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
97
98#define RS_init 1 /* eval environment created */
99#define RS_set 2 /* replsv value is set */
c277df42 100
a687059c
LW
101#ifndef STATIC
102#define STATIC static
103#endif
104
c277df42
IZ
105/*
106 * Forwards.
107 */
108
a0ed51b3 109#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
1aa99e6b 110#define CHR_DIST(a,b) (DO_UTF8(PL_reg_sv) ? utf8_distance(a,b) : a - b)
a0ed51b3 111
dfe13c55
GS
112#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
113#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
1aa99e6b
IH
114#define HOP(pos,off) (DO_UTF8(PL_reg_sv) ? reghop((U8*)pos, off) : (U8*)(pos + off))
115#define HOPMAYBE(pos,off) (DO_UTF8(PL_reg_sv) ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
116#define HOPc(pos,off) ((char*)HOP(pos,off))
117#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 118
efb30f32
HS
119#define HOPBACK(pos, off) ( \
120 (UTF && DO_UTF8(PL_reg_sv)) \
121 ? reghopmaybe((U8*)pos, -off) \
122 : (pos - off >= PL_bostr) \
123 ? (U8*)(pos - off) \
124 : (U8*)NULL \
125)
126#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
127
1aa99e6b
IH
128#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
129#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
130#define HOP3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
131#define HOPMAYBE3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
132#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
133#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
134
8269fa76 135#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 136
8269fa76 137static void restore_pos(pTHXo_ void *arg);
51371543 138
76e3520e 139STATIC CHECKPOINT
cea2e8a9 140S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 141{
3280af22 142 int retval = PL_savestack_ix;
b1ce53c5
JH
143#define REGCP_PAREN_ELEMS 4
144 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
145 int p;
146
e49a9654
IH
147 if (paren_elems_to_push < 0)
148 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
149
a01268b5 150#define REGCP_OTHER_ELEMS 6
b1ce53c5 151 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 152 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 153/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
154 SSPUSHINT(PL_regendp[p]);
155 SSPUSHINT(PL_regstartp[p]);
3280af22 156 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
157 SSPUSHINT(p);
158 }
b1ce53c5 159/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
160 SSPUSHINT(PL_regsize);
161 SSPUSHINT(*PL_reglastparen);
a01268b5 162 SSPUSHINT(*PL_reglastcloseparen);
3280af22 163 SSPUSHPTR(PL_reginput);
41123dfd
JH
164#define REGCP_FRAME_ELEMS 2
165/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
166 * are needed for the regexp context stack bookkeeping. */
167 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 168 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 169
a0d0e21e
LW
170 return retval;
171}
172
c277df42 173/* These are needed since we do not localize EVAL nodes: */
02db2b7b 174# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 175 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 176 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 177
02db2b7b 178# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 179 PerlIO_printf(Perl_debug_log, \
faccc32b 180 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 181 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 182
76e3520e 183STATIC char *
cea2e8a9 184S_regcppop(pTHX)
a0d0e21e 185{
b1ce53c5 186 I32 i;
a0d0e21e
LW
187 U32 paren = 0;
188 char *input;
cf93c79d 189 I32 tmps;
b1ce53c5
JH
190
191 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 192 i = SSPOPINT;
b1ce53c5
JH
193 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
194 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 195 input = (char *) SSPOPPTR;
a01268b5 196 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
197 *PL_reglastparen = SSPOPINT;
198 PL_regsize = SSPOPINT;
b1ce53c5
JH
199
200 /* Now restore the parentheses context. */
41123dfd
JH
201 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
202 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 203 paren = (U32)SSPOPINT;
3280af22 204 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
205 PL_regstartp[paren] = SSPOPINT;
206 tmps = SSPOPINT;
3280af22
NIS
207 if (paren <= *PL_reglastparen)
208 PL_regendp[paren] = tmps;
c277df42 209 DEBUG_r(
c3464db5 210 PerlIO_printf(Perl_debug_log,
b900a521 211 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 212 (UV)paren, (IV)PL_regstartp[paren],
b900a521 213 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 214 (IV)PL_regendp[paren],
3280af22 215 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 216 );
a0d0e21e 217 }
c277df42 218 DEBUG_r(
3280af22 219 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 220 PerlIO_printf(Perl_debug_log,
faccc32b
JH
221 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
222 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
223 }
224 );
daf18116 225#if 1
dafc8851
JH
226 /* It would seem that the similar code in regtry()
227 * already takes care of this, and in fact it is in
228 * a better location to since this code can #if 0-ed out
229 * but the code in regtry() is needed or otherwise tests
230 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
231 * (as of patchlevel 7877) will fail. Then again,
232 * this code seems to be necessary or otherwise
233 * building DynaLoader will fail:
234 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
235 * --jhi */
3280af22
NIS
236 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
237 if (paren > PL_regsize)
cf93c79d
IZ
238 PL_regstartp[paren] = -1;
239 PL_regendp[paren] = -1;
a0d0e21e 240 }
dafc8851 241#endif
a0d0e21e
LW
242 return input;
243}
244
0f5d15d6 245STATIC char *
cea2e8a9 246S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
247{
248 I32 tmp = PL_savestack_ix;
249
250 PL_savestack_ix = ss;
251 regcppop();
252 PL_savestack_ix = tmp;
942e002e 253 return Nullch;
0f5d15d6
IZ
254}
255
256typedef struct re_cc_state
257{
258 I32 ss;
259 regnode *node;
260 struct re_cc_state *prev;
261 CURCUR *cc;
262 regexp *re;
263} re_cc_state;
264
02db2b7b 265#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 266
29d1e993
HS
267#define TRYPAREN(paren, n, input) { \
268 if (paren) { \
269 if (n) { \
270 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
271 PL_regendp[paren] = input - PL_bostr; \
272 } \
273 else \
274 PL_regendp[paren] = -1; \
275 } \
276 if (regmatch(next)) \
277 sayYES; \
278 if (paren && n) \
279 PL_regendp[paren] = -1; \
280}
281
282
a687059c 283/*
e50aee73 284 * pregexec and friends
a687059c
LW
285 */
286
287/*
c277df42 288 - pregexec - match a regexp against a string
a687059c 289 */
c277df42 290I32
864dbfa3 291Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 292 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
293/* strend: pointer to null at end of string */
294/* strbeg: real beginning of string */
295/* minend: end of match must be >=minend after stringarg. */
296/* nosave: For optimizations. */
297{
298 return
9041c2e3 299 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
300 nosave ? 0 : REXEC_COPY_STR);
301}
0f5d15d6
IZ
302
303STATIC void
cea2e8a9 304S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
305{
306 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
307#ifdef DEBUGGING
308 PL_regprogram = prog->program;
309#endif
310 PL_regnpar = prog->nparens;
9041c2e3
NIS
311 PL_regdata = prog->data;
312 PL_reg_re = prog;
0f5d15d6 313}
22e551b9 314
9041c2e3 315/*
cad2e5aa
JH
316 * Need to implement the following flags for reg_anch:
317 *
318 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
319 * USE_INTUIT_ML
320 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
321 * INTUIT_AUTORITATIVE_ML
322 * INTUIT_ONCE_NOML - Intuit can match in one location only.
323 * INTUIT_ONCE_ML
324 *
325 * Another flag for this function: SECOND_TIME (so that float substrs
326 * with giant delta may be not rechecked).
327 */
328
329/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
330
2c2d71f5 331/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
332 Otherwise, only SvCUR(sv) is used to get strbeg. */
333
334/* XXXX We assume that strpos is strbeg unless sv. */
335
6eb5f6b9
JH
336/* XXXX Some places assume that there is a fixed substring.
337 An update may be needed if optimizer marks as "INTUITable"
338 RExen without fixed substrings. Similarly, it is assumed that
339 lengths of all the strings are no more than minlen, thus they
340 cannot come from lookahead.
341 (Or minlen should take into account lookahead.) */
342
2c2d71f5
JH
343/* A failure to find a constant substring means that there is no need to make
344 an expensive call to REx engine, thus we celebrate a failure. Similarly,
345 finding a substring too deep into the string means that less calls to
30944b6d
IZ
346 regtry() should be needed.
347
348 REx compiler's optimizer found 4 possible hints:
349 a) Anchored substring;
350 b) Fixed substring;
351 c) Whether we are anchored (beginning-of-line or \G);
352 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 353 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
354 string which does not contradict any of them.
355 */
2c2d71f5 356
6eb5f6b9
JH
357/* Most of decisions we do here should have been done at compile time.
358 The nodes of the REx which we used for the search should have been
359 deleted from the finite automaton. */
360
cad2e5aa
JH
361char *
362Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
363 char *strend, U32 flags, re_scream_pos_data *data)
364{
b7953727 365 register I32 start_shift = 0;
cad2e5aa 366 /* Should be nonnegative! */
b7953727 367 register I32 end_shift = 0;
2c2d71f5
JH
368 register char *s;
369 register SV *check;
a1933d95 370 char *strbeg;
cad2e5aa
JH
371 char *t;
372 I32 ml_anch;
6eb5f6b9 373 register char *other_last = Nullch; /* other substr checked before this */
b7953727 374 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
375#ifdef DEBUGGING
376 char *i_strpos = strpos;
377#endif
cad2e5aa
JH
378
379 DEBUG_r( if (!PL_colorset) reginitcolors() );
380 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 381 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
382 PL_colors[4],PL_colors[5],PL_colors[0],
383 prog->precomp,
384 PL_colors[1],
385 (strlen(prog->precomp) > 60 ? "..." : ""),
386 PL_colors[0],
b900a521 387 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
388 strpos, PL_colors[1],
389 (strend - strpos > 60 ? "..." : ""))
390 );
391
1aa99e6b
IH
392 if (prog->reganch & ROPT_UTF8)
393 PL_reg_flags |= RF_utf8;
394
395 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
2c2d71f5 396 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 397 goto fail;
2c2d71f5 398 }
a1933d95 399 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 400 PL_regeol = strend;
653099ff 401 check = prog->check_substr;
2c2d71f5 402 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
403 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
404 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 405 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 406
7e25d62c
JH
407 if (!ml_anch) {
408 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
409 | ROPT_IMPLICIT)) /* not a real BOL */
410 /* SvCUR is not set on references: SvRV and SvPVX overlap */
411 && sv && !SvROK(sv)
412 && (strpos != strbeg)) {
413 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
414 goto fail;
415 }
416 if (prog->check_offset_min == prog->check_offset_max &&
417 !(prog->reganch & ROPT_SANY_SEEN)) {
2c2d71f5 418 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
419 I32 slen;
420
1aa99e6b 421 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
422 if (SvTAIL(check)) {
423 slen = SvCUR(check); /* >= 1 */
cad2e5aa 424
9041c2e3 425 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
426 || (strend - s == slen && strend[-1] != '\n')) {
427 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
428 goto fail_finish;
cad2e5aa
JH
429 }
430 /* Now should match s[0..slen-2] */
431 slen--;
653099ff 432 if (slen && (*SvPVX(check) != *s
cad2e5aa 433 || (slen > 1
653099ff 434 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
435 report_neq:
436 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
437 goto fail_finish;
438 }
cad2e5aa 439 }
653099ff
GS
440 else if (*SvPVX(check) != *s
441 || ((slen = SvCUR(check)) > 1
442 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
443 goto report_neq;
444 goto success_at_start;
7e25d62c 445 }
cad2e5aa 446 }
2c2d71f5 447 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 448 s = strpos;
2c2d71f5 449 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 450 end_shift = prog->minlen - start_shift -
653099ff 451 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 452 if (!ml_anch) {
653099ff
GS
453 I32 end = prog->check_offset_max + CHR_SVLEN(check)
454 - (SvTAIL(check) != 0);
1aa99e6b 455 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
456
457 if (end_shift < eshift)
458 end_shift = eshift;
459 }
cad2e5aa 460 }
2c2d71f5 461 else { /* Can match at random position */
cad2e5aa
JH
462 ml_anch = 0;
463 s = strpos;
2c2d71f5
JH
464 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
465 /* Should be nonnegative! */
466 end_shift = prog->minlen - start_shift -
653099ff 467 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
468 }
469
2c2d71f5 470#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 471 if (end_shift < 0)
6bbae5e6 472 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
473#endif
474
2c2d71f5
JH
475 restart:
476 /* Find a possible match in the region s..strend by looking for
477 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 478 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
479 I32 p = -1; /* Internal iterator of scream. */
480 I32 *pp = data ? data->scream_pos : &p;
481
2c2d71f5
JH
482 if (PL_screamfirst[BmRARE(check)] >= 0
483 || ( BmRARE(check) == '\n'
484 && (BmPREVIOUS(check) == SvCUR(check) - 1)
485 && SvTAIL(check) ))
9041c2e3 486 s = screaminstr(sv, check,
2c2d71f5 487 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 488 else
2c2d71f5 489 goto fail_finish;
cad2e5aa
JH
490 if (data)
491 *data->scream_olds = s;
492 }
3baa4c62
JH
493 else if (prog->reganch & ROPT_SANY_SEEN)
494 s = fbm_instr((U8*)(s + start_shift),
495 (U8*)(strend - end_shift),
496 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 497 else
1aa99e6b
IH
498 s = fbm_instr(HOP3(s, start_shift, strend),
499 HOP3(strend, -end_shift, strbeg),
2c2d71f5 500 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
501
502 /* Update the count-of-usability, remove useless subpatterns,
503 unshift s. */
2c2d71f5
JH
504
505 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
506 (s ? "Found" : "Did not find"),
507 ((check == prog->anchored_substr) ? "anchored" : "floating"),
508 PL_colors[0],
7b0972df
JH
509 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
510 SvPVX(check),
2c2d71f5
JH
511 PL_colors[1], (SvTAIL(check) ? "$" : ""),
512 (s ? " at offset " : "...\n") ) );
513
514 if (!s)
515 goto fail_finish;
516
6eb5f6b9
JH
517 check_at = s;
518
2c2d71f5 519 /* Finish the diagnostic message */
30944b6d 520 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
521
522 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
523 Start with the other substr.
524 XXXX no SCREAM optimization yet - and a very coarse implementation
525 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
526 *always* match. Probably should be marked during compile...
527 Probably it is right to do no SCREAM here...
528 */
529
530 if (prog->float_substr && prog->anchored_substr) {
30944b6d 531 /* Take into account the "other" substring. */
2c2d71f5
JH
532 /* XXXX May be hopelessly wrong for UTF... */
533 if (!other_last)
6eb5f6b9 534 other_last = strpos;
2c2d71f5 535 if (check == prog->float_substr) {
30944b6d
IZ
536 do_other_anchored:
537 {
1aa99e6b 538 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
539 char *s1 = s;
540
2c2d71f5
JH
541 t = s - prog->check_offset_max;
542 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
543 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 544 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 545 && t > strpos)))
30944b6d 546 /* EMPTY */;
2c2d71f5
JH
547 else
548 t = strpos;
1aa99e6b 549 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
550 if (t < other_last) /* These positions already checked */
551 t = other_last;
1aa99e6b 552 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
553 if (last < last1)
554 last1 = last;
555 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
556 /* On end-of-str: see comment below. */
557 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
558 HOP3(HOP3(last1, prog->anchored_offset, strend)
559 + SvCUR(prog->anchored_substr),
560 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
561 prog->anchored_substr,
562 PL_multiline ? FBMrf_MULTILINE : 0);
563 DEBUG_r(PerlIO_printf(Perl_debug_log,
564 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
565 (s ? "Found" : "Contradicts"),
566 PL_colors[0],
7b0972df
JH
567 (int)(SvCUR(prog->anchored_substr)
568 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
569 SvPVX(prog->anchored_substr),
570 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
571 if (!s) {
572 if (last1 >= last2) {
573 DEBUG_r(PerlIO_printf(Perl_debug_log,
574 ", giving up...\n"));
575 goto fail_finish;
576 }
577 DEBUG_r(PerlIO_printf(Perl_debug_log,
578 ", trying floating at offset %ld...\n",
1aa99e6b
IH
579 (long)(HOP3c(s1, 1, strend) - i_strpos)));
580 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
581 s = HOP3c(last, 1, strend);
2c2d71f5
JH
582 goto restart;
583 }
584 else {
585 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 586 (long)(s - i_strpos)));
1aa99e6b
IH
587 t = HOP3c(s, -prog->anchored_offset, strbeg);
588 other_last = HOP3c(s, 1, strend);
30944b6d 589 s = s1;
2c2d71f5
JH
590 if (t == strpos)
591 goto try_at_start;
2c2d71f5
JH
592 goto try_at_offset;
593 }
30944b6d 594 }
2c2d71f5
JH
595 }
596 else { /* Take into account the floating substring. */
597 char *last, *last1;
598 char *s1 = s;
599
1aa99e6b
IH
600 t = HOP3c(s, -start_shift, strbeg);
601 last1 = last =
602 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
603 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
604 last = HOP3c(t, prog->float_max_offset, strend);
605 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
606 if (s < other_last)
607 s = other_last;
2c2d71f5
JH
608 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
609 /* fbm_instr() takes into account exact value of end-of-str
610 if the check is SvTAIL(ed). Since false positives are OK,
611 and end-of-str is not later than strend we are OK. */
612 s = fbm_instr((unsigned char*)s,
613 (unsigned char*)last + SvCUR(prog->float_substr)
614 - (SvTAIL(prog->float_substr)!=0),
615 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
616 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
617 (s ? "Found" : "Contradicts"),
618 PL_colors[0],
7b0972df
JH
619 (int)(SvCUR(prog->float_substr)
620 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
621 SvPVX(prog->float_substr),
622 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
623 if (!s) {
624 if (last1 == last) {
625 DEBUG_r(PerlIO_printf(Perl_debug_log,
626 ", giving up...\n"));
627 goto fail_finish;
628 }
629 DEBUG_r(PerlIO_printf(Perl_debug_log,
630 ", trying anchored starting at offset %ld...\n",
30944b6d 631 (long)(s1 + 1 - i_strpos)));
803ff556 632 other_last = last;
1aa99e6b 633 s = HOP3c(t, 1, strend);
2c2d71f5
JH
634 goto restart;
635 }
636 else {
637 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 638 (long)(s - i_strpos)));
803ff556 639 other_last = s; /* Fix this later. --Hugo */
30944b6d 640 s = s1;
2c2d71f5
JH
641 if (t == strpos)
642 goto try_at_start;
2c2d71f5
JH
643 goto try_at_offset;
644 }
645 }
cad2e5aa 646 }
2c2d71f5
JH
647
648 t = s - prog->check_offset_max;
2c2d71f5
JH
649 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
650 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
651 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
652 && t > strpos))) {
2c2d71f5
JH
653 /* Fixed substring is found far enough so that the match
654 cannot start at strpos. */
655 try_at_offset:
cad2e5aa 656 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
657 /* Eventually fbm_*() should handle this, but often
658 anchored_offset is not 0, so this check will not be wasted. */
659 /* XXXX In the code below we prefer to look for "^" even in
660 presence of anchored substrings. And we search even
661 beyond the found float position. These pessimizations
662 are historical artefacts only. */
663 find_anchor:
2c2d71f5 664 while (t < strend - prog->minlen) {
cad2e5aa 665 if (*t == '\n') {
4ee3650e 666 if (t < check_at - prog->check_offset_min) {
30944b6d 667 if (prog->anchored_substr) {
4ee3650e
GS
668 /* Since we moved from the found position,
669 we definitely contradict the found anchored
30944b6d
IZ
670 substr. Due to the above check we do not
671 contradict "check" substr.
672 Thus we can arrive here only if check substr
673 is float. Redo checking for "other"=="fixed".
674 */
9041c2e3 675 strpos = t + 1;
30944b6d
IZ
676 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
677 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
678 goto do_other_anchored;
679 }
4ee3650e
GS
680 /* We don't contradict the found floating substring. */
681 /* XXXX Why not check for STCLASS? */
cad2e5aa 682 s = t + 1;
2c2d71f5 683 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 684 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
685 goto set_useful;
686 }
4ee3650e
GS
687 /* Position contradicts check-string */
688 /* XXXX probably better to look for check-string
689 than for "\n", so one should lower the limit for t? */
690 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 691 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 692 other_last = strpos = s = t + 1;
cad2e5aa
JH
693 goto restart;
694 }
695 t++;
696 }
2c2d71f5
JH
697 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
698 PL_colors[0],PL_colors[1]));
699 goto fail_finish;
cad2e5aa 700 }
f5952150
GS
701 else {
702 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
703 PL_colors[0],PL_colors[1]));
704 }
cad2e5aa
JH
705 s = t;
706 set_useful:
2c2d71f5 707 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
708 }
709 else {
f5952150 710 /* The found string does not prohibit matching at strpos,
2c2d71f5 711 - no optimization of calling REx engine can be performed,
f5952150
GS
712 unless it was an MBOL and we are not after MBOL,
713 or a future STCLASS check will fail this. */
2c2d71f5
JH
714 try_at_start:
715 /* Even in this situation we may use MBOL flag if strpos is offset
716 wrt the start of the string. */
05b4157f 717 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 718 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
719 /* May be due to an implicit anchor of m{.*foo} */
720 && !(prog->reganch & ROPT_IMPLICIT))
721 {
cad2e5aa
JH
722 t = strpos;
723 goto find_anchor;
724 }
30944b6d 725 DEBUG_r( if (ml_anch)
f5952150
GS
726 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
727 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 728 );
2c2d71f5 729 success_at_start:
30944b6d 730 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 731 && prog->check_substr /* Could be deleted already */
cad2e5aa 732 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
733 && prog->check_substr == prog->float_substr)
734 {
cad2e5aa 735 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 736 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
737 SvREFCNT_dec(prog->check_substr);
738 prog->check_substr = Nullsv; /* disable */
739 prog->float_substr = Nullsv; /* clear */
5e39e1e5 740 check = Nullsv; /* abort */
cad2e5aa 741 s = strpos;
3cf5c195
IZ
742 /* XXXX This is a remnant of the old implementation. It
743 looks wasteful, since now INTUIT can use many
6eb5f6b9 744 other heuristics. */
cad2e5aa
JH
745 prog->reganch &= ~RE_USE_INTUIT;
746 }
747 else
748 s = strpos;
749 }
750
6eb5f6b9
JH
751 /* Last resort... */
752 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
753 if (prog->regstclass) {
754 /* minlen == 0 is possible if regstclass is \b or \B,
755 and the fixed substr is ''$.
756 Since minlen is already taken into account, s+1 is before strend;
757 accidentally, minlen >= 1 guaranties no false positives at s + 1
758 even for \b or \B. But (minlen? 1 : 0) below assumes that
759 regstclass does not come from lookahead... */
760 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
761 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 762 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 763 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 764 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 765 : 1);
6eb5f6b9 766 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
767 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
768 : (prog->float_substr
769 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
770 cl_l, strend)
771 : strend);
a1933d95 772 char *startpos = strbeg;
6eb5f6b9
JH
773
774 t = s;
76384e4a 775 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 776 PL_regdata = prog->data;
76384e4a
GS
777 PL_bostr = startpos;
778 }
6eb5f6b9
JH
779 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
780 if (!s) {
781#ifdef DEBUGGING
b7953727 782 char *what = 0;
6eb5f6b9
JH
783#endif
784 if (endpos == strend) {
785 DEBUG_r( PerlIO_printf(Perl_debug_log,
786 "Could not match STCLASS...\n") );
787 goto fail;
788 }
66e933ab
GS
789 DEBUG_r( PerlIO_printf(Perl_debug_log,
790 "This position contradicts STCLASS...\n") );
653099ff
GS
791 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
792 goto fail;
6eb5f6b9
JH
793 /* Contradict one of substrings */
794 if (prog->anchored_substr) {
6eb5f6b9
JH
795 if (prog->anchored_substr == check) {
796 DEBUG_r( what = "anchored" );
797 hop_and_restart:
1aa99e6b 798 s = HOP3c(t, 1, strend);
66e933ab
GS
799 if (s + start_shift + end_shift > strend) {
800 /* XXXX Should be taken into account earlier? */
801 DEBUG_r( PerlIO_printf(Perl_debug_log,
802 "Could not match STCLASS...\n") );
803 goto fail;
804 }
5e39e1e5
HS
805 if (!check)
806 goto giveup;
6eb5f6b9 807 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 808 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
809 what, (long)(s + start_shift - i_strpos)) );
810 goto restart;
811 }
66e933ab 812 /* Have both, check_string is floating */
6eb5f6b9
JH
813 if (t + start_shift >= check_at) /* Contradicts floating=check */
814 goto retry_floating_check;
815 /* Recheck anchored substring, but not floating... */
9041c2e3 816 s = check_at;
5e39e1e5
HS
817 if (!check)
818 goto giveup;
6eb5f6b9 819 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 820 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
821 (long)(other_last - i_strpos)) );
822 goto do_other_anchored;
823 }
60e71179
GS
824 /* Another way we could have checked stclass at the
825 current position only: */
826 if (ml_anch) {
827 s = t = t + 1;
5e39e1e5
HS
828 if (!check)
829 goto giveup;
60e71179 830 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
831 "Looking for /%s^%s/m starting at offset %ld...\n",
832 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 833 goto try_at_offset;
66e933ab 834 }
60e71179
GS
835 if (!prog->float_substr) /* Could have been deleted */
836 goto fail;
6eb5f6b9
JH
837 /* Check is floating subtring. */
838 retry_floating_check:
839 t = check_at - start_shift;
840 DEBUG_r( what = "floating" );
841 goto hop_and_restart;
842 }
b7953727
JH
843 if (t != s) {
844 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 845 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
846 (long)(t - i_strpos), (long)(s - i_strpos))
847 );
848 }
849 else {
850 DEBUG_r(PerlIO_printf(Perl_debug_log,
851 "Does not contradict STCLASS...\n");
852 );
853 }
6eb5f6b9 854 }
5e39e1e5
HS
855 giveup:
856 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
857 PL_colors[4], (check ? "Guessed" : "Giving up"),
858 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 859 return s;
2c2d71f5
JH
860
861 fail_finish: /* Substring not found */
66e933ab
GS
862 if (prog->check_substr) /* could be removed already */
863 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 864 fail:
2c2d71f5 865 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
866 PL_colors[4],PL_colors[5]));
867 return Nullch;
868}
9661b544 869
6eb5f6b9 870/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
871STATIC char *
872S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 873{
6eb5f6b9
JH
874 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
875 char *m;
d8093b23 876 STRLEN ln;
877 unsigned int c1;
878 unsigned int c2;
6eb5f6b9
JH
879 char *e;
880 register I32 tmp = 1; /* Scratch variable? */
ffc61ed2 881 register bool do_utf8 = DO_UTF8(PL_reg_sv);
cad2e5aa 882
6eb5f6b9
JH
883 /* We know what class it must start with. */
884 switch (OP(c)) {
6eb5f6b9 885 case ANYOF:
a0ed51b3 886 while (s < strend) {
ffc61ed2 887 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
888 if (tmp && (norun || regtry(prog, s)))
889 goto got_it;
890 else
891 tmp = doevery;
a0ed51b3 892 }
6eb5f6b9
JH
893 else
894 tmp = 1;
ffc61ed2 895 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 896 }
6eb5f6b9
JH
897 break;
898 case EXACTF:
899 m = STRING(c);
900 ln = STR_LEN(c);
1aa99e6b
IH
901 if (UTF) {
902 c1 = to_utf8_lower((U8*)m);
903 c2 = to_utf8_upper((U8*)m);
904 }
905 else {
906 c1 = *(U8*)m;
907 c2 = PL_fold[c1];
908 }
6eb5f6b9
JH
909 goto do_exactf;
910 case EXACTFL:
911 m = STRING(c);
912 ln = STR_LEN(c);
d8093b23 913 c1 = *(U8*)m;
6eb5f6b9
JH
914 c2 = PL_fold_locale[c1];
915 do_exactf:
916 e = strend - ln;
b3c9acc1 917
6eb5f6b9
JH
918 if (norun && e < s)
919 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
920
921 if (do_utf8) {
922 STRLEN len;
923 if (c1 == c2)
924 while (s <= e) {
9041c2e3 925 if ( utf8_to_uvchr((U8*)s, &len) == c1
1aa99e6b
IH
926 && regtry(prog, s) )
927 goto got_it;
928 s += len;
929 }
930 else
931 while (s <= e) {
9041c2e3 932 UV c = utf8_to_uvchr((U8*)s, &len);
1aa99e6b
IH
933 if ( (c == c1 || c == c2) && regtry(prog, s) )
934 goto got_it;
935 s += len;
936 }
937 }
938 else {
939 if (c1 == c2)
940 while (s <= e) {
941 if ( *(U8*)s == c1
942 && (ln == 1 || !(OP(c) == EXACTF
943 ? ibcmp(s, m, ln)
944 : ibcmp_locale(s, m, ln)))
945 && (norun || regtry(prog, s)) )
946 goto got_it;
947 s++;
948 }
949 else
950 while (s <= e) {
951 if ( (*(U8*)s == c1 || *(U8*)s == c2)
952 && (ln == 1 || !(OP(c) == EXACTF
953 ? ibcmp(s, m, ln)
954 : ibcmp_locale(s, m, ln)))
955 && (norun || regtry(prog, s)) )
956 goto got_it;
957 s++;
958 }
b3c9acc1
IZ
959 }
960 break;
bbce6d69 961 case BOUNDL:
3280af22 962 PL_reg_flags |= RF_tainted;
bbce6d69 963 /* FALL THROUGH */
a0d0e21e 964 case BOUND:
ffc61ed2 965 if (do_utf8) {
12d33761 966 if (s == PL_bostr)
ffc61ed2
JH
967 tmp = '\n';
968 else {
1aa99e6b 969 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 970
0064a8a9
JH
971 if (s > (char*)r)
972 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
973 }
974 tmp = ((OP(c) == BOUND ?
9041c2e3 975 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 976 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
977 while (s < strend) {
978 if (tmp == !(OP(c) == BOUND ?
3568d838 979 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
980 isALNUM_LC_utf8((U8*)s)))
981 {
982 tmp = !tmp;
983 if ((norun || regtry(prog, s)))
984 goto got_it;
985 }
986 s += UTF8SKIP(s);
a687059c 987 }
a0d0e21e 988 }
667bb95a 989 else {
12d33761 990 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
991 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
992 while (s < strend) {
993 if (tmp ==
994 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
995 tmp = !tmp;
996 if ((norun || regtry(prog, s)))
997 goto got_it;
998 }
999 s++;
a0ed51b3 1000 }
a0ed51b3 1001 }
6eb5f6b9 1002 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1003 goto got_it;
1004 break;
bbce6d69 1005 case NBOUNDL:
3280af22 1006 PL_reg_flags |= RF_tainted;
bbce6d69 1007 /* FALL THROUGH */
a0d0e21e 1008 case NBOUND:
ffc61ed2 1009 if (do_utf8) {
12d33761 1010 if (s == PL_bostr)
ffc61ed2
JH
1011 tmp = '\n';
1012 else {
1aa99e6b 1013 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1014
0064a8a9
JH
1015 if (s > (char*)r)
1016 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1017 }
1018 tmp = ((OP(c) == NBOUND ?
9041c2e3 1019 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1020 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1021 while (s < strend) {
1022 if (tmp == !(OP(c) == NBOUND ?
3568d838 1023 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1024 isALNUM_LC_utf8((U8*)s)))
1025 tmp = !tmp;
1026 else if ((norun || regtry(prog, s)))
1027 goto got_it;
1028 s += UTF8SKIP(s);
1029 }
a0d0e21e 1030 }
667bb95a 1031 else {
12d33761 1032 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1033 tmp = ((OP(c) == NBOUND ?
1034 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1035 while (s < strend) {
1036 if (tmp ==
1037 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1038 tmp = !tmp;
1039 else if ((norun || regtry(prog, s)))
1040 goto got_it;
1041 s++;
1042 }
a0ed51b3 1043 }
6eb5f6b9 1044 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1045 goto got_it;
1046 break;
a0d0e21e 1047 case ALNUM:
ffc61ed2 1048 if (do_utf8) {
8269fa76 1049 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1050 while (s < strend) {
3568d838 1051 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1052 if (tmp && (norun || regtry(prog, s)))
1053 goto got_it;
1054 else
1055 tmp = doevery;
1056 }
bbce6d69 1057 else
ffc61ed2
JH
1058 tmp = 1;
1059 s += UTF8SKIP(s);
bbce6d69 1060 }
bbce6d69 1061 }
ffc61ed2
JH
1062 else {
1063 while (s < strend) {
1064 if (isALNUM(*s)) {
1065 if (tmp && (norun || regtry(prog, s)))
1066 goto got_it;
1067 else
1068 tmp = doevery;
1069 }
a0ed51b3 1070 else
ffc61ed2
JH
1071 tmp = 1;
1072 s++;
a0ed51b3 1073 }
a0ed51b3
LW
1074 }
1075 break;
bbce6d69 1076 case ALNUML:
3280af22 1077 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1078 if (do_utf8) {
1079 while (s < strend) {
1080 if (isALNUM_LC_utf8((U8*)s)) {
1081 if (tmp && (norun || regtry(prog, s)))
1082 goto got_it;
1083 else
1084 tmp = doevery;
1085 }
a687059c 1086 else
ffc61ed2
JH
1087 tmp = 1;
1088 s += UTF8SKIP(s);
a0d0e21e 1089 }
a0d0e21e 1090 }
ffc61ed2
JH
1091 else {
1092 while (s < strend) {
1093 if (isALNUM_LC(*s)) {
1094 if (tmp && (norun || regtry(prog, s)))
1095 goto got_it;
1096 else
1097 tmp = doevery;
1098 }
a0ed51b3 1099 else
ffc61ed2
JH
1100 tmp = 1;
1101 s++;
a0ed51b3 1102 }
a0ed51b3
LW
1103 }
1104 break;
a0d0e21e 1105 case NALNUM:
ffc61ed2 1106 if (do_utf8) {
8269fa76 1107 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1108 while (s < strend) {
3568d838 1109 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1110 if (tmp && (norun || regtry(prog, s)))
1111 goto got_it;
1112 else
1113 tmp = doevery;
1114 }
bbce6d69 1115 else
ffc61ed2
JH
1116 tmp = 1;
1117 s += UTF8SKIP(s);
bbce6d69 1118 }
bbce6d69 1119 }
ffc61ed2
JH
1120 else {
1121 while (s < strend) {
1122 if (!isALNUM(*s)) {
1123 if (tmp && (norun || regtry(prog, s)))
1124 goto got_it;
1125 else
1126 tmp = doevery;
1127 }
a0ed51b3 1128 else
ffc61ed2
JH
1129 tmp = 1;
1130 s++;
a0ed51b3 1131 }
a0ed51b3
LW
1132 }
1133 break;
bbce6d69 1134 case NALNUML:
3280af22 1135 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1136 if (do_utf8) {
1137 while (s < strend) {
1138 if (!isALNUM_LC_utf8((U8*)s)) {
1139 if (tmp && (norun || regtry(prog, s)))
1140 goto got_it;
1141 else
1142 tmp = doevery;
1143 }
a687059c 1144 else
ffc61ed2
JH
1145 tmp = 1;
1146 s += UTF8SKIP(s);
a687059c 1147 }
a0d0e21e 1148 }
ffc61ed2
JH
1149 else {
1150 while (s < strend) {
1151 if (!isALNUM_LC(*s)) {
1152 if (tmp && (norun || regtry(prog, s)))
1153 goto got_it;
1154 else
1155 tmp = doevery;
1156 }
a0ed51b3 1157 else
ffc61ed2
JH
1158 tmp = 1;
1159 s++;
a0ed51b3 1160 }
a0ed51b3
LW
1161 }
1162 break;
a0d0e21e 1163 case SPACE:
ffc61ed2 1164 if (do_utf8) {
8269fa76 1165 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1166 while (s < strend) {
3568d838 1167 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1168 if (tmp && (norun || regtry(prog, s)))
1169 goto got_it;
1170 else
1171 tmp = doevery;
1172 }
a0d0e21e 1173 else
ffc61ed2
JH
1174 tmp = 1;
1175 s += UTF8SKIP(s);
2304df62 1176 }
a0d0e21e 1177 }
ffc61ed2
JH
1178 else {
1179 while (s < strend) {
1180 if (isSPACE(*s)) {
1181 if (tmp && (norun || regtry(prog, s)))
1182 goto got_it;
1183 else
1184 tmp = doevery;
1185 }
a0ed51b3 1186 else
ffc61ed2
JH
1187 tmp = 1;
1188 s++;
a0ed51b3 1189 }
a0ed51b3
LW
1190 }
1191 break;
bbce6d69 1192 case SPACEL:
3280af22 1193 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1194 if (do_utf8) {
1195 while (s < strend) {
1196 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1197 if (tmp && (norun || regtry(prog, s)))
1198 goto got_it;
1199 else
1200 tmp = doevery;
1201 }
bbce6d69 1202 else
ffc61ed2
JH
1203 tmp = 1;
1204 s += UTF8SKIP(s);
bbce6d69 1205 }
bbce6d69 1206 }
ffc61ed2
JH
1207 else {
1208 while (s < strend) {
1209 if (isSPACE_LC(*s)) {
1210 if (tmp && (norun || regtry(prog, s)))
1211 goto got_it;
1212 else
1213 tmp = doevery;
1214 }
a0ed51b3 1215 else
ffc61ed2
JH
1216 tmp = 1;
1217 s++;
a0ed51b3 1218 }
a0ed51b3
LW
1219 }
1220 break;
a0d0e21e 1221 case NSPACE:
ffc61ed2 1222 if (do_utf8) {
8269fa76 1223 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1224 while (s < strend) {
3568d838 1225 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1226 if (tmp && (norun || regtry(prog, s)))
1227 goto got_it;
1228 else
1229 tmp = doevery;
1230 }
a0d0e21e 1231 else
ffc61ed2
JH
1232 tmp = 1;
1233 s += UTF8SKIP(s);
a687059c 1234 }
a0d0e21e 1235 }
ffc61ed2
JH
1236 else {
1237 while (s < strend) {
1238 if (!isSPACE(*s)) {
1239 if (tmp && (norun || regtry(prog, s)))
1240 goto got_it;
1241 else
1242 tmp = doevery;
1243 }
a0ed51b3 1244 else
ffc61ed2
JH
1245 tmp = 1;
1246 s++;
a0ed51b3 1247 }
a0ed51b3
LW
1248 }
1249 break;
bbce6d69 1250 case NSPACEL:
3280af22 1251 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1252 if (do_utf8) {
1253 while (s < strend) {
1254 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1255 if (tmp && (norun || regtry(prog, s)))
1256 goto got_it;
1257 else
1258 tmp = doevery;
1259 }
bbce6d69 1260 else
ffc61ed2
JH
1261 tmp = 1;
1262 s += UTF8SKIP(s);
bbce6d69 1263 }
bbce6d69 1264 }
ffc61ed2
JH
1265 else {
1266 while (s < strend) {
1267 if (!isSPACE_LC(*s)) {
1268 if (tmp && (norun || regtry(prog, s)))
1269 goto got_it;
1270 else
1271 tmp = doevery;
1272 }
a0ed51b3 1273 else
ffc61ed2
JH
1274 tmp = 1;
1275 s++;
a0ed51b3 1276 }
a0ed51b3
LW
1277 }
1278 break;
a0d0e21e 1279 case DIGIT:
ffc61ed2 1280 if (do_utf8) {
8269fa76 1281 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1282 while (s < strend) {
3568d838 1283 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1284 if (tmp && (norun || regtry(prog, s)))
1285 goto got_it;
1286 else
1287 tmp = doevery;
1288 }
a0d0e21e 1289 else
ffc61ed2
JH
1290 tmp = 1;
1291 s += UTF8SKIP(s);
2b69d0c2 1292 }
a0d0e21e 1293 }
ffc61ed2
JH
1294 else {
1295 while (s < strend) {
1296 if (isDIGIT(*s)) {
1297 if (tmp && (norun || regtry(prog, s)))
1298 goto got_it;
1299 else
1300 tmp = doevery;
1301 }
a0ed51b3 1302 else
ffc61ed2
JH
1303 tmp = 1;
1304 s++;
a0ed51b3 1305 }
a0ed51b3
LW
1306 }
1307 break;
b8c5462f
JH
1308 case DIGITL:
1309 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1310 if (do_utf8) {
1311 while (s < strend) {
1312 if (isDIGIT_LC_utf8((U8*)s)) {
1313 if (tmp && (norun || regtry(prog, s)))
1314 goto got_it;
1315 else
1316 tmp = doevery;
1317 }
b8c5462f 1318 else
ffc61ed2
JH
1319 tmp = 1;
1320 s += UTF8SKIP(s);
b8c5462f 1321 }
b8c5462f 1322 }
ffc61ed2
JH
1323 else {
1324 while (s < strend) {
1325 if (isDIGIT_LC(*s)) {
1326 if (tmp && (norun || regtry(prog, s)))
1327 goto got_it;
1328 else
1329 tmp = doevery;
1330 }
b8c5462f 1331 else
ffc61ed2
JH
1332 tmp = 1;
1333 s++;
b8c5462f 1334 }
b8c5462f
JH
1335 }
1336 break;
a0d0e21e 1337 case NDIGIT:
ffc61ed2 1338 if (do_utf8) {
8269fa76 1339 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1340 while (s < strend) {
3568d838 1341 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1342 if (tmp && (norun || regtry(prog, s)))
1343 goto got_it;
1344 else
1345 tmp = doevery;
1346 }
a0d0e21e 1347 else
ffc61ed2
JH
1348 tmp = 1;
1349 s += UTF8SKIP(s);
a687059c 1350 }
a0d0e21e 1351 }
ffc61ed2
JH
1352 else {
1353 while (s < strend) {
1354 if (!isDIGIT(*s)) {
1355 if (tmp && (norun || regtry(prog, s)))
1356 goto got_it;
1357 else
1358 tmp = doevery;
1359 }
a0ed51b3 1360 else
ffc61ed2
JH
1361 tmp = 1;
1362 s++;
a0ed51b3 1363 }
a0ed51b3
LW
1364 }
1365 break;
b8c5462f
JH
1366 case NDIGITL:
1367 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1368 if (do_utf8) {
1369 while (s < strend) {
1370 if (!isDIGIT_LC_utf8((U8*)s)) {
1371 if (tmp && (norun || regtry(prog, s)))
1372 goto got_it;
1373 else
1374 tmp = doevery;
1375 }
b8c5462f 1376 else
ffc61ed2
JH
1377 tmp = 1;
1378 s += UTF8SKIP(s);
b8c5462f 1379 }
a0ed51b3 1380 }
ffc61ed2
JH
1381 else {
1382 while (s < strend) {
1383 if (!isDIGIT_LC(*s)) {
1384 if (tmp && (norun || regtry(prog, s)))
1385 goto got_it;
1386 else
1387 tmp = doevery;
1388 }
cf93c79d 1389 else
ffc61ed2
JH
1390 tmp = 1;
1391 s++;
b8c5462f 1392 }
b8c5462f
JH
1393 }
1394 break;
b3c9acc1 1395 default:
3c3eec57
GS
1396 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1397 break;
d6a28714 1398 }
6eb5f6b9
JH
1399 return 0;
1400 got_it:
1401 return s;
1402}
1403
1404/*
1405 - regexec_flags - match a regexp against a string
1406 */
1407I32
1408Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1409 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1410/* strend: pointer to null at end of string */
1411/* strbeg: real beginning of string */
1412/* minend: end of match must be >=minend after stringarg. */
1413/* data: May be used for some additional optimizations. */
1414/* nosave: For optimizations. */
1415{
6eb5f6b9
JH
1416 register char *s;
1417 register regnode *c;
1418 register char *startpos = stringarg;
6eb5f6b9
JH
1419 I32 minlen; /* must match at least this many chars */
1420 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1421 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1422 constant substr. */ /* CC */
1423 I32 end_shift = 0; /* Same for the end. */ /* CC */
1424 I32 scream_pos = -1; /* Internal iterator of scream. */
1425 char *scream_olds;
1426 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1427 bool do_utf8 = DO_UTF8(sv);
6eb5f6b9
JH
1428
1429 PL_regcc = 0;
1430
1431 cache_re(prog);
1432#ifdef DEBUGGING
aea4f609 1433 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1434#endif
1435
1436 /* Be paranoid... */
1437 if (prog == NULL || startpos == NULL) {
1438 Perl_croak(aTHX_ "NULL regexp parameter");
1439 return 0;
1440 }
1441
1442 minlen = prog->minlen;
1aa99e6b 1443 if (do_utf8) {
3baa4c62
JH
1444 if (!(prog->reganch & ROPT_SANY_SEEN))
1445 if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
1aa99e6b
IH
1446 }
1447 else {
1448 if (strend - startpos < minlen) goto phooey;
1449 }
6eb5f6b9 1450
6eb5f6b9
JH
1451 /* Check validity of program. */
1452 if (UCHARAT(prog->program) != REG_MAGIC) {
1453 Perl_croak(aTHX_ "corrupted regexp program");
1454 }
1455
1456 PL_reg_flags = 0;
1457 PL_reg_eval_set = 0;
1458 PL_reg_maxiter = 0;
1459
1460 if (prog->reganch & ROPT_UTF8)
1461 PL_reg_flags |= RF_utf8;
1462
1463 /* Mark beginning of line for ^ and lookbehind. */
1464 PL_regbol = startpos;
1465 PL_bostr = strbeg;
1466 PL_reg_sv = sv;
1467
1468 /* Mark end of line for $ (and such) */
1469 PL_regeol = strend;
1470
1471 /* see how far we have to get to not match where we matched before */
1472 PL_regtill = startpos+minend;
1473
1474 /* We start without call_cc context. */
1475 PL_reg_call_cc = 0;
1476
1477 /* If there is a "must appear" string, look for it. */
1478 s = startpos;
1479
1480 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1481 MAGIC *mg;
1482
1483 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1484 PL_reg_ganch = startpos;
1485 else if (sv && SvTYPE(sv) >= SVt_PVMG
1486 && SvMAGIC(sv)
14befaf4
DM
1487 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1488 && mg->mg_len >= 0) {
6eb5f6b9
JH
1489 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1490 if (prog->reganch & ROPT_ANCH_GPOS) {
1491 if (s > PL_reg_ganch)
1492 goto phooey;
1493 s = PL_reg_ganch;
1494 }
1495 }
1496 else /* pos() not defined */
1497 PL_reg_ganch = strbeg;
1498 }
1499
699c3c34
JH
1500 if (do_utf8 == (UTF!=0) &&
1501 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1502 re_scream_pos_data d;
1503
1504 d.scream_olds = &scream_olds;
1505 d.scream_pos = &scream_pos;
1506 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1507 if (!s)
1508 goto phooey; /* not present */
1509 }
1510
1511 DEBUG_r( if (!PL_colorset) reginitcolors() );
1512 DEBUG_r(PerlIO_printf(Perl_debug_log,
1513 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1514 PL_colors[4],PL_colors[5],PL_colors[0],
1515 prog->precomp,
1516 PL_colors[1],
1517 (strlen(prog->precomp) > 60 ? "..." : ""),
1518 PL_colors[0],
1519 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1520 startpos, PL_colors[1],
1521 (strend - startpos > 60 ? "..." : ""))
1522 );
1523
1524 /* Simplest case: anchored match need be tried only once. */
1525 /* [unless only anchor is BOL and multiline is set] */
1526 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1527 if (s == startpos && regtry(prog, startpos))
1528 goto got_it;
1529 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1530 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1531 {
1532 char *end;
1533
1534 if (minlen)
1535 dontbother = minlen - 1;
1aa99e6b 1536 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1537 /* for multiline we only have to try after newlines */
1538 if (prog->check_substr) {
1539 if (s == startpos)
1540 goto after_try;
1541 while (1) {
1542 if (regtry(prog, s))
1543 goto got_it;
1544 after_try:
1545 if (s >= end)
1546 goto phooey;
1547 if (prog->reganch & RE_USE_INTUIT) {
1548 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1549 if (!s)
1550 goto phooey;
1551 }
1552 else
1553 s++;
1554 }
1555 } else {
1556 if (s > startpos)
1557 s--;
1558 while (s < end) {
1559 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1560 if (regtry(prog, s))
1561 goto got_it;
1562 }
1563 }
1564 }
1565 }
1566 goto phooey;
1567 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1568 if (regtry(prog, PL_reg_ganch))
1569 goto got_it;
1570 goto phooey;
1571 }
1572
1573 /* Messy cases: unanchored match. */
9041c2e3 1574 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1575 /* we have /x+whatever/ */
1576 /* it must be a one character string (XXXX Except UTF?) */
1577 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1578#ifdef DEBUGGING
1579 int did_match = 0;
1580#endif
1581
1aa99e6b 1582 if (do_utf8) {
6eb5f6b9
JH
1583 while (s < strend) {
1584 if (*s == ch) {
bf93d4cc 1585 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1586 if (regtry(prog, s)) goto got_it;
1587 s += UTF8SKIP(s);
1588 while (s < strend && *s == ch)
1589 s += UTF8SKIP(s);
1590 }
1591 s += UTF8SKIP(s);
1592 }
1593 }
1594 else {
1595 while (s < strend) {
1596 if (*s == ch) {
bf93d4cc 1597 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1598 if (regtry(prog, s)) goto got_it;
1599 s++;
1600 while (s < strend && *s == ch)
1601 s++;
1602 }
1603 s++;
1604 }
1605 }
b7953727 1606 DEBUG_r(if (!did_match)
bf93d4cc 1607 PerlIO_printf(Perl_debug_log,
b7953727
JH
1608 "Did not find anchored character...\n")
1609 );
6eb5f6b9
JH
1610 }
1611 /*SUPPRESS 560*/
1aa99e6b
IH
1612 else if (do_utf8 == (UTF!=0) &&
1613 (prog->anchored_substr != Nullsv
9041c2e3 1614 || (prog->float_substr != Nullsv
1aa99e6b 1615 && prog->float_max_offset < strend - s))) {
9041c2e3 1616 SV *must = prog->anchored_substr
6eb5f6b9 1617 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1618 I32 back_max =
6eb5f6b9 1619 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1620 I32 back_min =
6eb5f6b9 1621 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1622 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1623 -(I32)(CHR_SVLEN(must)
1aa99e6b 1624 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1625 char *last1; /* Last position checked before */
bf93d4cc
GS
1626#ifdef DEBUGGING
1627 int did_match = 0;
1628#endif
6eb5f6b9
JH
1629
1630 if (s > PL_bostr)
1631 last1 = HOPc(s, -1);
1632 else
1633 last1 = s - 1; /* bogus */
1634
1635 /* XXXX check_substr already used to find `s', can optimize if
1636 check_substr==must. */
1637 scream_pos = -1;
1638 dontbother = end_shift;
1639 strend = HOPc(strend, -dontbother);
1640 while ( (s <= last) &&
9041c2e3 1641 ((flags & REXEC_SCREAM)
1aa99e6b 1642 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1643 end_shift, &scream_pos, 0))
1aa99e6b 1644 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1645 (unsigned char*)strend, must,
6eb5f6b9 1646 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1647 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1648 if (HOPc(s, -back_max) > last1) {
1649 last1 = HOPc(s, -back_min);
1650 s = HOPc(s, -back_max);
1651 }
1652 else {
1653 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1654
1655 last1 = HOPc(s, -back_min);
1656 s = t;
1657 }
1aa99e6b 1658 if (do_utf8) {
6eb5f6b9
JH
1659 while (s <= last1) {
1660 if (regtry(prog, s))
1661 goto got_it;
1662 s += UTF8SKIP(s);
1663 }
1664 }
1665 else {
1666 while (s <= last1) {
1667 if (regtry(prog, s))
1668 goto got_it;
1669 s++;
1670 }
1671 }
1672 }
b7953727
JH
1673 DEBUG_r(if (!did_match)
1674 PerlIO_printf(Perl_debug_log,
1675 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1676 ((must == prog->anchored_substr)
1677 ? "anchored" : "floating"),
1678 PL_colors[0],
1679 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1680 SvPVX(must),
b7953727
JH
1681 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1682 );
6eb5f6b9
JH
1683 goto phooey;
1684 }
155aba94 1685 else if ((c = prog->regstclass)) {
66e933ab
GS
1686 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1687 /* don't bother with what can't match */
6eb5f6b9 1688 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1689 DEBUG_r({
1690 SV *prop = sv_newmortal();
1691 regprop(prop, c);
1692 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s);
1693 });
6eb5f6b9
JH
1694 if (find_byclass(prog, c, s, strend, startpos, 0))
1695 goto got_it;
bf93d4cc 1696 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1697 }
1698 else {
1699 dontbother = 0;
1700 if (prog->float_substr != Nullsv) { /* Trim the end. */
1701 char *last;
d6a28714
JH
1702
1703 if (flags & REXEC_SCREAM) {
1704 last = screaminstr(sv, prog->float_substr, s - strbeg,
1705 end_shift, &scream_pos, 1); /* last one */
1706 if (!last)
ffc61ed2 1707 last = scream_olds; /* Only one occurrence. */
b8c5462f 1708 }
d6a28714
JH
1709 else {
1710 STRLEN len;
1711 char *little = SvPV(prog->float_substr, len);
1712
1713 if (SvTAIL(prog->float_substr)) {
1714 if (memEQ(strend - len + 1, little, len - 1))
1715 last = strend - len + 1;
1716 else if (!PL_multiline)
9041c2e3 1717 last = memEQ(strend - len, little, len)
d6a28714 1718 ? strend - len : Nullch;
b8c5462f 1719 else
d6a28714
JH
1720 goto find_last;
1721 } else {
1722 find_last:
9041c2e3 1723 if (len)
d6a28714 1724 last = rninstr(s, strend, little, little + len);
b8c5462f 1725 else
d6a28714 1726 last = strend; /* matching `$' */
b8c5462f 1727 }
b8c5462f 1728 }
bf93d4cc
GS
1729 if (last == NULL) {
1730 DEBUG_r(PerlIO_printf(Perl_debug_log,
1731 "%sCan't trim the tail, match fails (should not happen)%s\n",
1732 PL_colors[4],PL_colors[5]));
1733 goto phooey; /* Should not happen! */
1734 }
d6a28714
JH
1735 dontbother = strend - last + prog->float_min_offset;
1736 }
1737 if (minlen && (dontbother < minlen))
1738 dontbother = minlen - 1;
1739 strend -= dontbother; /* this one's always in bytes! */
1740 /* We don't know much -- general case. */
1aa99e6b 1741 if (do_utf8) {
d6a28714
JH
1742 for (;;) {
1743 if (regtry(prog, s))
1744 goto got_it;
1745 if (s >= strend)
1746 break;
b8c5462f 1747 s += UTF8SKIP(s);
d6a28714
JH
1748 };
1749 }
1750 else {
1751 do {
1752 if (regtry(prog, s))
1753 goto got_it;
1754 } while (s++ < strend);
1755 }
1756 }
1757
1758 /* Failure. */
1759 goto phooey;
1760
1761got_it:
1762 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1763
1764 if (PL_reg_eval_set) {
1765 /* Preserve the current value of $^R */
1766 if (oreplsv != GvSV(PL_replgv))
1767 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1768 restored, the value remains
1769 the same. */
1770 restore_pos(aTHXo_ 0);
1771 }
1772
1773 /* make sure $`, $&, $', and $digit will work later */
1774 if ( !(flags & REXEC_NOT_FIRST) ) {
1775 if (RX_MATCH_COPIED(prog)) {
1776 Safefree(prog->subbeg);
1777 RX_MATCH_COPIED_off(prog);
1778 }
1779 if (flags & REXEC_COPY_STR) {
1780 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1781
1782 s = savepvn(strbeg, i);
1783 prog->subbeg = s;
1784 prog->sublen = i;
1785 RX_MATCH_COPIED_on(prog);
1786 }
1787 else {
1788 prog->subbeg = strbeg;
1789 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1790 }
1791 }
9041c2e3 1792
d6a28714
JH
1793 return 1;
1794
1795phooey:
bf93d4cc
GS
1796 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1797 PL_colors[4],PL_colors[5]));
d6a28714
JH
1798 if (PL_reg_eval_set)
1799 restore_pos(aTHXo_ 0);
1800 return 0;
1801}
1802
1803/*
1804 - regtry - try match at specific point
1805 */
1806STATIC I32 /* 0 failure, 1 success */
1807S_regtry(pTHX_ regexp *prog, char *startpos)
1808{
d6a28714
JH
1809 register I32 i;
1810 register I32 *sp;
1811 register I32 *ep;
1812 CHECKPOINT lastcp;
1813
02db2b7b
IZ
1814#ifdef DEBUGGING
1815 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1816#endif
d6a28714
JH
1817 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1818 MAGIC *mg;
1819
1820 PL_reg_eval_set = RS_init;
1821 DEBUG_r(DEBUG_s(
b900a521
JH
1822 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1823 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1824 ));
e8347627 1825 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1826 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1827 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1828 SAVETMPS;
1829 /* Apparently this is not needed, judging by wantarray. */
e8347627 1830 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1831 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1832
1833 if (PL_reg_sv) {
1834 /* Make $_ available to executed code. */
1835 if (PL_reg_sv != DEFSV) {
1836 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1837 SAVESPTR(DEFSV);
1838 DEFSV = PL_reg_sv;
b8c5462f 1839 }
d6a28714 1840
9041c2e3 1841 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1842 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1843 /* prepare for quick setting of pos */
14befaf4
DM
1844 sv_magic(PL_reg_sv, (SV*)0,
1845 PERL_MAGIC_regex_global, Nullch, 0);
1846 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1847 mg->mg_len = -1;
b8c5462f 1848 }
d6a28714
JH
1849 PL_reg_magic = mg;
1850 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1851 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 1852 }
09687e5a 1853 if (!PL_reg_curpm) {
0f79a09d 1854 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
1855#ifdef USE_ITHREADS
1856 {
1857 SV* repointer = newSViv(0);
1858 av_push(PL_regex_padav,repointer);
1859 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
1860 PL_regex_pad = AvARRAY(PL_regex_padav);
1861 }
1862#endif
1863 }
aaa362c4 1864 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
1865 PL_reg_oldcurpm = PL_curpm;
1866 PL_curpm = PL_reg_curpm;
1867 if (RX_MATCH_COPIED(prog)) {
1868 /* Here is a serious problem: we cannot rewrite subbeg,
1869 since it may be needed if this match fails. Thus
1870 $` inside (?{}) could fail... */
1871 PL_reg_oldsaved = prog->subbeg;
1872 PL_reg_oldsavedlen = prog->sublen;
1873 RX_MATCH_COPIED_off(prog);
1874 }
1875 else
1876 PL_reg_oldsaved = Nullch;
1877 prog->subbeg = PL_bostr;
1878 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1879 }
1880 prog->startp[0] = startpos - PL_bostr;
1881 PL_reginput = startpos;
1882 PL_regstartp = prog->startp;
1883 PL_regendp = prog->endp;
1884 PL_reglastparen = &prog->lastparen;
a01268b5 1885 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
1886 prog->lastparen = 0;
1887 PL_regsize = 0;
1888 DEBUG_r(PL_reg_starttry = startpos);
1889 if (PL_reg_start_tmpl <= prog->nparens) {
1890 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1891 if(PL_reg_start_tmp)
1892 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1893 else
1894 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1895 }
1896
1897 /* XXXX What this code is doing here?!!! There should be no need
1898 to do this again and again, PL_reglastparen should take care of
3dd2943c 1899 this! --ilya*/
dafc8851
JH
1900
1901 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1902 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1903 * PL_reglastparen), is not needed at all by the test suite
1904 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1905 * enough, for building DynaLoader, or otherwise this
1906 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1907 * will happen. Meanwhile, this code *is* needed for the
1908 * above-mentioned test suite tests to succeed. The common theme
1909 * on those tests seems to be returning null fields from matches.
1910 * --jhi */
dafc8851 1911#if 1
d6a28714
JH
1912 sp = prog->startp;
1913 ep = prog->endp;
1914 if (prog->nparens) {
09e8ae3b 1915 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1916 *++sp = -1;
1917 *++ep = -1;
1918 }
1919 }
dafc8851 1920#endif
02db2b7b 1921 REGCP_SET(lastcp);
d6a28714
JH
1922 if (regmatch(prog->program + 1)) {
1923 prog->endp[0] = PL_reginput - PL_bostr;
1924 return 1;
1925 }
02db2b7b 1926 REGCP_UNWIND(lastcp);
d6a28714
JH
1927 return 0;
1928}
1929
02db2b7b
IZ
1930#define RE_UNWIND_BRANCH 1
1931#define RE_UNWIND_BRANCHJ 2
1932
1933union re_unwind_t;
1934
1935typedef struct { /* XX: makes sense to enlarge it... */
1936 I32 type;
1937 I32 prev;
1938 CHECKPOINT lastcp;
1939} re_unwind_generic_t;
1940
1941typedef struct {
1942 I32 type;
1943 I32 prev;
1944 CHECKPOINT lastcp;
1945 I32 lastparen;
1946 regnode *next;
1947 char *locinput;
1948 I32 nextchr;
1949#ifdef DEBUGGING
1950 int regindent;
1951#endif
1952} re_unwind_branch_t;
1953
1954typedef union re_unwind_t {
1955 I32 type;
1956 re_unwind_generic_t generic;
1957 re_unwind_branch_t branch;
1958} re_unwind_t;
1959
d6a28714
JH
1960/*
1961 - regmatch - main matching routine
1962 *
1963 * Conceptually the strategy is simple: check to see whether the current
1964 * node matches, call self recursively to see whether the rest matches,
1965 * and then act accordingly. In practice we make some effort to avoid
1966 * recursion, in particular by going through "ordinary" nodes (that don't
1967 * need to know whether the rest of the match failed) by a loop instead of
1968 * by recursion.
1969 */
1970/* [lwall] I've hoisted the register declarations to the outer block in order to
1971 * maybe save a little bit of pushing and popping on the stack. It also takes
1972 * advantage of machines that use a register save mask on subroutine entry.
1973 */
1974STATIC I32 /* 0 failure, 1 success */
1975S_regmatch(pTHX_ regnode *prog)
1976{
d6a28714
JH
1977 register regnode *scan; /* Current node. */
1978 regnode *next; /* Next node. */
1979 regnode *inner; /* Next node in internal branch. */
1980 register I32 nextchr; /* renamed nextchr - nextchar colides with
1981 function of same name */
1982 register I32 n; /* no or next */
b7953727
JH
1983 register I32 ln = 0; /* len or last */
1984 register char *s = Nullch; /* operand or save */
d6a28714 1985 register char *locinput = PL_reginput;
b7953727 1986 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 1987 int minmod = 0, sw = 0, logical = 0;
02db2b7b 1988 I32 unwind = 0;
b7953727 1989#if 0
02db2b7b 1990 I32 firstcp = PL_savestack_ix;
b7953727 1991#endif
ffc61ed2 1992 register bool do_utf8 = DO_UTF8(PL_reg_sv);
02db2b7b 1993
d6a28714
JH
1994#ifdef DEBUGGING
1995 PL_regindent++;
1996#endif
1997
1998 /* Note that nextchr is a byte even in UTF */
1999 nextchr = UCHARAT(locinput);
2000 scan = prog;
2001 while (scan != NULL) {
2002#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
02db2b7b 2003#if 1
d6a28714
JH
2004# define sayYES goto yes
2005# define sayNO goto no
7821416a
IZ
2006# define sayYES_FINAL goto yes_final
2007# define sayYES_LOUD goto yes_loud
2008# define sayNO_FINAL goto no_final
2009# define sayNO_SILENT goto do_no
d6a28714
JH
2010# define saySAME(x) if (x) goto yes; else goto no
2011# define REPORT_CODE_OFF 24
2012#else
2013# define sayYES return 1
2014# define sayNO return 0
7821416a
IZ
2015# define sayYES_FINAL return 1
2016# define sayYES_LOUD return 1
2017# define sayNO_FINAL return 0
2018# define sayNO_SILENT return 0
d6a28714
JH
2019# define saySAME(x) return x
2020#endif
2021 DEBUG_r( {
2022 SV *prop = sv_newmortal();
2023 int docolor = *PL_colors[0];
2024 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2025 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2026 /* The part of the string before starttry has one color
2027 (pref0_len chars), between starttry and current
2028 position another one (pref_len - pref0_len chars),
2029 after the current position the third one.
2030 We assume that pref0_len <= pref_len, otherwise we
2031 decrease pref0_len. */
9041c2e3 2032 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2033 ? (5 + taill) - l : locinput - PL_bostr;
2034 int pref0_len;
d6a28714 2035
1aa99e6b
IH
2036 while (UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2037 pref_len++;
2038 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2039 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2040 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2041 ? (5 + taill) - pref_len : PL_regeol - locinput);
1aa99e6b
IH
2042 while (UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2043 l--;
d6a28714
JH
2044 if (pref0_len < 0)
2045 pref0_len = 0;
2046 if (pref0_len > pref_len)
2047 pref0_len = pref_len;
2048 regprop(prop, scan);
9041c2e3 2049 PerlIO_printf(Perl_debug_log,
b900a521 2050 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
9041c2e3
NIS
2051 (IV)(locinput - PL_bostr),
2052 PL_colors[4], pref0_len,
d6a28714 2053 locinput - pref_len, PL_colors[5],
9041c2e3 2054 PL_colors[2], pref_len - pref0_len,
d6a28714
JH
2055 locinput - pref_len + pref0_len, PL_colors[3],
2056 (docolor ? "" : "> <"),
2057 PL_colors[0], l, locinput, PL_colors[1],
2058 15 - l - pref_len + 1,
2059 "",
b900a521 2060 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
2061 SvPVX(prop));
2062 } );
2063
2064 next = scan + NEXT_OFF(scan);
2065 if (next == scan)
2066 next = NULL;
2067
2068 switch (OP(scan)) {
2069 case BOL:
12d33761
HS
2070 if (locinput == PL_bostr || (PL_multiline &&
2071 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2072 {
2073 /* regtill = regbol; */
b8c5462f
JH
2074 break;
2075 }
d6a28714
JH
2076 sayNO;
2077 case MBOL:
12d33761
HS
2078 if (locinput == PL_bostr ||
2079 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2080 {
b8c5462f
JH
2081 break;
2082 }
d6a28714
JH
2083 sayNO;
2084 case SBOL:
c2a73568 2085 if (locinput == PL_bostr)
b8c5462f 2086 break;
d6a28714
JH
2087 sayNO;
2088 case GPOS:
2089 if (locinput == PL_reg_ganch)
2090 break;
2091 sayNO;
2092 case EOL:
2093 if (PL_multiline)
2094 goto meol;
2095 else
2096 goto seol;
2097 case MEOL:
2098 meol:
2099 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2100 sayNO;
b8c5462f 2101 break;
d6a28714
JH
2102 case SEOL:
2103 seol:
2104 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2105 sayNO;
d6a28714 2106 if (PL_regeol - locinput > 1)
b8c5462f 2107 sayNO;
b8c5462f 2108 break;
d6a28714
JH
2109 case EOS:
2110 if (PL_regeol != locinput)
b8c5462f 2111 sayNO;
d6a28714 2112 break;
ffc61ed2 2113 case SANY:
d6a28714 2114 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2115 sayNO;
b8c5462f 2116 nextchr = UCHARAT(++locinput);
a0d0e21e 2117 break;
ffc61ed2 2118 case REG_ANY:
1aa99e6b
IH
2119 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2120 sayNO;
2121 if (do_utf8) {
b8c5462f 2122 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2123 if (locinput > PL_regeol)
2124 sayNO;
a0ed51b3 2125 nextchr = UCHARAT(locinput);
a0ed51b3 2126 }
1aa99e6b
IH
2127 else
2128 nextchr = UCHARAT(++locinput);
a0ed51b3 2129 break;
d6a28714 2130 case EXACT:
cd439c50
IZ
2131 s = STRING(scan);
2132 ln = STR_LEN(scan);
1aa99e6b
IH
2133 if (do_utf8 != (UTF!=0)) {
2134 char *l = locinput;
2135 char *e = s + ln;
2136 STRLEN len;
2137 if (do_utf8)
2138 while (s < e) {
2139 if (l >= PL_regeol)
2140 sayNO;
9041c2e3 2141 if (*((U8*)s) != utf8_to_uvchr((U8*)l, &len))
1aa99e6b
IH
2142 sayNO;
2143 s++;
2144 l += len;
2145 }
2146 else
2147 while (s < e) {
2148 if (l >= PL_regeol)
2149 sayNO;
9041c2e3 2150 if (*((U8*)l) != utf8_to_uvchr((U8*)s, &len))
1aa99e6b
IH
2151 sayNO;
2152 s += len;
2153 l++;
2154 }
2155 locinput = l;
2156 nextchr = UCHARAT(locinput);
2157 break;
2158 }
d6a28714
JH
2159 /* Inline the first character, for speed. */
2160 if (UCHARAT(s) != nextchr)
2161 sayNO;
2162 if (PL_regeol - locinput < ln)
2163 sayNO;
2164 if (ln > 1 && memNE(s, locinput, ln))
2165 sayNO;
2166 locinput += ln;
2167 nextchr = UCHARAT(locinput);
2168 break;
2169 case EXACTFL:
b8c5462f
JH
2170 PL_reg_flags |= RF_tainted;
2171 /* FALL THROUGH */
d6a28714 2172 case EXACTF:
cd439c50
IZ
2173 s = STRING(scan);
2174 ln = STR_LEN(scan);
d6a28714 2175
1aa99e6b 2176 if (do_utf8) {
d6a28714 2177 char *l = locinput;
1aa99e6b
IH
2178 char *e;
2179 e = s + ln;
d6a28714
JH
2180 c1 = OP(scan) == EXACTF;
2181 while (s < e) {
1aa99e6b 2182 if (l >= PL_regeol) {
d6a28714
JH
2183 sayNO;
2184 }
2b9d42f0 2185 if ((UTF ? utf8n_to_uvchr((U8*)s, e - s, 0, 0) : *((U8*)s)) !=
1aa99e6b
IH
2186 (c1 ? toLOWER_utf8((U8*)l) : toLOWER_LC_utf8((U8*)l)))
2187 sayNO;
2188 s += UTF ? UTF8SKIP(s) : 1;
d6a28714 2189 l += UTF8SKIP(l);
b8c5462f 2190 }
d6a28714 2191 locinput = l;
a0ed51b3
LW
2192 nextchr = UCHARAT(locinput);
2193 break;
2194 }
d6a28714
JH
2195
2196 /* Inline the first character, for speed. */
2197 if (UCHARAT(s) != nextchr &&
2198 UCHARAT(s) != ((OP(scan) == EXACTF)
2199 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2200 sayNO;
d6a28714 2201 if (PL_regeol - locinput < ln)
b8c5462f 2202 sayNO;
d6a28714
JH
2203 if (ln > 1 && (OP(scan) == EXACTF
2204 ? ibcmp(s, locinput, ln)
2205 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2206 sayNO;
d6a28714
JH
2207 locinput += ln;
2208 nextchr = UCHARAT(locinput);
a0d0e21e 2209 break;
d6a28714 2210 case ANYOF:
ffc61ed2
JH
2211 if (do_utf8) {
2212 if (!reginclass(scan, (U8*)locinput, do_utf8))
2213 sayNO;
2214 if (locinput >= PL_regeol)
2215 sayNO;
2216 locinput += PL_utf8skip[nextchr];
b8c5462f 2217 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2218 }
2219 else {
2220 if (nextchr < 0)
2221 nextchr = UCHARAT(locinput);
2222 if (!reginclass(scan, (U8*)locinput, do_utf8))
2223 sayNO;
2224 if (!nextchr && locinput >= PL_regeol)
2225 sayNO;
2226 nextchr = UCHARAT(++locinput);
2227 }
b8c5462f 2228 break;
d6a28714 2229 case ALNUML:
b8c5462f
JH
2230 PL_reg_flags |= RF_tainted;
2231 /* FALL THROUGH */
d6a28714 2232 case ALNUM:
b8c5462f 2233 if (!nextchr)
4633a7c4 2234 sayNO;
ffc61ed2 2235 if (do_utf8) {
ad24be35 2236 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2237 if (!(OP(scan) == ALNUM
3568d838 2238 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2239 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2240 {
2241 sayNO;
a0ed51b3 2242 }
b8c5462f 2243 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2244 nextchr = UCHARAT(locinput);
2245 break;
2246 }
ffc61ed2 2247 if (!(OP(scan) == ALNUM
d6a28714 2248 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2249 sayNO;
b8c5462f 2250 nextchr = UCHARAT(++locinput);
a0d0e21e 2251 break;
d6a28714 2252 case NALNUML:
b8c5462f
JH
2253 PL_reg_flags |= RF_tainted;
2254 /* FALL THROUGH */
d6a28714
JH
2255 case NALNUM:
2256 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2257 sayNO;
ffc61ed2 2258 if (do_utf8) {
8269fa76 2259 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2260 if (OP(scan) == NALNUM
3568d838 2261 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2262 : isALNUM_LC_utf8((U8*)locinput))
2263 {
b8c5462f 2264 sayNO;
d6a28714 2265 }
b8c5462f
JH
2266 locinput += PL_utf8skip[nextchr];
2267 nextchr = UCHARAT(locinput);
2268 break;
2269 }
ffc61ed2 2270 if (OP(scan) == NALNUM
d6a28714 2271 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2272 sayNO;
76e3520e 2273 nextchr = UCHARAT(++locinput);
a0d0e21e 2274 break;
d6a28714
JH
2275 case BOUNDL:
2276 case NBOUNDL:
3280af22 2277 PL_reg_flags |= RF_tainted;
bbce6d69 2278 /* FALL THROUGH */
d6a28714
JH
2279 case BOUND:
2280 case NBOUND:
2281 /* was last char in word? */
ffc61ed2 2282 if (do_utf8) {
12d33761
HS
2283 if (locinput == PL_bostr)
2284 ln = '\n';
ffc61ed2
JH
2285 else {
2286 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2287
2b9d42f0 2288 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2289 }
2290 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2291 ln = isALNUM_uni(ln);
8269fa76 2292 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2293 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2294 }
2295 else {
9041c2e3 2296 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2297 n = isALNUM_LC_utf8((U8*)locinput);
2298 }
a0ed51b3 2299 }
d6a28714 2300 else {
12d33761
HS
2301 ln = (locinput != PL_bostr) ?
2302 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2303 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2304 ln = isALNUM(ln);
2305 n = isALNUM(nextchr);
2306 }
2307 else {
2308 ln = isALNUM_LC(ln);
2309 n = isALNUM_LC(nextchr);
2310 }
d6a28714 2311 }
ffc61ed2
JH
2312 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2313 OP(scan) == BOUNDL))
2314 sayNO;
a0ed51b3 2315 break;
d6a28714 2316 case SPACEL:
3280af22 2317 PL_reg_flags |= RF_tainted;
bbce6d69 2318 /* FALL THROUGH */
d6a28714 2319 case SPACE:
9442cb0e 2320 if (!nextchr)
4633a7c4 2321 sayNO;
1aa99e6b 2322 if (do_utf8) {
fd400ab9 2323 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2324 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2325 if (!(OP(scan) == SPACE
3568d838 2326 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2327 : isSPACE_LC_utf8((U8*)locinput)))
2328 {
2329 sayNO;
2330 }
2331 locinput += PL_utf8skip[nextchr];
2332 nextchr = UCHARAT(locinput);
2333 break;
d6a28714 2334 }
ffc61ed2
JH
2335 if (!(OP(scan) == SPACE
2336 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2337 sayNO;
2338 nextchr = UCHARAT(++locinput);
2339 }
2340 else {
2341 if (!(OP(scan) == SPACE
2342 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2343 sayNO;
2344 nextchr = UCHARAT(++locinput);
a0ed51b3 2345 }
a0ed51b3 2346 break;
d6a28714 2347 case NSPACEL:
3280af22 2348 PL_reg_flags |= RF_tainted;
bbce6d69 2349 /* FALL THROUGH */
d6a28714 2350 case NSPACE:
9442cb0e 2351 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2352 sayNO;
1aa99e6b 2353 if (do_utf8) {
8269fa76 2354 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2355 if (OP(scan) == NSPACE
3568d838 2356 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2357 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2358 {
2359 sayNO;
2360 }
2361 locinput += PL_utf8skip[nextchr];
2362 nextchr = UCHARAT(locinput);
2363 break;
a0ed51b3 2364 }
ffc61ed2 2365 if (OP(scan) == NSPACE
d6a28714 2366 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2367 sayNO;
76e3520e 2368 nextchr = UCHARAT(++locinput);
a0d0e21e 2369 break;
d6a28714 2370 case DIGITL:
a0ed51b3
LW
2371 PL_reg_flags |= RF_tainted;
2372 /* FALL THROUGH */
d6a28714 2373 case DIGIT:
9442cb0e 2374 if (!nextchr)
a0ed51b3 2375 sayNO;
1aa99e6b 2376 if (do_utf8) {
8269fa76 2377 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2378 if (!(OP(scan) == DIGIT
3568d838 2379 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2380 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2381 {
a0ed51b3 2382 sayNO;
dfe13c55 2383 }
6f06b55f 2384 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2385 nextchr = UCHARAT(locinput);
2386 break;
2387 }
ffc61ed2 2388 if (!(OP(scan) == DIGIT
9442cb0e 2389 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2390 sayNO;
2391 nextchr = UCHARAT(++locinput);
2392 break;
d6a28714 2393 case NDIGITL:
b8c5462f
JH
2394 PL_reg_flags |= RF_tainted;
2395 /* FALL THROUGH */
d6a28714 2396 case NDIGIT:
9442cb0e 2397 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2398 sayNO;
1aa99e6b 2399 if (do_utf8) {
8269fa76 2400 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2401 if (OP(scan) == NDIGIT
3568d838 2402 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2403 : isDIGIT_LC_utf8((U8*)locinput))
2404 {
a0ed51b3 2405 sayNO;
9442cb0e 2406 }
6f06b55f 2407 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2408 nextchr = UCHARAT(locinput);
2409 break;
2410 }
ffc61ed2 2411 if (OP(scan) == NDIGIT
9442cb0e 2412 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2413 sayNO;
2414 nextchr = UCHARAT(++locinput);
2415 break;
2416 case CLUMP:
8269fa76 2417 LOAD_UTF8_CHARCLASS(mark,"~");
3568d838
JH
2418 if (locinput >= PL_regeol ||
2419 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3 2420 sayNO;
6f06b55f 2421 locinput += PL_utf8skip[nextchr];
3568d838
JH
2422 while (locinput < PL_regeol &&
2423 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3
LW
2424 locinput += UTF8SKIP(locinput);
2425 if (locinput > PL_regeol)
2426 sayNO;
2427 nextchr = UCHARAT(locinput);
2428 break;
c8756f30 2429 case REFFL:
3280af22 2430 PL_reg_flags |= RF_tainted;
c8756f30 2431 /* FALL THROUGH */
c277df42 2432 case REF:
c8756f30 2433 case REFF:
c277df42 2434 n = ARG(scan); /* which paren pair */
cf93c79d 2435 ln = PL_regstartp[n];
2c2d71f5 2436 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2437 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2438 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2439 if (ln == PL_regendp[n])
a0d0e21e 2440 break;
a0ed51b3 2441
cf93c79d 2442 s = PL_bostr + ln;
1aa99e6b 2443 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2444 char *l = locinput;
cf93c79d 2445 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2446 /*
2447 * Note that we can't do the "other character" lookup trick as
2448 * in the 8-bit case (no pun intended) because in Unicode we
2449 * have to map both upper and title case to lower case.
2450 */
2451 if (OP(scan) == REFF) {
2452 while (s < e) {
2453 if (l >= PL_regeol)
2454 sayNO;
dfe13c55 2455 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2456 sayNO;
2457 s += UTF8SKIP(s);
2458 l += UTF8SKIP(l);
2459 }
2460 }
2461 else {
2462 while (s < e) {
2463 if (l >= PL_regeol)
2464 sayNO;
dfe13c55 2465 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2466 sayNO;
2467 s += UTF8SKIP(s);
2468 l += UTF8SKIP(l);
2469 }
2470 }
2471 locinput = l;
2472 nextchr = UCHARAT(locinput);
2473 break;
2474 }
2475
a0d0e21e 2476 /* Inline the first character, for speed. */
76e3520e 2477 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2478 (OP(scan) == REF ||
2479 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2480 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2481 sayNO;
cf93c79d 2482 ln = PL_regendp[n] - ln;
3280af22 2483 if (locinput + ln > PL_regeol)
4633a7c4 2484 sayNO;
c8756f30
AK
2485 if (ln > 1 && (OP(scan) == REF
2486 ? memNE(s, locinput, ln)
2487 : (OP(scan) == REFF
2488 ? ibcmp(s, locinput, ln)
2489 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2490 sayNO;
a0d0e21e 2491 locinput += ln;
76e3520e 2492 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2493 break;
2494
2495 case NOTHING:
c277df42 2496 case TAIL:
a0d0e21e
LW
2497 break;
2498 case BACK:
2499 break;
c277df42
IZ
2500 case EVAL:
2501 {
2502 dSP;
533c011a 2503 OP_4tree *oop = PL_op;
3280af22
NIS
2504 COP *ocurcop = PL_curcop;
2505 SV **ocurpad = PL_curpad;
c277df42 2506 SV *ret;
9041c2e3 2507
c277df42 2508 n = ARG(scan);
533c011a 2509 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2510 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2511 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2512 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2513
cea2e8a9 2514 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2515 SPAGAIN;
2516 ret = POPs;
2517 PUTBACK;
9041c2e3 2518
0f5d15d6
IZ
2519 PL_op = oop;
2520 PL_curpad = ocurpad;
2521 PL_curcop = ocurcop;
c277df42 2522 if (logical) {
0f5d15d6
IZ
2523 if (logical == 2) { /* Postponed subexpression. */
2524 regexp *re;
22c35a8c 2525 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2526 re_cc_state state;
0f5d15d6
IZ
2527 CHECKPOINT cp, lastcp;
2528
2529 if(SvROK(ret) || SvRMAGICAL(ret)) {
2530 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2531
2532 if(SvMAGICAL(sv))
14befaf4 2533 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2534 }
2535 if (mg) {
2536 re = (regexp *)mg->mg_obj;
df0003d4 2537 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2538 }
2539 else {
2540 STRLEN len;
2541 char *t = SvPV(ret, len);
2542 PMOP pm;
2543 char *oprecomp = PL_regprecomp;
2544 I32 osize = PL_regsize;
2545 I32 onpar = PL_regnpar;
2546
5fcd1c1b 2547 Zero(&pm, 1, PMOP);
cea2e8a9 2548 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2549 if (!(SvFLAGS(ret)
0f5d15d6 2550 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2551 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2552 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2553 PL_regprecomp = oprecomp;
2554 PL_regsize = osize;
2555 PL_regnpar = onpar;
2556 }
2557 DEBUG_r(
9041c2e3 2558 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2559 "Entering embedded `%s%.60s%s%s'\n",
2560 PL_colors[0],
2561 re->precomp,
2562 PL_colors[1],
2563 (strlen(re->precomp) > 60 ? "..." : ""))
2564 );
2565 state.node = next;
2566 state.prev = PL_reg_call_cc;
2567 state.cc = PL_regcc;
2568 state.re = PL_reg_re;
2569
2ab05381 2570 PL_regcc = 0;
9041c2e3 2571
0f5d15d6 2572 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2573 REGCP_SET(lastcp);
0f5d15d6
IZ
2574 cache_re(re);
2575 state.ss = PL_savestack_ix;
2576 *PL_reglastparen = 0;
a01268b5 2577 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2578 PL_reg_call_cc = &state;
2579 PL_reginput = locinput;
2c2d71f5
JH
2580
2581 /* XXXX This is too dramatic a measure... */
2582 PL_reg_maxiter = 0;
2583
0f5d15d6 2584 if (regmatch(re->program + 1)) {
2c914db6
IZ
2585 /* Even though we succeeded, we need to restore
2586 global variables, since we may be wrapped inside
2587 SUSPEND, thus the match may be not finished yet. */
2588
2589 /* XXXX Do this only if SUSPENDed? */
2590 PL_reg_call_cc = state.prev;
2591 PL_regcc = state.cc;
2592 PL_reg_re = state.re;
2593 cache_re(PL_reg_re);
2594
2595 /* XXXX This is too dramatic a measure... */
2596 PL_reg_maxiter = 0;
2597
2598 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2599 ReREFCNT_dec(re);
2600 regcpblow(cp);
2601 sayYES;
2602 }
0f5d15d6 2603 ReREFCNT_dec(re);
02db2b7b 2604 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2605 regcppop();
2606 PL_reg_call_cc = state.prev;
2607 PL_regcc = state.cc;
2608 PL_reg_re = state.re;
d3790889 2609 cache_re(PL_reg_re);
2c2d71f5
JH
2610
2611 /* XXXX This is too dramatic a measure... */
2612 PL_reg_maxiter = 0;
2613
0f5d15d6
IZ
2614 sayNO;
2615 }
c277df42 2616 sw = SvTRUE(ret);
0f5d15d6 2617 logical = 0;
a0ed51b3
LW
2618 }
2619 else
3280af22 2620 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2621 break;
2622 }
a0d0e21e 2623 case OPEN:
c277df42 2624 n = ARG(scan); /* which paren pair */
3280af22
NIS
2625 PL_reg_start_tmp[n] = locinput;
2626 if (n > PL_regsize)
2627 PL_regsize = n;
a0d0e21e
LW
2628 break;
2629 case CLOSE:
c277df42 2630 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2631 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2632 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2633 if (n > *PL_reglastparen)
2634 *PL_reglastparen = n;
a01268b5 2635 *PL_reglastcloseparen = n;
a0d0e21e 2636 break;
c277df42
IZ
2637 case GROUPP:
2638 n = ARG(scan); /* which paren pair */
cf93c79d 2639 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2640 break;
2641 case IFTHEN:
2c2d71f5 2642 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2643 if (sw)
2644 next = NEXTOPER(NEXTOPER(scan));
2645 else {
2646 next = scan + ARG(scan);
2647 if (OP(next) == IFTHEN) /* Fake one. */
2648 next = NEXTOPER(NEXTOPER(next));
2649 }
2650 break;
2651 case LOGICAL:
0f5d15d6 2652 logical = scan->flags;
c277df42 2653 break;
2ab05381
IZ
2654/*******************************************************************
2655 PL_regcc contains infoblock about the innermost (...)* loop, and
2656 a pointer to the next outer infoblock.
2657
2658 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2659
2660 1) After matching X, regnode for CURLYX is processed;
2661
9041c2e3 2662 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2663 regmatch() recursively with the starting point at WHILEM node;
2664
2665 3) Each hit of WHILEM node tries to match A and Z (in the order
2666 depending on the current iteration, min/max of {min,max} and
2667 greediness). The information about where are nodes for "A"
2668 and "Z" is read from the infoblock, as is info on how many times "A"
2669 was already matched, and greediness.
2670
2671 4) After A matches, the same WHILEM node is hit again.
2672
2673 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2674 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2675 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2676 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2677 of the external loop.
2678
2679 Currently present infoblocks form a tree with a stem formed by PL_curcc
2680 and whatever it mentions via ->next, and additional attached trees
2681 corresponding to temporarily unset infoblocks as in "5" above.
2682
9041c2e3 2683 In the following picture infoblocks for outer loop of
2ab05381
IZ
2684 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2685 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2686 infoblocks are drawn below the "reset" infoblock.
2687
2688 In fact in the picture below we do not show failed matches for Z and T
2689 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2690 more obvious *why* one needs to *temporary* unset infoblocks.]
2691
2692 Matched REx position InfoBlocks Comment
2693 (Y(A)*?Z)*?T x
2694 Y(A)*?Z)*?T x <- O
2695 Y (A)*?Z)*?T x <- O
2696 Y A)*?Z)*?T x <- O <- I
2697 YA )*?Z)*?T x <- O <- I
2698 YA A)*?Z)*?T x <- O <- I
2699 YAA )*?Z)*?T x <- O <- I
2700 YAA Z)*?T x <- O # Temporary unset I
2701 I
2702
2703 YAAZ Y(A)*?Z)*?T x <- O
2704 I
2705
2706 YAAZY (A)*?Z)*?T x <- O
2707 I
2708
2709 YAAZY A)*?Z)*?T x <- O <- I
2710 I
2711
2712 YAAZYA )*?Z)*?T x <- O <- I
2713 I
2714
2715 YAAZYA Z)*?T x <- O # Temporary unset I
2716 I,I
2717
2718 YAAZYAZ )*?T x <- O
2719 I,I
2720
2721 YAAZYAZ T x # Temporary unset O
2722 O
2723 I,I
2724
2725 YAAZYAZT x
2726 O
2727 I,I
2728 *******************************************************************/
a0d0e21e
LW
2729 case CURLYX: {
2730 CURCUR cc;
3280af22 2731 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2732 /* No need to save/restore up to this paren */
2733 I32 parenfloor = scan->flags;
c277df42
IZ
2734
2735 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2736 next += ARG(next);
3280af22
NIS
2737 cc.oldcc = PL_regcc;
2738 PL_regcc = &cc;
cb434fcc
IZ
2739 /* XXXX Probably it is better to teach regpush to support
2740 parenfloor > PL_regsize... */
2741 if (parenfloor > *PL_reglastparen)
2742 parenfloor = *PL_reglastparen; /* Pessimization... */
2743 cc.parenfloor = parenfloor;
a0d0e21e
LW
2744 cc.cur = -1;
2745 cc.min = ARG1(scan);
2746 cc.max = ARG2(scan);
c277df42 2747 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2748 cc.next = next;
2749 cc.minmod = minmod;
2750 cc.lastloc = 0;
3280af22 2751 PL_reginput = locinput;
a0d0e21e
LW
2752 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2753 regcpblow(cp);
3280af22 2754 PL_regcc = cc.oldcc;
4633a7c4 2755 saySAME(n);
a0d0e21e
LW
2756 }
2757 /* NOT REACHED */
2758 case WHILEM: {
2759 /*
2760 * This is really hard to understand, because after we match
2761 * what we're trying to match, we must make sure the rest of
2c2d71f5 2762 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2763 * to go back UP the parse tree by recursing ever deeper. And
2764 * if it fails, we have to reset our parent's current state
2765 * that we can try again after backing off.
2766 */
2767
c277df42 2768 CHECKPOINT cp, lastcp;
3280af22 2769 CURCUR* cc = PL_regcc;
c277df42
IZ
2770 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2771
4633a7c4 2772 n = cc->cur + 1; /* how many we know we matched */
3280af22 2773 PL_reginput = locinput;
a0d0e21e 2774
c277df42 2775 DEBUG_r(
9041c2e3
NIS
2776 PerlIO_printf(Perl_debug_log,
2777 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2778 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2779 (long)n, (long)cc->min,
c277df42
IZ
2780 (long)cc->max, (long)cc)
2781 );
4633a7c4 2782
a0d0e21e
LW
2783 /* If degenerate scan matches "", assume scan done. */
2784
579cf2c3 2785 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2786 PL_regcc = cc->oldcc;
2ab05381
IZ
2787 if (PL_regcc)
2788 ln = PL_regcc->cur;
c277df42 2789 DEBUG_r(
c3464db5
DD
2790 PerlIO_printf(Perl_debug_log,
2791 "%*s empty match detected, try continuation...\n",
3280af22 2792 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2793 );
a0d0e21e 2794 if (regmatch(cc->next))
4633a7c4 2795 sayYES;
2ab05381
IZ
2796 if (PL_regcc)
2797 PL_regcc->cur = ln;
3280af22 2798 PL_regcc = cc;
4633a7c4 2799 sayNO;
a0d0e21e
LW
2800 }
2801
2802 /* First just match a string of min scans. */
2803
2804 if (n < cc->min) {
2805 cc->cur = n;
2806 cc->lastloc = locinput;
4633a7c4
LW
2807 if (regmatch(cc->scan))
2808 sayYES;
2809 cc->cur = n - 1;
c277df42 2810 cc->lastloc = lastloc;
4633a7c4 2811 sayNO;
a0d0e21e
LW
2812 }
2813
2c2d71f5
JH
2814 if (scan->flags) {
2815 /* Check whether we already were at this position.
2816 Postpone detection until we know the match is not
2817 *that* much linear. */
2818 if (!PL_reg_maxiter) {
2819 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2820 PL_reg_leftiter = PL_reg_maxiter;
2821 }
2822 if (PL_reg_leftiter-- == 0) {
2823 I32 size = (PL_reg_maxiter + 7)/8;
2824 if (PL_reg_poscache) {
2825 if (PL_reg_poscache_size < size) {
2826 Renew(PL_reg_poscache, size, char);
2827 PL_reg_poscache_size = size;
2828 }
2829 Zero(PL_reg_poscache, size, char);
2830 }
2831 else {
2832 PL_reg_poscache_size = size;
2833 Newz(29, PL_reg_poscache, size, char);
2834 }
2835 DEBUG_r(
2836 PerlIO_printf(Perl_debug_log,
2837 "%sDetected a super-linear match, switching on caching%s...\n",
2838 PL_colors[4], PL_colors[5])
2839 );
2840 }
2841 if (PL_reg_leftiter < 0) {
2842 I32 o = locinput - PL_bostr, b;
2843
2844 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2845 b = o % 8;
2846 o /= 8;
2847 if (PL_reg_poscache[o] & (1<<b)) {
2848 DEBUG_r(
2849 PerlIO_printf(Perl_debug_log,
2850 "%*s already tried at this position...\n",
2851 REPORT_CODE_OFF+PL_regindent*2, "")
2852 );
7821416a 2853 sayNO_SILENT;
2c2d71f5
JH
2854 }
2855 PL_reg_poscache[o] |= (1<<b);
2856 }
2857 }
2858
a0d0e21e
LW
2859 /* Prefer next over scan for minimal matching. */
2860
2861 if (cc->minmod) {
3280af22 2862 PL_regcc = cc->oldcc;
2ab05381
IZ
2863 if (PL_regcc)
2864 ln = PL_regcc->cur;
5f05dabc 2865 cp = regcppush(cc->parenfloor);
02db2b7b 2866 REGCP_SET(lastcp);
5f05dabc 2867 if (regmatch(cc->next)) {
c277df42 2868 regcpblow(cp);
4633a7c4 2869 sayYES; /* All done. */
5f05dabc 2870 }
02db2b7b 2871 REGCP_UNWIND(lastcp);
5f05dabc 2872 regcppop();
2ab05381
IZ
2873 if (PL_regcc)
2874 PL_regcc->cur = ln;
3280af22 2875 PL_regcc = cc;
a0d0e21e 2876
c277df42 2877 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 2878 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2879 && !(PL_reg_flags & RF_warned)) {
2880 PL_reg_flags |= RF_warned;
e476b1b5 2881 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2882 "Complex regular subexpression recursion",
2883 REG_INFTY - 1);
c277df42 2884 }
4633a7c4 2885 sayNO;
c277df42 2886 }
a687059c 2887
c277df42 2888 DEBUG_r(
c3464db5
DD
2889 PerlIO_printf(Perl_debug_log,
2890 "%*s trying longer...\n",
3280af22 2891 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2892 );
a0d0e21e 2893 /* Try scanning more and see if it helps. */
3280af22 2894 PL_reginput = locinput;
a0d0e21e
LW
2895 cc->cur = n;
2896 cc->lastloc = locinput;
5f05dabc 2897 cp = regcppush(cc->parenfloor);
02db2b7b 2898 REGCP_SET(lastcp);
5f05dabc 2899 if (regmatch(cc->scan)) {
c277df42 2900 regcpblow(cp);
4633a7c4 2901 sayYES;
5f05dabc 2902 }
02db2b7b 2903 REGCP_UNWIND(lastcp);
5f05dabc 2904 regcppop();
4633a7c4 2905 cc->cur = n - 1;
c277df42 2906 cc->lastloc = lastloc;
4633a7c4 2907 sayNO;
a0d0e21e
LW
2908 }
2909
2910 /* Prefer scan over next for maximal matching. */
2911
2912 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2913 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2914 cc->cur = n;
2915 cc->lastloc = locinput;
02db2b7b 2916 REGCP_SET(lastcp);
5f05dabc 2917 if (regmatch(cc->scan)) {
c277df42 2918 regcpblow(cp);
4633a7c4 2919 sayYES;
5f05dabc 2920 }
02db2b7b 2921 REGCP_UNWIND(lastcp);
a0d0e21e 2922 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2923 PL_reginput = locinput;
c277df42 2924 DEBUG_r(
c3464db5
DD
2925 PerlIO_printf(Perl_debug_log,
2926 "%*s failed, try continuation...\n",
3280af22 2927 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2928 );
2929 }
9041c2e3 2930 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2931 && !(PL_reg_flags & RF_warned)) {
3280af22 2932 PL_reg_flags |= RF_warned;
e476b1b5 2933 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2934 "Complex regular subexpression recursion",
2935 REG_INFTY - 1);
a0d0e21e
LW
2936 }
2937
2938 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2939 PL_regcc = cc->oldcc;
2ab05381
IZ
2940 if (PL_regcc)
2941 ln = PL_regcc->cur;
a0d0e21e 2942 if (regmatch(cc->next))
4633a7c4 2943 sayYES;
2ab05381
IZ
2944 if (PL_regcc)
2945 PL_regcc->cur = ln;
3280af22 2946 PL_regcc = cc;
4633a7c4 2947 cc->cur = n - 1;
c277df42 2948 cc->lastloc = lastloc;
4633a7c4 2949 sayNO;
a0d0e21e
LW
2950 }
2951 /* NOT REACHED */
9041c2e3 2952 case BRANCHJ:
c277df42
IZ
2953 next = scan + ARG(scan);
2954 if (next == scan)
2955 next = NULL;
2956 inner = NEXTOPER(NEXTOPER(scan));
2957 goto do_branch;
9041c2e3 2958 case BRANCH:
c277df42
IZ
2959 inner = NEXTOPER(scan);
2960 do_branch:
2961 {
c277df42
IZ
2962 c1 = OP(scan);
2963 if (OP(next) != c1) /* No choice. */
2964 next = inner; /* Avoid recursion. */
a0d0e21e 2965 else {
02db2b7b
IZ
2966 I32 lastparen = *PL_reglastparen;
2967 I32 unwind1;
2968 re_unwind_branch_t *uw;
2969
2970 /* Put unwinding data on stack */
2971 unwind1 = SSNEWt(1,re_unwind_branch_t);
2972 uw = SSPTRt(unwind1,re_unwind_branch_t);
2973 uw->prev = unwind;
2974 unwind = unwind1;
2975 uw->type = ((c1 == BRANCH)
2976 ? RE_UNWIND_BRANCH
2977 : RE_UNWIND_BRANCHJ);
2978 uw->lastparen = lastparen;
2979 uw->next = next;
2980 uw->locinput = locinput;
2981 uw->nextchr = nextchr;
2982#ifdef DEBUGGING
2983 uw->regindent = ++PL_regindent;
2984#endif
c277df42 2985
02db2b7b
IZ
2986 REGCP_SET(uw->lastcp);
2987
2988 /* Now go into the first branch */
2989 next = inner;
a687059c 2990 }
a0d0e21e
LW
2991 }
2992 break;
2993 case MINMOD:
2994 minmod = 1;
2995 break;
c277df42
IZ
2996 case CURLYM:
2997 {
00db4c45 2998 I32 l = 0;
c277df42 2999 CHECKPOINT lastcp;
9041c2e3 3000
c277df42
IZ
3001 /* We suppose that the next guy does not need
3002 backtracking: in particular, it is of constant length,
3003 and has no parenths to influence future backrefs. */
3004 ln = ARG1(scan); /* min to match */
3005 n = ARG2(scan); /* max to match */
c277df42
IZ
3006 paren = scan->flags;
3007 if (paren) {
3280af22
NIS
3008 if (paren > PL_regsize)
3009 PL_regsize = paren;
3010 if (paren > *PL_reglastparen)
3011 *PL_reglastparen = paren;
c277df42 3012 }
dc45a647 3013 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3014 if (paren)
3015 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3016 PL_reginput = locinput;
c277df42
IZ
3017 if (minmod) {
3018 minmod = 0;
3019 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3020 sayNO;
5f4b28b2 3021 if (ln && l == 0 && n >= ln
c277df42
IZ
3022 /* In fact, this is tricky. If paren, then the
3023 fact that we did/didnot match may influence
3024 future execution. */
3025 && !(paren && ln == 0))
3026 ln = n;
3280af22 3027 locinput = PL_reginput;
22c35a8c 3028 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3029 c1 = (U8)*STRING(next);
c277df42 3030 if (OP(next) == EXACTF)
22c35a8c 3031 c2 = PL_fold[c1];
c277df42 3032 else if (OP(next) == EXACTFL)
22c35a8c 3033 c2 = PL_fold_locale[c1];
c277df42
IZ
3034 else
3035 c2 = c1;
a0ed51b3
LW
3036 }
3037 else
c277df42 3038 c1 = c2 = -1000;
02db2b7b 3039 REGCP_SET(lastcp);
5f4b28b2 3040 /* This may be improved if l == 0. */
c277df42
IZ
3041 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3042 /* If it could work, try it. */
3043 if (c1 == -1000 ||
3280af22
NIS
3044 UCHARAT(PL_reginput) == c1 ||
3045 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3046 {
3047 if (paren) {
3048 if (n) {
cf93c79d
IZ
3049 PL_regstartp[paren] =
3050 HOPc(PL_reginput, -l) - PL_bostr;
3051 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3052 }
3053 else
cf93c79d 3054 PL_regendp[paren] = -1;
c277df42
IZ
3055 }
3056 if (regmatch(next))
3057 sayYES;
02db2b7b 3058 REGCP_UNWIND(lastcp);
c277df42
IZ
3059 }
3060 /* Couldn't or didn't -- move forward. */
3280af22 3061 PL_reginput = locinput;
c277df42
IZ
3062 if (regrepeat_hard(scan, 1, &l)) {
3063 ln++;
3280af22 3064 locinput = PL_reginput;
c277df42
IZ
3065 }
3066 else
3067 sayNO;
3068 }
a0ed51b3
LW
3069 }
3070 else {
c277df42
IZ
3071 n = regrepeat_hard(scan, n, &l);
3072 if (n != 0 && l == 0
3073 /* In fact, this is tricky. If paren, then the
3074 fact that we did/didnot match may influence
3075 future execution. */
3076 && !(paren && ln == 0))
3077 ln = n;
3280af22 3078 locinput = PL_reginput;
c277df42 3079 DEBUG_r(
5c0ca799 3080 PerlIO_printf(Perl_debug_log,
faccc32b 3081 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3082 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3083 (IV) n, (IV)l)
c277df42
IZ
3084 );
3085 if (n >= ln) {
22c35a8c 3086 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3087 c1 = (U8)*STRING(next);
c277df42 3088 if (OP(next) == EXACTF)
22c35a8c 3089 c2 = PL_fold[c1];
c277df42 3090 else if (OP(next) == EXACTFL)
22c35a8c 3091 c2 = PL_fold_locale[c1];
c277df42
IZ
3092 else
3093 c2 = c1;
a0ed51b3
LW
3094 }
3095 else
c277df42
IZ
3096 c1 = c2 = -1000;
3097 }
02db2b7b 3098 REGCP_SET(lastcp);
c277df42
IZ
3099 while (n >= ln) {
3100 /* If it could work, try it. */
3101 if (c1 == -1000 ||
3280af22
NIS
3102 UCHARAT(PL_reginput) == c1 ||
3103 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3104 {
3105 DEBUG_r(
c3464db5 3106 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3107 "%*s trying tail with n=%"IVdf"...\n",
3108 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3109 );
3110 if (paren) {
3111 if (n) {
cf93c79d
IZ
3112 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3113 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3114 }
a0ed51b3 3115 else
cf93c79d 3116 PL_regendp[paren] = -1;
c277df42 3117 }
a0ed51b3
LW
3118 if (regmatch(next))
3119 sayYES;
02db2b7b 3120 REGCP_UNWIND(lastcp);
a0ed51b3 3121 }
c277df42
IZ
3122 /* Couldn't or didn't -- back up. */
3123 n--;
dfe13c55 3124 locinput = HOPc(locinput, -l);
3280af22 3125 PL_reginput = locinput;
c277df42
IZ
3126 }
3127 }
3128 sayNO;
3129 break;
3130 }
3131 case CURLYN:
3132 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3133 if (paren > PL_regsize)
3134 PL_regsize = paren;
3135 if (paren > *PL_reglastparen)
3136 *PL_reglastparen = paren;
c277df42
IZ
3137 ln = ARG1(scan); /* min to match */
3138 n = ARG2(scan); /* max to match */
dc45a647 3139 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3140 goto repeat;
a0d0e21e 3141 case CURLY:
c277df42 3142 paren = 0;
a0d0e21e
LW
3143 ln = ARG1(scan); /* min to match */
3144 n = ARG2(scan); /* max to match */
dc45a647 3145 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3146 goto repeat;
3147 case STAR:
3148 ln = 0;
c277df42 3149 n = REG_INFTY;
a0d0e21e 3150 scan = NEXTOPER(scan);
c277df42 3151 paren = 0;
a0d0e21e
LW
3152 goto repeat;
3153 case PLUS:
c277df42
IZ
3154 ln = 1;
3155 n = REG_INFTY;
3156 scan = NEXTOPER(scan);
3157 paren = 0;
3158 repeat:
a0d0e21e
LW
3159 /*
3160 * Lookahead to avoid useless match attempts
3161 * when we know what character comes next.
3162 */
22c35a8c 3163 if (PL_regkind[(U8)OP(next)] == EXACT) {
1aa99e6b
IH
3164 U8 *s = (U8*)STRING(next);
3165 if (!UTF) {
3166 c2 = c1 = *s;
3167 if (OP(next) == EXACTF)
3168 c2 = PL_fold[c1];
3169 else if (OP(next) == EXACTFL)
3170 c2 = PL_fold_locale[c1];
3171 }
3172 else { /* UTF */
3173 if (OP(next) == EXACTF) {
3174 c1 = to_utf8_lower(s);
3175 c2 = to_utf8_upper(s);
3176 }
3177 else {
9041c2e3 3178 c2 = c1 = utf8_to_uvchr(s, NULL);
1aa99e6b
IH
3179 }
3180 }
bbce6d69 3181 }
a0d0e21e 3182 else
bbce6d69 3183 c1 = c2 = -1000;
3280af22 3184 PL_reginput = locinput;
a0d0e21e 3185 if (minmod) {
c277df42 3186 CHECKPOINT lastcp;
a0d0e21e
LW
3187 minmod = 0;
3188 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3189 sayNO;
a0ed51b3 3190 locinput = PL_reginput;
02db2b7b 3191 REGCP_SET(lastcp);
0fe9bf95 3192 if (c1 != -1000) {
1aa99e6b 3193 char *e; /* Should not check after this */
0fe9bf95
IZ
3194 char *old = locinput;
3195
1aa99e6b 3196 if (n == REG_INFTY) {
0fe9bf95 3197 e = PL_regeol - 1;
1aa99e6b
IH
3198 if (do_utf8)
3199 while (UTF8_IS_CONTINUATION(*(U8*)e))
3200 e--;
3201 }
3202 else if (do_utf8) {
3203 int m = n - ln;
3204 for (e = locinput;
3205 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3206 e += UTF8SKIP(e);
3207 }
3208 else {
3209 e = locinput + n - ln;
3210 if (e >= PL_regeol)
3211 e = PL_regeol - 1;
3212 }
0fe9bf95 3213 while (1) {
1aa99e6b 3214 int count;
0fe9bf95 3215 /* Find place 'next' could work */
1aa99e6b
IH
3216 if (!do_utf8) {
3217 if (c1 == c2) {
3218 while (locinput <= e && *locinput != c1)
3219 locinput++;
3220 } else {
9041c2e3 3221 while (locinput <= e
1aa99e6b
IH
3222 && *locinput != c1
3223 && *locinput != c2)
3224 locinput++;
3225 }
3226 count = locinput - old;
3227 }
3228 else {
3229 STRLEN len;
3230 if (c1 == c2) {
3231 for (count = 0;
3232 locinput <= e &&
9041c2e3 3233 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3234 count++)
3235 locinput += len;
3236
3237 } else {
3238 for (count = 0; locinput <= e; count++) {
9041c2e3 3239 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3240 if (c == c1 || c == c2)
3241 break;
9041c2e3 3242 locinput += len;
1aa99e6b
IH
3243 }
3244 }
0fe9bf95 3245 }
9041c2e3 3246 if (locinput > e)
0fe9bf95
IZ
3247 sayNO;
3248 /* PL_reginput == old now */
3249 if (locinput != old) {
3250 ln = 1; /* Did some */
1aa99e6b 3251 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3252 sayNO;
3253 }
3254 /* PL_reginput == locinput now */
29d1e993 3255 TRYPAREN(paren, ln, locinput);
0fe9bf95 3256 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3257 REGCP_UNWIND(lastcp);
0fe9bf95 3258 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3259 old = locinput;
3260 if (do_utf8)
3261 locinput += UTF8SKIP(locinput);
3262 else
3263 locinput++;
0fe9bf95
IZ
3264 }
3265 }
3266 else
c277df42 3267 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3268 UV c;
3269 if (c1 != -1000) {
3270 if (do_utf8)
9041c2e3 3271 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3272 else
9041c2e3 3273 c = UCHARAT(PL_reginput);
2390ecbc
PP
3274 /* If it could work, try it. */
3275 if (c == c1 || c == c2)
3276 {
3277 TRYPAREN(paren, n, PL_reginput);
3278 REGCP_UNWIND(lastcp);
3279 }
1aa99e6b 3280 }
a0d0e21e 3281 /* If it could work, try it. */
2390ecbc 3282 else if (c1 == -1000)
bbce6d69 3283 {
29d1e993 3284 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3285 REGCP_UNWIND(lastcp);
bbce6d69 3286 }
c277df42 3287 /* Couldn't or didn't -- move forward. */
a0ed51b3 3288 PL_reginput = locinput;
a0d0e21e
LW
3289 if (regrepeat(scan, 1)) {
3290 ln++;
a0ed51b3
LW
3291 locinput = PL_reginput;
3292 }
3293 else
4633a7c4 3294 sayNO;
a0d0e21e
LW
3295 }
3296 }
3297 else {
c277df42 3298 CHECKPOINT lastcp;
a0d0e21e 3299 n = regrepeat(scan, n);
a0ed51b3 3300 locinput = PL_reginput;
22c35a8c 3301 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3302 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3303 ln = n; /* why back off? */
1aeab75a
GS
3304 /* ...because $ and \Z can match before *and* after
3305 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3306 We should back off by one in this case. */
3307 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3308 ln--;
3309 }
02db2b7b 3310 REGCP_SET(lastcp);
c277df42 3311 if (paren) {
8fa7f367 3312 UV c = 0;
c277df42 3313 while (n >= ln) {
1aa99e6b
IH
3314 if (c1 != -1000) {
3315 if (do_utf8)
9041c2e3 3316 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3317 else
9041c2e3 3318 c = UCHARAT(PL_reginput);
1aa99e6b 3319 }
c277df42 3320 /* If it could work, try it. */
1aa99e6b 3321 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3322 {
29d1e993 3323 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3324 REGCP_UNWIND(lastcp);
c277df42
IZ
3325 }
3326 /* Couldn't or didn't -- back up. */
3327 n--;
dfe13c55 3328 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3329 }
a0ed51b3
LW
3330 }
3331 else {
8fa7f367 3332 UV c = 0;
c277df42 3333 while (n >= ln) {
1aa99e6b
IH
3334 if (c1 != -1000) {
3335 if (do_utf8)
9041c2e3 3336 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3337 else
9041c2e3 3338 c = UCHARAT(PL_reginput);
1aa99e6b 3339 }
c277df42 3340 /* If it could work, try it. */
1aa99e6b 3341 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3342 {
29d1e993 3343 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3344 REGCP_UNWIND(lastcp);
c277df42
IZ
3345 }
3346 /* Couldn't or didn't -- back up. */
3347 n--;
dfe13c55 3348 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3349 }
a0d0e21e
LW
3350 }
3351 }
4633a7c4 3352 sayNO;
c277df42 3353 break;
a0d0e21e 3354 case END:
0f5d15d6
IZ
3355 if (PL_reg_call_cc) {
3356 re_cc_state *cur_call_cc = PL_reg_call_cc;
3357 CURCUR *cctmp = PL_regcc;
3358 regexp *re = PL_reg_re;
3359 CHECKPOINT cp, lastcp;
3360
3361 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3362 REGCP_SET(lastcp);
0f5d15d6
IZ
3363 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3364 the caller. */
3365 PL_reginput = locinput; /* Make position available to
3366 the callcc. */
3367 cache_re(PL_reg_call_cc->re);
3368 PL_regcc = PL_reg_call_cc->cc;
3369 PL_reg_call_cc = PL_reg_call_cc->prev;
3370 if (regmatch(cur_call_cc->node)) {
3371 PL_reg_call_cc = cur_call_cc;
3372 regcpblow(cp);
3373 sayYES;
3374 }
02db2b7b 3375 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3376 regcppop();
3377 PL_reg_call_cc = cur_call_cc;
3378 PL_regcc = cctmp;
3379 PL_reg_re = re;
3380 cache_re(re);
3381
3382 DEBUG_r(
3383 PerlIO_printf(Perl_debug_log,
3384 "%*s continuation failed...\n",
3385 REPORT_CODE_OFF+PL_regindent*2, "")
3386 );
7821416a 3387 sayNO_SILENT;
0f5d15d6 3388 }
7821416a
IZ
3389 if (locinput < PL_regtill) {
3390 DEBUG_r(PerlIO_printf(Perl_debug_log,
3391 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3392 PL_colors[4],
3393 (long)(locinput - PL_reg_starttry),
3394 (long)(PL_regtill - PL_reg_starttry),
3395 PL_colors[5]));
3396 sayNO_FINAL; /* Cannot match: too short. */
3397 }
3398 PL_reginput = locinput; /* put where regtry can find it */
3399 sayYES_FINAL; /* Success! */
7e5428c5 3400 case SUCCEED:
3280af22 3401 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3402 sayYES_LOUD; /* Success! */
c277df42
IZ
3403 case SUSPEND:
3404 n = 1;
9fe1d20c 3405 PL_reginput = locinput;
9041c2e3 3406 goto do_ifmatch;
a0d0e21e 3407 case UNLESSM:
c277df42 3408 n = 0;
a0ed51b3 3409 if (scan->flags) {
efb30f32
HS
3410 s = HOPBACKc(locinput, scan->flags);
3411 if (!s)
3412 goto say_yes;
3413 PL_reginput = s;
a0ed51b3
LW
3414 }
3415 else
3416 PL_reginput = locinput;
c277df42
IZ
3417 goto do_ifmatch;
3418 case IFMATCH:
3419 n = 1;
a0ed51b3 3420 if (scan->flags) {
efb30f32
HS
3421 s = HOPBACKc(locinput, scan->flags);
3422 if (!s)
3423 goto say_no;
3424 PL_reginput = s;
a0ed51b3
LW
3425 }
3426 else
3427 PL_reginput = locinput;
3428
c277df42 3429 do_ifmatch:
c277df42
IZ
3430 inner = NEXTOPER(NEXTOPER(scan));
3431 if (regmatch(inner) != n) {
3432 say_no:
3433 if (logical) {
3434 logical = 0;
3435 sw = 0;
3436 goto do_longjump;
a0ed51b3
LW
3437 }
3438 else
c277df42
IZ
3439 sayNO;
3440 }
3441 say_yes:
3442 if (logical) {
3443 logical = 0;
3444 sw = 1;
3445 }
fe44a5e8 3446 if (OP(scan) == SUSPEND) {
3280af22 3447 locinput = PL_reginput;
565764a8 3448 nextchr = UCHARAT(locinput);
fe44a5e8 3449 }
c277df42
IZ
3450 /* FALL THROUGH. */
3451 case LONGJMP:
3452 do_longjump:
3453 next = scan + ARG(scan);
3454 if (next == scan)
3455 next = NULL;
a0d0e21e
LW
3456 break;
3457 default:
b900a521 3458 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3459 PTR2UV(scan), OP(scan));
cea2e8a9 3460 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3461 }
02db2b7b 3462 reenter:
a0d0e21e
LW
3463 scan = next;
3464 }
a687059c 3465
a0d0e21e
LW
3466 /*
3467 * We get here only if there's trouble -- normally "case END" is
3468 * the terminating point.
3469 */
cea2e8a9 3470 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3471 /*NOTREACHED*/
4633a7c4
LW
3472 sayNO;
3473
7821416a
IZ
3474yes_loud:
3475 DEBUG_r(
3476 PerlIO_printf(Perl_debug_log,
3477 "%*s %scould match...%s\n",
3478 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3479 );
3480 goto yes;
3481yes_final:
3482 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3483 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3484yes:
3485#ifdef DEBUGGING
3280af22 3486 PL_regindent--;
4633a7c4 3487#endif
02db2b7b
IZ
3488
3489#if 0 /* Breaks $^R */
3490 if (unwind)
3491 regcpblow(firstcp);
3492#endif
4633a7c4
LW
3493 return 1;
3494
3495no:
7821416a
IZ
3496 DEBUG_r(
3497 PerlIO_printf(Perl_debug_log,
3498 "%*s %sfailed...%s\n",
3499 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3500 );
3501 goto do_no;
3502no_final:
3503do_no:
02db2b7b
IZ
3504 if (unwind) {
3505 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3506
3507 switch (uw->type) {
3508 case RE_UNWIND_BRANCH:
3509 case RE_UNWIND_BRANCHJ:
3510 {
3511 re_unwind_branch_t *uwb = &(uw->branch);
3512 I32 lastparen = uwb->lastparen;
9041c2e3 3513
02db2b7b
IZ
3514 REGCP_UNWIND(uwb->lastcp);
3515 for (n = *PL_reglastparen; n > lastparen; n--)
3516 PL_regendp[n] = -1;
3517 *PL_reglastparen = n;
3518 scan = next = uwb->next;
9041c2e3
NIS
3519 if ( !scan ||
3520 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
3521 ? BRANCH : BRANCHJ) ) { /* Failure */
3522 unwind = uwb->prev;
3523#ifdef DEBUGGING
3524 PL_regindent--;
3525#endif
3526 goto do_no;
3527 }
3528 /* Have more choice yet. Reuse the same uwb. */
3529 /*SUPPRESS 560*/
3530 if ((n = (uwb->type == RE_UNWIND_BRANCH
3531 ? NEXT_OFF(next) : ARG(next))))
3532 next += n;
3533 else
3534 next = NULL; /* XXXX Needn't unwinding in this case... */
3535 uwb->next = next;
3536 next = NEXTOPER(scan);
3537 if (uwb->type == RE_UNWIND_BRANCHJ)
3538 next = NEXTOPER(next);
3539 locinput = uwb->locinput;
3540 nextchr = uwb->nextchr;
3541#ifdef DEBUGGING
3542 PL_regindent = uwb->regindent;
3543#endif
3544
3545 goto reenter;
3546 }
3547 /* NOT REACHED */
3548 default:
3549 Perl_croak(aTHX_ "regexp unwind memory corruption");
3550 }
3551 /* NOT REACHED */
3552 }
4633a7c4 3553#ifdef DEBUGGING
3280af22 3554 PL_regindent--;
4633a7c4 3555#endif
a0d0e21e 3556 return 0;
a687059c
LW
3557}
3558
3559/*
3560 - regrepeat - repeatedly match something simple, report how many
3561 */
3562/*
3563 * [This routine now assumes that it will only match on things of length 1.
3564 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3565 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3566 */
76e3520e 3567STATIC I32
cea2e8a9 3568S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3569{
a0d0e21e 3570 register char *scan;
a0d0e21e 3571 register I32 c;
3280af22 3572 register char *loceol = PL_regeol;
a0ed51b3 3573 register I32 hardcount = 0;
ffc61ed2 3574 register bool do_utf8 = DO_UTF8(PL_reg_sv);
a0d0e21e 3575
3280af22 3576 scan = PL_reginput;
c277df42 3577 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3578 loceol = scan + max;
a0d0e21e 3579 switch (OP(p)) {
22c35a8c 3580 case REG_ANY:
1aa99e6b 3581 if (do_utf8) {
ffc61ed2 3582 loceol = PL_regeol;
1aa99e6b 3583 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3584 scan += UTF8SKIP(scan);
3585 hardcount++;
3586 }
3587 } else {
3588 while (scan < loceol && *scan != '\n')
3589 scan++;
a0ed51b3
LW
3590 }
3591 break;
ffc61ed2 3592 case SANY:
3baa4c62 3593 scan = loceol;
a0ed51b3 3594 break;
bbce6d69 3595 case EXACT: /* length of string is 1 */
cd439c50 3596 c = (U8)*STRING(p);