This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: perlintro.pod
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
a687059c
LW
83#include "regcomp.h"
84
c277df42
IZ
85#define RF_tainted 1 /* tainted information used? */
86#define RF_warned 2 /* warned about big count? */
ce862d02 87#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
88#define RF_utf8 8 /* String contains multibyte chars? */
89
90#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
91
92#define RS_init 1 /* eval environment created */
93#define RS_set 2 /* replsv value is set */
c277df42 94
a687059c
LW
95#ifndef STATIC
96#define STATIC static
97#endif
98
c277df42
IZ
99/*
100 * Forwards.
101 */
102
a0ed51b3 103#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 104#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 105
dfe13c55
GS
106#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
108#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
109#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
110#define HOPc(pos,off) ((char*)HOP(pos,off))
111#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 112
efb30f32 113#define HOPBACK(pos, off) ( \
53c4c00c 114 (UTF && PL_reg_match_utf8) \
efb30f32
HS
115 ? reghopmaybe((U8*)pos, -off) \
116 : (pos - off >= PL_bostr) \
117 ? (U8*)(pos - off) \
118 : (U8*)NULL \
119)
120#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
121
1aa99e6b
IH
122#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
123#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
124#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
125#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
126#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
127#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
128
8269fa76 129#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 130
5f80c4cf 131/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
132#define JUMPABLE(rn) ( \
133 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
134 OP(rn) == SUSPEND || OP(rn) == IFMATCH \
135)
136
137#define NEAR_EXACT(rn) (PL_regkind[(U8)OP(rn)] == EXACT || JUMPABLE(rn))
138
139#define NEXT_IMPT(rn) STMT_START { \
140 while (JUMPABLE(rn)) \
141 if (OP(rn) == SUSPEND || OP(rn) == IFMATCH) \
142 rn = NEXTOPER(NEXTOPER(rn)); \
143 else rn += NEXT_OFF(rn); \
5f80c4cf 144} STMT_END
74750237 145
acfe0abc 146static void restore_pos(pTHX_ void *arg);
51371543 147
76e3520e 148STATIC CHECKPOINT
cea2e8a9 149S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 150{
3280af22 151 int retval = PL_savestack_ix;
b1ce53c5
JH
152#define REGCP_PAREN_ELEMS 4
153 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
154 int p;
155
e49a9654
IH
156 if (paren_elems_to_push < 0)
157 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
158
a01268b5 159#define REGCP_OTHER_ELEMS 6
b1ce53c5 160 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 161 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 162/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
163 SSPUSHINT(PL_regendp[p]);
164 SSPUSHINT(PL_regstartp[p]);
3280af22 165 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
166 SSPUSHINT(p);
167 }
b1ce53c5 168/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
169 SSPUSHINT(PL_regsize);
170 SSPUSHINT(*PL_reglastparen);
a01268b5 171 SSPUSHINT(*PL_reglastcloseparen);
3280af22 172 SSPUSHPTR(PL_reginput);
41123dfd
JH
173#define REGCP_FRAME_ELEMS 2
174/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
175 * are needed for the regexp context stack bookkeeping. */
176 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 177 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 178
a0d0e21e
LW
179 return retval;
180}
181
c277df42 182/* These are needed since we do not localize EVAL nodes: */
02db2b7b 183# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 184 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 185 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 186
02db2b7b 187# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 188 PerlIO_printf(Perl_debug_log, \
faccc32b 189 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 190 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 191
76e3520e 192STATIC char *
cea2e8a9 193S_regcppop(pTHX)
a0d0e21e 194{
b1ce53c5 195 I32 i;
a0d0e21e
LW
196 U32 paren = 0;
197 char *input;
cf93c79d 198 I32 tmps;
b1ce53c5
JH
199
200 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 201 i = SSPOPINT;
b1ce53c5
JH
202 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
203 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 204 input = (char *) SSPOPPTR;
a01268b5 205 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
206 *PL_reglastparen = SSPOPINT;
207 PL_regsize = SSPOPINT;
b1ce53c5
JH
208
209 /* Now restore the parentheses context. */
41123dfd
JH
210 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
211 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 212 paren = (U32)SSPOPINT;
3280af22 213 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
214 PL_regstartp[paren] = SSPOPINT;
215 tmps = SSPOPINT;
3280af22
NIS
216 if (paren <= *PL_reglastparen)
217 PL_regendp[paren] = tmps;
c277df42 218 DEBUG_r(
c3464db5 219 PerlIO_printf(Perl_debug_log,
b900a521 220 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 221 (UV)paren, (IV)PL_regstartp[paren],
b900a521 222 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 223 (IV)PL_regendp[paren],
3280af22 224 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 225 );
a0d0e21e 226 }
c277df42 227 DEBUG_r(
3280af22 228 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 229 PerlIO_printf(Perl_debug_log,
faccc32b
JH
230 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
231 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
232 }
233 );
daf18116 234#if 1
dafc8851
JH
235 /* It would seem that the similar code in regtry()
236 * already takes care of this, and in fact it is in
237 * a better location to since this code can #if 0-ed out
238 * but the code in regtry() is needed or otherwise tests
239 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
240 * (as of patchlevel 7877) will fail. Then again,
241 * this code seems to be necessary or otherwise
242 * building DynaLoader will fail:
243 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
244 * --jhi */
3280af22
NIS
245 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
246 if (paren > PL_regsize)
cf93c79d
IZ
247 PL_regstartp[paren] = -1;
248 PL_regendp[paren] = -1;
a0d0e21e 249 }
dafc8851 250#endif
a0d0e21e
LW
251 return input;
252}
253
0f5d15d6 254STATIC char *
cea2e8a9 255S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
256{
257 I32 tmp = PL_savestack_ix;
258
259 PL_savestack_ix = ss;
260 regcppop();
261 PL_savestack_ix = tmp;
942e002e 262 return Nullch;
0f5d15d6
IZ
263}
264
265typedef struct re_cc_state
266{
267 I32 ss;
268 regnode *node;
269 struct re_cc_state *prev;
270 CURCUR *cc;
271 regexp *re;
272} re_cc_state;
273
02db2b7b 274#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 275
29d1e993
HS
276#define TRYPAREN(paren, n, input) { \
277 if (paren) { \
278 if (n) { \
279 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
280 PL_regendp[paren] = input - PL_bostr; \
281 } \
282 else \
283 PL_regendp[paren] = -1; \
284 } \
285 if (regmatch(next)) \
286 sayYES; \
287 if (paren && n) \
288 PL_regendp[paren] = -1; \
289}
290
291
a687059c 292/*
e50aee73 293 * pregexec and friends
a687059c
LW
294 */
295
296/*
c277df42 297 - pregexec - match a regexp against a string
a687059c 298 */
c277df42 299I32
864dbfa3 300Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 301 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
302/* strend: pointer to null at end of string */
303/* strbeg: real beginning of string */
304/* minend: end of match must be >=minend after stringarg. */
305/* nosave: For optimizations. */
306{
307 return
9041c2e3 308 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
309 nosave ? 0 : REXEC_COPY_STR);
310}
0f5d15d6
IZ
311
312STATIC void
cea2e8a9 313S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
314{
315 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
316#ifdef DEBUGGING
317 PL_regprogram = prog->program;
318#endif
319 PL_regnpar = prog->nparens;
9041c2e3
NIS
320 PL_regdata = prog->data;
321 PL_reg_re = prog;
0f5d15d6 322}
22e551b9 323
9041c2e3 324/*
cad2e5aa
JH
325 * Need to implement the following flags for reg_anch:
326 *
327 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
328 * USE_INTUIT_ML
329 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
330 * INTUIT_AUTORITATIVE_ML
331 * INTUIT_ONCE_NOML - Intuit can match in one location only.
332 * INTUIT_ONCE_ML
333 *
334 * Another flag for this function: SECOND_TIME (so that float substrs
335 * with giant delta may be not rechecked).
336 */
337
338/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
339
2c2d71f5 340/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
341 Otherwise, only SvCUR(sv) is used to get strbeg. */
342
343/* XXXX We assume that strpos is strbeg unless sv. */
344
6eb5f6b9
JH
345/* XXXX Some places assume that there is a fixed substring.
346 An update may be needed if optimizer marks as "INTUITable"
347 RExen without fixed substrings. Similarly, it is assumed that
348 lengths of all the strings are no more than minlen, thus they
349 cannot come from lookahead.
350 (Or minlen should take into account lookahead.) */
351
2c2d71f5
JH
352/* A failure to find a constant substring means that there is no need to make
353 an expensive call to REx engine, thus we celebrate a failure. Similarly,
354 finding a substring too deep into the string means that less calls to
30944b6d
IZ
355 regtry() should be needed.
356
357 REx compiler's optimizer found 4 possible hints:
358 a) Anchored substring;
359 b) Fixed substring;
360 c) Whether we are anchored (beginning-of-line or \G);
361 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 362 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
363 string which does not contradict any of them.
364 */
2c2d71f5 365
6eb5f6b9
JH
366/* Most of decisions we do here should have been done at compile time.
367 The nodes of the REx which we used for the search should have been
368 deleted from the finite automaton. */
369
cad2e5aa
JH
370char *
371Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
372 char *strend, U32 flags, re_scream_pos_data *data)
373{
b7953727 374 register I32 start_shift = 0;
cad2e5aa 375 /* Should be nonnegative! */
b7953727 376 register I32 end_shift = 0;
2c2d71f5
JH
377 register char *s;
378 register SV *check;
a1933d95 379 char *strbeg;
cad2e5aa
JH
380 char *t;
381 I32 ml_anch;
6eb5f6b9 382 register char *other_last = Nullch; /* other substr checked before this */
b7953727 383 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
384#ifdef DEBUGGING
385 char *i_strpos = strpos;
386#endif
cad2e5aa
JH
387
388 DEBUG_r( if (!PL_colorset) reginitcolors() );
389 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 390 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
391 PL_colors[4],PL_colors[5],PL_colors[0],
392 prog->precomp,
393 PL_colors[1],
394 (strlen(prog->precomp) > 60 ? "..." : ""),
395 PL_colors[0],
b900a521 396 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
397 strpos, PL_colors[1],
398 (strend - strpos > 60 ? "..." : ""))
399 );
400
1aa99e6b
IH
401 if (prog->reganch & ROPT_UTF8)
402 PL_reg_flags |= RF_utf8;
403
404 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
2c2d71f5 405 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 406 goto fail;
2c2d71f5 407 }
a1933d95 408 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 409 PL_regeol = strend;
653099ff 410 check = prog->check_substr;
2c2d71f5 411 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
412 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
413 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 414 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 415
7e25d62c
JH
416 if (!ml_anch) {
417 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
418 | ROPT_IMPLICIT)) /* not a real BOL */
419 /* SvCUR is not set on references: SvRV and SvPVX overlap */
420 && sv && !SvROK(sv)
421 && (strpos != strbeg)) {
422 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
423 goto fail;
424 }
425 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 426 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 427 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
428 I32 slen;
429
1aa99e6b 430 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
431 if (SvTAIL(check)) {
432 slen = SvCUR(check); /* >= 1 */
cad2e5aa 433
9041c2e3 434 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
435 || (strend - s == slen && strend[-1] != '\n')) {
436 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
437 goto fail_finish;
cad2e5aa
JH
438 }
439 /* Now should match s[0..slen-2] */
440 slen--;
653099ff 441 if (slen && (*SvPVX(check) != *s
cad2e5aa 442 || (slen > 1
653099ff 443 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
444 report_neq:
445 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
446 goto fail_finish;
447 }
cad2e5aa 448 }
653099ff
GS
449 else if (*SvPVX(check) != *s
450 || ((slen = SvCUR(check)) > 1
451 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
452 goto report_neq;
453 goto success_at_start;
7e25d62c 454 }
cad2e5aa 455 }
2c2d71f5 456 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 457 s = strpos;
2c2d71f5 458 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 459 end_shift = prog->minlen - start_shift -
653099ff 460 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 461 if (!ml_anch) {
653099ff
GS
462 I32 end = prog->check_offset_max + CHR_SVLEN(check)
463 - (SvTAIL(check) != 0);
1aa99e6b 464 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
465
466 if (end_shift < eshift)
467 end_shift = eshift;
468 }
cad2e5aa 469 }
2c2d71f5 470 else { /* Can match at random position */
cad2e5aa
JH
471 ml_anch = 0;
472 s = strpos;
2c2d71f5
JH
473 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
474 /* Should be nonnegative! */
475 end_shift = prog->minlen - start_shift -
653099ff 476 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
477 }
478
2c2d71f5 479#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 480 if (end_shift < 0)
6bbae5e6 481 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
482#endif
483
2c2d71f5
JH
484 restart:
485 /* Find a possible match in the region s..strend by looking for
486 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 487 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
488 I32 p = -1; /* Internal iterator of scream. */
489 I32 *pp = data ? data->scream_pos : &p;
490
2c2d71f5
JH
491 if (PL_screamfirst[BmRARE(check)] >= 0
492 || ( BmRARE(check) == '\n'
493 && (BmPREVIOUS(check) == SvCUR(check) - 1)
494 && SvTAIL(check) ))
9041c2e3 495 s = screaminstr(sv, check,
2c2d71f5 496 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 497 else
2c2d71f5 498 goto fail_finish;
cad2e5aa
JH
499 if (data)
500 *data->scream_olds = s;
501 }
f33976b4 502 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
503 s = fbm_instr((U8*)(s + start_shift),
504 (U8*)(strend - end_shift),
505 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 506 else
1aa99e6b
IH
507 s = fbm_instr(HOP3(s, start_shift, strend),
508 HOP3(strend, -end_shift, strbeg),
2c2d71f5 509 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
510
511 /* Update the count-of-usability, remove useless subpatterns,
512 unshift s. */
2c2d71f5
JH
513
514 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
515 (s ? "Found" : "Did not find"),
516 ((check == prog->anchored_substr) ? "anchored" : "floating"),
517 PL_colors[0],
7b0972df
JH
518 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
519 SvPVX(check),
2c2d71f5
JH
520 PL_colors[1], (SvTAIL(check) ? "$" : ""),
521 (s ? " at offset " : "...\n") ) );
522
523 if (!s)
524 goto fail_finish;
525
6eb5f6b9
JH
526 check_at = s;
527
2c2d71f5 528 /* Finish the diagnostic message */
30944b6d 529 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
530
531 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
532 Start with the other substr.
533 XXXX no SCREAM optimization yet - and a very coarse implementation
534 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
535 *always* match. Probably should be marked during compile...
536 Probably it is right to do no SCREAM here...
537 */
538
539 if (prog->float_substr && prog->anchored_substr) {
30944b6d 540 /* Take into account the "other" substring. */
2c2d71f5
JH
541 /* XXXX May be hopelessly wrong for UTF... */
542 if (!other_last)
6eb5f6b9 543 other_last = strpos;
2c2d71f5 544 if (check == prog->float_substr) {
30944b6d
IZ
545 do_other_anchored:
546 {
1aa99e6b 547 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
548 char *s1 = s;
549
2c2d71f5
JH
550 t = s - prog->check_offset_max;
551 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
552 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 553 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 554 && t > strpos)))
30944b6d 555 /* EMPTY */;
2c2d71f5
JH
556 else
557 t = strpos;
1aa99e6b 558 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
559 if (t < other_last) /* These positions already checked */
560 t = other_last;
1aa99e6b 561 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
562 if (last < last1)
563 last1 = last;
564 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
565 /* On end-of-str: see comment below. */
566 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
567 HOP3(HOP3(last1, prog->anchored_offset, strend)
568 + SvCUR(prog->anchored_substr),
569 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
570 prog->anchored_substr,
571 PL_multiline ? FBMrf_MULTILINE : 0);
572 DEBUG_r(PerlIO_printf(Perl_debug_log,
573 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
574 (s ? "Found" : "Contradicts"),
575 PL_colors[0],
7b0972df
JH
576 (int)(SvCUR(prog->anchored_substr)
577 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
578 SvPVX(prog->anchored_substr),
579 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
580 if (!s) {
581 if (last1 >= last2) {
582 DEBUG_r(PerlIO_printf(Perl_debug_log,
583 ", giving up...\n"));
584 goto fail_finish;
585 }
586 DEBUG_r(PerlIO_printf(Perl_debug_log,
587 ", trying floating at offset %ld...\n",
1aa99e6b
IH
588 (long)(HOP3c(s1, 1, strend) - i_strpos)));
589 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
590 s = HOP3c(last, 1, strend);
2c2d71f5
JH
591 goto restart;
592 }
593 else {
594 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 595 (long)(s - i_strpos)));
1aa99e6b
IH
596 t = HOP3c(s, -prog->anchored_offset, strbeg);
597 other_last = HOP3c(s, 1, strend);
30944b6d 598 s = s1;
2c2d71f5
JH
599 if (t == strpos)
600 goto try_at_start;
2c2d71f5
JH
601 goto try_at_offset;
602 }
30944b6d 603 }
2c2d71f5
JH
604 }
605 else { /* Take into account the floating substring. */
606 char *last, *last1;
607 char *s1 = s;
608
1aa99e6b
IH
609 t = HOP3c(s, -start_shift, strbeg);
610 last1 = last =
611 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
612 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
613 last = HOP3c(t, prog->float_max_offset, strend);
614 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
615 if (s < other_last)
616 s = other_last;
2c2d71f5
JH
617 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
618 /* fbm_instr() takes into account exact value of end-of-str
619 if the check is SvTAIL(ed). Since false positives are OK,
620 and end-of-str is not later than strend we are OK. */
621 s = fbm_instr((unsigned char*)s,
622 (unsigned char*)last + SvCUR(prog->float_substr)
623 - (SvTAIL(prog->float_substr)!=0),
624 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
625 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
626 (s ? "Found" : "Contradicts"),
627 PL_colors[0],
7b0972df
JH
628 (int)(SvCUR(prog->float_substr)
629 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
630 SvPVX(prog->float_substr),
631 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
632 if (!s) {
633 if (last1 == last) {
634 DEBUG_r(PerlIO_printf(Perl_debug_log,
635 ", giving up...\n"));
636 goto fail_finish;
637 }
638 DEBUG_r(PerlIO_printf(Perl_debug_log,
639 ", trying anchored starting at offset %ld...\n",
30944b6d 640 (long)(s1 + 1 - i_strpos)));
803ff556 641 other_last = last;
1aa99e6b 642 s = HOP3c(t, 1, strend);
2c2d71f5
JH
643 goto restart;
644 }
645 else {
646 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 647 (long)(s - i_strpos)));
803ff556 648 other_last = s; /* Fix this later. --Hugo */
30944b6d 649 s = s1;
2c2d71f5
JH
650 if (t == strpos)
651 goto try_at_start;
2c2d71f5
JH
652 goto try_at_offset;
653 }
654 }
cad2e5aa 655 }
2c2d71f5
JH
656
657 t = s - prog->check_offset_max;
2c2d71f5
JH
658 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
659 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
660 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
661 && t > strpos))) {
2c2d71f5
JH
662 /* Fixed substring is found far enough so that the match
663 cannot start at strpos. */
664 try_at_offset:
cad2e5aa 665 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
666 /* Eventually fbm_*() should handle this, but often
667 anchored_offset is not 0, so this check will not be wasted. */
668 /* XXXX In the code below we prefer to look for "^" even in
669 presence of anchored substrings. And we search even
670 beyond the found float position. These pessimizations
671 are historical artefacts only. */
672 find_anchor:
2c2d71f5 673 while (t < strend - prog->minlen) {
cad2e5aa 674 if (*t == '\n') {
4ee3650e 675 if (t < check_at - prog->check_offset_min) {
30944b6d 676 if (prog->anchored_substr) {
4ee3650e
GS
677 /* Since we moved from the found position,
678 we definitely contradict the found anchored
30944b6d
IZ
679 substr. Due to the above check we do not
680 contradict "check" substr.
681 Thus we can arrive here only if check substr
682 is float. Redo checking for "other"=="fixed".
683 */
9041c2e3 684 strpos = t + 1;
30944b6d
IZ
685 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
686 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
687 goto do_other_anchored;
688 }
4ee3650e
GS
689 /* We don't contradict the found floating substring. */
690 /* XXXX Why not check for STCLASS? */
cad2e5aa 691 s = t + 1;
2c2d71f5 692 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 693 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
694 goto set_useful;
695 }
4ee3650e
GS
696 /* Position contradicts check-string */
697 /* XXXX probably better to look for check-string
698 than for "\n", so one should lower the limit for t? */
699 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 700 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 701 other_last = strpos = s = t + 1;
cad2e5aa
JH
702 goto restart;
703 }
704 t++;
705 }
2c2d71f5
JH
706 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
707 PL_colors[0],PL_colors[1]));
708 goto fail_finish;
cad2e5aa 709 }
f5952150
GS
710 else {
711 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
712 PL_colors[0],PL_colors[1]));
713 }
cad2e5aa
JH
714 s = t;
715 set_useful:
2c2d71f5 716 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
717 }
718 else {
f5952150 719 /* The found string does not prohibit matching at strpos,
2c2d71f5 720 - no optimization of calling REx engine can be performed,
f5952150
GS
721 unless it was an MBOL and we are not after MBOL,
722 or a future STCLASS check will fail this. */
2c2d71f5
JH
723 try_at_start:
724 /* Even in this situation we may use MBOL flag if strpos is offset
725 wrt the start of the string. */
05b4157f 726 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 727 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
728 /* May be due to an implicit anchor of m{.*foo} */
729 && !(prog->reganch & ROPT_IMPLICIT))
730 {
cad2e5aa
JH
731 t = strpos;
732 goto find_anchor;
733 }
30944b6d 734 DEBUG_r( if (ml_anch)
f5952150
GS
735 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
736 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 737 );
2c2d71f5 738 success_at_start:
30944b6d 739 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 740 && prog->check_substr /* Could be deleted already */
cad2e5aa 741 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
742 && prog->check_substr == prog->float_substr)
743 {
cad2e5aa 744 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 745 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
746 SvREFCNT_dec(prog->check_substr);
747 prog->check_substr = Nullsv; /* disable */
748 prog->float_substr = Nullsv; /* clear */
5e39e1e5 749 check = Nullsv; /* abort */
cad2e5aa 750 s = strpos;
3cf5c195
IZ
751 /* XXXX This is a remnant of the old implementation. It
752 looks wasteful, since now INTUIT can use many
6eb5f6b9 753 other heuristics. */
cad2e5aa
JH
754 prog->reganch &= ~RE_USE_INTUIT;
755 }
756 else
757 s = strpos;
758 }
759
6eb5f6b9
JH
760 /* Last resort... */
761 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
762 if (prog->regstclass) {
763 /* minlen == 0 is possible if regstclass is \b or \B,
764 and the fixed substr is ''$.
765 Since minlen is already taken into account, s+1 is before strend;
766 accidentally, minlen >= 1 guaranties no false positives at s + 1
767 even for \b or \B. But (minlen? 1 : 0) below assumes that
768 regstclass does not come from lookahead... */
769 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
770 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 771 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 772 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 773 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 774 : 1);
6eb5f6b9 775 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
776 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
777 : (prog->float_substr
778 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
779 cl_l, strend)
780 : strend);
a1933d95 781 char *startpos = strbeg;
6eb5f6b9
JH
782
783 t = s;
76384e4a 784 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 785 PL_regdata = prog->data;
76384e4a
GS
786 PL_bostr = startpos;
787 }
f33976b4 788 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
6eb5f6b9
JH
789 if (!s) {
790#ifdef DEBUGGING
b7953727 791 char *what = 0;
6eb5f6b9
JH
792#endif
793 if (endpos == strend) {
794 DEBUG_r( PerlIO_printf(Perl_debug_log,
795 "Could not match STCLASS...\n") );
796 goto fail;
797 }
66e933ab
GS
798 DEBUG_r( PerlIO_printf(Perl_debug_log,
799 "This position contradicts STCLASS...\n") );
653099ff
GS
800 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
801 goto fail;
6eb5f6b9
JH
802 /* Contradict one of substrings */
803 if (prog->anchored_substr) {
6eb5f6b9
JH
804 if (prog->anchored_substr == check) {
805 DEBUG_r( what = "anchored" );
806 hop_and_restart:
1aa99e6b 807 s = HOP3c(t, 1, strend);
66e933ab
GS
808 if (s + start_shift + end_shift > strend) {
809 /* XXXX Should be taken into account earlier? */
810 DEBUG_r( PerlIO_printf(Perl_debug_log,
811 "Could not match STCLASS...\n") );
812 goto fail;
813 }
5e39e1e5
HS
814 if (!check)
815 goto giveup;
6eb5f6b9 816 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 817 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
818 what, (long)(s + start_shift - i_strpos)) );
819 goto restart;
820 }
66e933ab 821 /* Have both, check_string is floating */
6eb5f6b9
JH
822 if (t + start_shift >= check_at) /* Contradicts floating=check */
823 goto retry_floating_check;
824 /* Recheck anchored substring, but not floating... */
9041c2e3 825 s = check_at;
5e39e1e5
HS
826 if (!check)
827 goto giveup;
6eb5f6b9 828 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 829 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
830 (long)(other_last - i_strpos)) );
831 goto do_other_anchored;
832 }
60e71179
GS
833 /* Another way we could have checked stclass at the
834 current position only: */
835 if (ml_anch) {
836 s = t = t + 1;
5e39e1e5
HS
837 if (!check)
838 goto giveup;
60e71179 839 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
840 "Looking for /%s^%s/m starting at offset %ld...\n",
841 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 842 goto try_at_offset;
66e933ab 843 }
60e71179
GS
844 if (!prog->float_substr) /* Could have been deleted */
845 goto fail;
6eb5f6b9
JH
846 /* Check is floating subtring. */
847 retry_floating_check:
848 t = check_at - start_shift;
849 DEBUG_r( what = "floating" );
850 goto hop_and_restart;
851 }
b7953727
JH
852 if (t != s) {
853 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 854 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
855 (long)(t - i_strpos), (long)(s - i_strpos))
856 );
857 }
858 else {
859 DEBUG_r(PerlIO_printf(Perl_debug_log,
860 "Does not contradict STCLASS...\n");
861 );
862 }
6eb5f6b9 863 }
5e39e1e5
HS
864 giveup:
865 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
866 PL_colors[4], (check ? "Guessed" : "Giving up"),
867 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 868 return s;
2c2d71f5
JH
869
870 fail_finish: /* Substring not found */
66e933ab
GS
871 if (prog->check_substr) /* could be removed already */
872 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 873 fail:
2c2d71f5 874 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
875 PL_colors[4],PL_colors[5]));
876 return Nullch;
877}
9661b544 878
6eb5f6b9 879/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
880STATIC char *
881S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 882{
6eb5f6b9
JH
883 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
884 char *m;
d8093b23
G
885 STRLEN ln;
886 unsigned int c1;
887 unsigned int c2;
6eb5f6b9
JH
888 char *e;
889 register I32 tmp = 1; /* Scratch variable? */
53c4c00c 890 register bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 891
6eb5f6b9
JH
892 /* We know what class it must start with. */
893 switch (OP(c)) {
6eb5f6b9 894 case ANYOF:
a0ed51b3 895 while (s < strend) {
ffc61ed2 896 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
897 if (tmp && (norun || regtry(prog, s)))
898 goto got_it;
899 else
900 tmp = doevery;
a0ed51b3 901 }
6eb5f6b9
JH
902 else
903 tmp = 1;
ffc61ed2 904 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 905 }
6eb5f6b9 906 break;
f33976b4
DB
907 case CANY:
908 while (s < strend) {
909 if (tmp && (norun || regtry(prog, s)))
910 goto got_it;
911 else
912 tmp = doevery;
913 s++;
914 }
915 break;
6eb5f6b9
JH
916 case EXACTF:
917 m = STRING(c);
918 ln = STR_LEN(c);
1aa99e6b 919 if (UTF) {
a2a2844f
JH
920 STRLEN ulen1, ulen2;
921 U8 tmpbuf1[UTF8_MAXLEN*2+1];
922 U8 tmpbuf2[UTF8_MAXLEN*2+1];
923
924 to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
925 to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
926
927 c1 = utf8_to_uvuni(tmpbuf1, 0);
928 c2 = utf8_to_uvuni(tmpbuf2, 0);
1aa99e6b
IH
929 }
930 else {
931 c1 = *(U8*)m;
932 c2 = PL_fold[c1];
933 }
6eb5f6b9
JH
934 goto do_exactf;
935 case EXACTFL:
936 m = STRING(c);
937 ln = STR_LEN(c);
d8093b23 938 c1 = *(U8*)m;
6eb5f6b9
JH
939 c2 = PL_fold_locale[c1];
940 do_exactf:
941 e = strend - ln;
b3c9acc1 942
6eb5f6b9
JH
943 if (norun && e < s)
944 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
945
946 if (do_utf8) {
947 STRLEN len;
948 if (c1 == c2)
949 while (s <= e) {
9041c2e3 950 if ( utf8_to_uvchr((U8*)s, &len) == c1
1aa99e6b
IH
951 && regtry(prog, s) )
952 goto got_it;
953 s += len;
954 }
955 else
956 while (s <= e) {
9041c2e3 957 UV c = utf8_to_uvchr((U8*)s, &len);
1aa99e6b
IH
958 if ( (c == c1 || c == c2) && regtry(prog, s) )
959 goto got_it;
960 s += len;
961 }
962 }
963 else {
964 if (c1 == c2)
965 while (s <= e) {
966 if ( *(U8*)s == c1
967 && (ln == 1 || !(OP(c) == EXACTF
968 ? ibcmp(s, m, ln)
969 : ibcmp_locale(s, m, ln)))
970 && (norun || regtry(prog, s)) )
971 goto got_it;
972 s++;
973 }
974 else
975 while (s <= e) {
976 if ( (*(U8*)s == c1 || *(U8*)s == c2)
977 && (ln == 1 || !(OP(c) == EXACTF
978 ? ibcmp(s, m, ln)
979 : ibcmp_locale(s, m, ln)))
980 && (norun || regtry(prog, s)) )
981 goto got_it;
982 s++;
983 }
b3c9acc1
IZ
984 }
985 break;
bbce6d69 986 case BOUNDL:
3280af22 987 PL_reg_flags |= RF_tainted;
bbce6d69 988 /* FALL THROUGH */
a0d0e21e 989 case BOUND:
ffc61ed2 990 if (do_utf8) {
12d33761 991 if (s == PL_bostr)
ffc61ed2
JH
992 tmp = '\n';
993 else {
1aa99e6b 994 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 995
0064a8a9
JH
996 if (s > (char*)r)
997 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
998 }
999 tmp = ((OP(c) == BOUND ?
9041c2e3 1000 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1001 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1002 while (s < strend) {
1003 if (tmp == !(OP(c) == BOUND ?
3568d838 1004 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1005 isALNUM_LC_utf8((U8*)s)))
1006 {
1007 tmp = !tmp;
1008 if ((norun || regtry(prog, s)))
1009 goto got_it;
1010 }
1011 s += UTF8SKIP(s);
a687059c 1012 }
a0d0e21e 1013 }
667bb95a 1014 else {
12d33761 1015 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1016 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1017 while (s < strend) {
1018 if (tmp ==
1019 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1020 tmp = !tmp;
1021 if ((norun || regtry(prog, s)))
1022 goto got_it;
1023 }
1024 s++;
a0ed51b3 1025 }
a0ed51b3 1026 }
6eb5f6b9 1027 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1028 goto got_it;
1029 break;
bbce6d69 1030 case NBOUNDL:
3280af22 1031 PL_reg_flags |= RF_tainted;
bbce6d69 1032 /* FALL THROUGH */
a0d0e21e 1033 case NBOUND:
ffc61ed2 1034 if (do_utf8) {
12d33761 1035 if (s == PL_bostr)
ffc61ed2
JH
1036 tmp = '\n';
1037 else {
1aa99e6b 1038 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1039
0064a8a9
JH
1040 if (s > (char*)r)
1041 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1042 }
1043 tmp = ((OP(c) == NBOUND ?
9041c2e3 1044 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1045 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1046 while (s < strend) {
1047 if (tmp == !(OP(c) == NBOUND ?
3568d838 1048 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1049 isALNUM_LC_utf8((U8*)s)))
1050 tmp = !tmp;
1051 else if ((norun || regtry(prog, s)))
1052 goto got_it;
1053 s += UTF8SKIP(s);
1054 }
a0d0e21e 1055 }
667bb95a 1056 else {
12d33761 1057 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1058 tmp = ((OP(c) == NBOUND ?
1059 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1060 while (s < strend) {
1061 if (tmp ==
1062 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1063 tmp = !tmp;
1064 else if ((norun || regtry(prog, s)))
1065 goto got_it;
1066 s++;
1067 }
a0ed51b3 1068 }
6eb5f6b9 1069 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1070 goto got_it;
1071 break;
a0d0e21e 1072 case ALNUM:
ffc61ed2 1073 if (do_utf8) {
8269fa76 1074 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1075 while (s < strend) {
3568d838 1076 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1077 if (tmp && (norun || regtry(prog, s)))
1078 goto got_it;
1079 else
1080 tmp = doevery;
1081 }
bbce6d69 1082 else
ffc61ed2
JH
1083 tmp = 1;
1084 s += UTF8SKIP(s);
bbce6d69 1085 }
bbce6d69 1086 }
ffc61ed2
JH
1087 else {
1088 while (s < strend) {
1089 if (isALNUM(*s)) {
1090 if (tmp && (norun || regtry(prog, s)))
1091 goto got_it;
1092 else
1093 tmp = doevery;
1094 }
a0ed51b3 1095 else
ffc61ed2
JH
1096 tmp = 1;
1097 s++;
a0ed51b3 1098 }
a0ed51b3
LW
1099 }
1100 break;
bbce6d69 1101 case ALNUML:
3280af22 1102 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1103 if (do_utf8) {
1104 while (s < strend) {
1105 if (isALNUM_LC_utf8((U8*)s)) {
1106 if (tmp && (norun || regtry(prog, s)))
1107 goto got_it;
1108 else
1109 tmp = doevery;
1110 }
a687059c 1111 else
ffc61ed2
JH
1112 tmp = 1;
1113 s += UTF8SKIP(s);
a0d0e21e 1114 }
a0d0e21e 1115 }
ffc61ed2
JH
1116 else {
1117 while (s < strend) {
1118 if (isALNUM_LC(*s)) {
1119 if (tmp && (norun || regtry(prog, s)))
1120 goto got_it;
1121 else
1122 tmp = doevery;
1123 }
a0ed51b3 1124 else
ffc61ed2
JH
1125 tmp = 1;
1126 s++;
a0ed51b3 1127 }
a0ed51b3
LW
1128 }
1129 break;
a0d0e21e 1130 case NALNUM:
ffc61ed2 1131 if (do_utf8) {
8269fa76 1132 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1133 while (s < strend) {
3568d838 1134 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1135 if (tmp && (norun || regtry(prog, s)))
1136 goto got_it;
1137 else
1138 tmp = doevery;
1139 }
bbce6d69 1140 else
ffc61ed2
JH
1141 tmp = 1;
1142 s += UTF8SKIP(s);
bbce6d69 1143 }
bbce6d69 1144 }
ffc61ed2
JH
1145 else {
1146 while (s < strend) {
1147 if (!isALNUM(*s)) {
1148 if (tmp && (norun || regtry(prog, s)))
1149 goto got_it;
1150 else
1151 tmp = doevery;
1152 }
a0ed51b3 1153 else
ffc61ed2
JH
1154 tmp = 1;
1155 s++;
a0ed51b3 1156 }
a0ed51b3
LW
1157 }
1158 break;
bbce6d69 1159 case NALNUML:
3280af22 1160 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1161 if (do_utf8) {
1162 while (s < strend) {
1163 if (!isALNUM_LC_utf8((U8*)s)) {
1164 if (tmp && (norun || regtry(prog, s)))
1165 goto got_it;
1166 else
1167 tmp = doevery;
1168 }
a687059c 1169 else
ffc61ed2
JH
1170 tmp = 1;
1171 s += UTF8SKIP(s);
a687059c 1172 }
a0d0e21e 1173 }
ffc61ed2
JH
1174 else {
1175 while (s < strend) {
1176 if (!isALNUM_LC(*s)) {
1177 if (tmp && (norun || regtry(prog, s)))
1178 goto got_it;
1179 else
1180 tmp = doevery;
1181 }
a0ed51b3 1182 else
ffc61ed2
JH
1183 tmp = 1;
1184 s++;
a0ed51b3 1185 }
a0ed51b3
LW
1186 }
1187 break;
a0d0e21e 1188 case SPACE:
ffc61ed2 1189 if (do_utf8) {
8269fa76 1190 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1191 while (s < strend) {
3568d838 1192 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1193 if (tmp && (norun || regtry(prog, s)))
1194 goto got_it;
1195 else
1196 tmp = doevery;
1197 }
a0d0e21e 1198 else
ffc61ed2
JH
1199 tmp = 1;
1200 s += UTF8SKIP(s);
2304df62 1201 }
a0d0e21e 1202 }
ffc61ed2
JH
1203 else {
1204 while (s < strend) {
1205 if (isSPACE(*s)) {
1206 if (tmp && (norun || regtry(prog, s)))
1207 goto got_it;
1208 else
1209 tmp = doevery;
1210 }
a0ed51b3 1211 else
ffc61ed2
JH
1212 tmp = 1;
1213 s++;
a0ed51b3 1214 }
a0ed51b3
LW
1215 }
1216 break;
bbce6d69 1217 case SPACEL:
3280af22 1218 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1219 if (do_utf8) {
1220 while (s < strend) {
1221 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1222 if (tmp && (norun || regtry(prog, s)))
1223 goto got_it;
1224 else
1225 tmp = doevery;
1226 }
bbce6d69 1227 else
ffc61ed2
JH
1228 tmp = 1;
1229 s += UTF8SKIP(s);
bbce6d69 1230 }
bbce6d69 1231 }
ffc61ed2
JH
1232 else {
1233 while (s < strend) {
1234 if (isSPACE_LC(*s)) {
1235 if (tmp && (norun || regtry(prog, s)))
1236 goto got_it;
1237 else
1238 tmp = doevery;
1239 }
a0ed51b3 1240 else
ffc61ed2
JH
1241 tmp = 1;
1242 s++;
a0ed51b3 1243 }
a0ed51b3
LW
1244 }
1245 break;
a0d0e21e 1246 case NSPACE:
ffc61ed2 1247 if (do_utf8) {
8269fa76 1248 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1249 while (s < strend) {
3568d838 1250 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1251 if (tmp && (norun || regtry(prog, s)))
1252 goto got_it;
1253 else
1254 tmp = doevery;
1255 }
a0d0e21e 1256 else
ffc61ed2
JH
1257 tmp = 1;
1258 s += UTF8SKIP(s);
a687059c 1259 }
a0d0e21e 1260 }
ffc61ed2
JH
1261 else {
1262 while (s < strend) {
1263 if (!isSPACE(*s)) {
1264 if (tmp && (norun || regtry(prog, s)))
1265 goto got_it;
1266 else
1267 tmp = doevery;
1268 }
a0ed51b3 1269 else
ffc61ed2
JH
1270 tmp = 1;
1271 s++;
a0ed51b3 1272 }
a0ed51b3
LW
1273 }
1274 break;
bbce6d69 1275 case NSPACEL:
3280af22 1276 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1277 if (do_utf8) {
1278 while (s < strend) {
1279 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1280 if (tmp && (norun || regtry(prog, s)))
1281 goto got_it;
1282 else
1283 tmp = doevery;
1284 }
bbce6d69 1285 else
ffc61ed2
JH
1286 tmp = 1;
1287 s += UTF8SKIP(s);
bbce6d69 1288 }
bbce6d69 1289 }
ffc61ed2
JH
1290 else {
1291 while (s < strend) {
1292 if (!isSPACE_LC(*s)) {
1293 if (tmp && (norun || regtry(prog, s)))
1294 goto got_it;
1295 else
1296 tmp = doevery;
1297 }
a0ed51b3 1298 else
ffc61ed2
JH
1299 tmp = 1;
1300 s++;
a0ed51b3 1301 }
a0ed51b3
LW
1302 }
1303 break;
a0d0e21e 1304 case DIGIT:
ffc61ed2 1305 if (do_utf8) {
8269fa76 1306 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1307 while (s < strend) {
3568d838 1308 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1309 if (tmp && (norun || regtry(prog, s)))
1310 goto got_it;
1311 else
1312 tmp = doevery;
1313 }
a0d0e21e 1314 else
ffc61ed2
JH
1315 tmp = 1;
1316 s += UTF8SKIP(s);
2b69d0c2 1317 }
a0d0e21e 1318 }
ffc61ed2
JH
1319 else {
1320 while (s < strend) {
1321 if (isDIGIT(*s)) {
1322 if (tmp && (norun || regtry(prog, s)))
1323 goto got_it;
1324 else
1325 tmp = doevery;
1326 }
a0ed51b3 1327 else
ffc61ed2
JH
1328 tmp = 1;
1329 s++;
a0ed51b3 1330 }
a0ed51b3
LW
1331 }
1332 break;
b8c5462f
JH
1333 case DIGITL:
1334 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1335 if (do_utf8) {
1336 while (s < strend) {
1337 if (isDIGIT_LC_utf8((U8*)s)) {
1338 if (tmp && (norun || regtry(prog, s)))
1339 goto got_it;
1340 else
1341 tmp = doevery;
1342 }
b8c5462f 1343 else
ffc61ed2
JH
1344 tmp = 1;
1345 s += UTF8SKIP(s);
b8c5462f 1346 }
b8c5462f 1347 }
ffc61ed2
JH
1348 else {
1349 while (s < strend) {
1350 if (isDIGIT_LC(*s)) {
1351 if (tmp && (norun || regtry(prog, s)))
1352 goto got_it;
1353 else
1354 tmp = doevery;
1355 }
b8c5462f 1356 else
ffc61ed2
JH
1357 tmp = 1;
1358 s++;
b8c5462f 1359 }
b8c5462f
JH
1360 }
1361 break;
a0d0e21e 1362 case NDIGIT:
ffc61ed2 1363 if (do_utf8) {
8269fa76 1364 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1365 while (s < strend) {
3568d838 1366 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1367 if (tmp && (norun || regtry(prog, s)))
1368 goto got_it;
1369 else
1370 tmp = doevery;
1371 }
a0d0e21e 1372 else
ffc61ed2
JH
1373 tmp = 1;
1374 s += UTF8SKIP(s);
a687059c 1375 }
a0d0e21e 1376 }
ffc61ed2
JH
1377 else {
1378 while (s < strend) {
1379 if (!isDIGIT(*s)) {
1380 if (tmp && (norun || regtry(prog, s)))
1381 goto got_it;
1382 else
1383 tmp = doevery;
1384 }
a0ed51b3 1385 else
ffc61ed2
JH
1386 tmp = 1;
1387 s++;
a0ed51b3 1388 }
a0ed51b3
LW
1389 }
1390 break;
b8c5462f
JH
1391 case NDIGITL:
1392 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1393 if (do_utf8) {
1394 while (s < strend) {
1395 if (!isDIGIT_LC_utf8((U8*)s)) {
1396 if (tmp && (norun || regtry(prog, s)))
1397 goto got_it;
1398 else
1399 tmp = doevery;
1400 }
b8c5462f 1401 else
ffc61ed2
JH
1402 tmp = 1;
1403 s += UTF8SKIP(s);
b8c5462f 1404 }
a0ed51b3 1405 }
ffc61ed2
JH
1406 else {
1407 while (s < strend) {
1408 if (!isDIGIT_LC(*s)) {
1409 if (tmp && (norun || regtry(prog, s)))
1410 goto got_it;
1411 else
1412 tmp = doevery;
1413 }
cf93c79d 1414 else
ffc61ed2
JH
1415 tmp = 1;
1416 s++;
b8c5462f 1417 }
b8c5462f
JH
1418 }
1419 break;
b3c9acc1 1420 default:
3c3eec57
GS
1421 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1422 break;
d6a28714 1423 }
6eb5f6b9
JH
1424 return 0;
1425 got_it:
1426 return s;
1427}
1428
1429/*
1430 - regexec_flags - match a regexp against a string
1431 */
1432I32
1433Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1434 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1435/* strend: pointer to null at end of string */
1436/* strbeg: real beginning of string */
1437/* minend: end of match must be >=minend after stringarg. */
1438/* data: May be used for some additional optimizations. */
1439/* nosave: For optimizations. */
1440{
6eb5f6b9
JH
1441 register char *s;
1442 register regnode *c;
1443 register char *startpos = stringarg;
6eb5f6b9
JH
1444 I32 minlen; /* must match at least this many chars */
1445 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1446 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1447 constant substr. */ /* CC */
1448 I32 end_shift = 0; /* Same for the end. */ /* CC */
1449 I32 scream_pos = -1; /* Internal iterator of scream. */
1450 char *scream_olds;
1451 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1452 bool do_utf8 = DO_UTF8(sv);
6eb5f6b9
JH
1453
1454 PL_regcc = 0;
1455
1456 cache_re(prog);
1457#ifdef DEBUGGING
aea4f609 1458 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1459#endif
1460
1461 /* Be paranoid... */
1462 if (prog == NULL || startpos == NULL) {
1463 Perl_croak(aTHX_ "NULL regexp parameter");
1464 return 0;
1465 }
1466
1467 minlen = prog->minlen;
f33976b4 1468 if (do_utf8 && !(prog->reganch & ROPT_CANY_SEEN)) {
3baa4c62 1469 if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
1aa99e6b
IH
1470 }
1471 else {
f33976b4 1472 if (strend - startpos < minlen) goto phooey;
1aa99e6b 1473 }
6eb5f6b9 1474
6eb5f6b9
JH
1475 /* Check validity of program. */
1476 if (UCHARAT(prog->program) != REG_MAGIC) {
1477 Perl_croak(aTHX_ "corrupted regexp program");
1478 }
1479
1480 PL_reg_flags = 0;
1481 PL_reg_eval_set = 0;
1482 PL_reg_maxiter = 0;
1483
1484 if (prog->reganch & ROPT_UTF8)
1485 PL_reg_flags |= RF_utf8;
1486
1487 /* Mark beginning of line for ^ and lookbehind. */
1488 PL_regbol = startpos;
1489 PL_bostr = strbeg;
1490 PL_reg_sv = sv;
1491
1492 /* Mark end of line for $ (and such) */
1493 PL_regeol = strend;
1494
1495 /* see how far we have to get to not match where we matched before */
1496 PL_regtill = startpos+minend;
1497
1498 /* We start without call_cc context. */
1499 PL_reg_call_cc = 0;
1500
1501 /* If there is a "must appear" string, look for it. */
1502 s = startpos;
1503
1504 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1505 MAGIC *mg;
1506
1507 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1508 PL_reg_ganch = startpos;
1509 else if (sv && SvTYPE(sv) >= SVt_PVMG
1510 && SvMAGIC(sv)
14befaf4
DM
1511 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1512 && mg->mg_len >= 0) {
6eb5f6b9
JH
1513 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1514 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1515 if (s > PL_reg_ganch)
6eb5f6b9
JH
1516 goto phooey;
1517 s = PL_reg_ganch;
1518 }
1519 }
1520 else /* pos() not defined */
1521 PL_reg_ganch = strbeg;
1522 }
1523
699c3c34
JH
1524 if (do_utf8 == (UTF!=0) &&
1525 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1526 re_scream_pos_data d;
1527
1528 d.scream_olds = &scream_olds;
1529 d.scream_pos = &scream_pos;
1530 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1531 if (!s)
1532 goto phooey; /* not present */
1533 }
1534
1535 DEBUG_r( if (!PL_colorset) reginitcolors() );
1536 DEBUG_r(PerlIO_printf(Perl_debug_log,
1537 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1538 PL_colors[4],PL_colors[5],PL_colors[0],
1539 prog->precomp,
1540 PL_colors[1],
1541 (strlen(prog->precomp) > 60 ? "..." : ""),
1542 PL_colors[0],
1543 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1544 startpos, PL_colors[1],
1545 (strend - startpos > 60 ? "..." : ""))
1546 );
1547
1548 /* Simplest case: anchored match need be tried only once. */
1549 /* [unless only anchor is BOL and multiline is set] */
1550 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1551 if (s == startpos && regtry(prog, startpos))
1552 goto got_it;
1553 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1554 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1555 {
1556 char *end;
1557
1558 if (minlen)
1559 dontbother = minlen - 1;
1aa99e6b 1560 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1561 /* for multiline we only have to try after newlines */
1562 if (prog->check_substr) {
1563 if (s == startpos)
1564 goto after_try;
1565 while (1) {
1566 if (regtry(prog, s))
1567 goto got_it;
1568 after_try:
1569 if (s >= end)
1570 goto phooey;
1571 if (prog->reganch & RE_USE_INTUIT) {
1572 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1573 if (!s)
1574 goto phooey;
1575 }
1576 else
1577 s++;
1578 }
1579 } else {
1580 if (s > startpos)
1581 s--;
1582 while (s < end) {
1583 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1584 if (regtry(prog, s))
1585 goto got_it;
1586 }
1587 }
1588 }
1589 }
1590 goto phooey;
1591 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1592 if (regtry(prog, PL_reg_ganch))
1593 goto got_it;
1594 goto phooey;
1595 }
1596
1597 /* Messy cases: unanchored match. */
9041c2e3 1598 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1599 /* we have /x+whatever/ */
1600 /* it must be a one character string (XXXX Except UTF?) */
1601 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1602#ifdef DEBUGGING
1603 int did_match = 0;
1604#endif
1605
1aa99e6b 1606 if (do_utf8) {
6eb5f6b9
JH
1607 while (s < strend) {
1608 if (*s == ch) {
bf93d4cc 1609 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1610 if (regtry(prog, s)) goto got_it;
1611 s += UTF8SKIP(s);
1612 while (s < strend && *s == ch)
1613 s += UTF8SKIP(s);
1614 }
1615 s += UTF8SKIP(s);
1616 }
1617 }
1618 else {
1619 while (s < strend) {
1620 if (*s == ch) {
bf93d4cc 1621 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1622 if (regtry(prog, s)) goto got_it;
1623 s++;
1624 while (s < strend && *s == ch)
1625 s++;
1626 }
1627 s++;
1628 }
1629 }
b7953727 1630 DEBUG_r(if (!did_match)
bf93d4cc 1631 PerlIO_printf(Perl_debug_log,
b7953727
JH
1632 "Did not find anchored character...\n")
1633 );
6eb5f6b9
JH
1634 }
1635 /*SUPPRESS 560*/
1aa99e6b
IH
1636 else if (do_utf8 == (UTF!=0) &&
1637 (prog->anchored_substr != Nullsv
9041c2e3 1638 || (prog->float_substr != Nullsv
1aa99e6b 1639 && prog->float_max_offset < strend - s))) {
9041c2e3 1640 SV *must = prog->anchored_substr
6eb5f6b9 1641 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1642 I32 back_max =
6eb5f6b9 1643 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1644 I32 back_min =
6eb5f6b9 1645 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1646 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1647 -(I32)(CHR_SVLEN(must)
1aa99e6b 1648 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1649 char *last1; /* Last position checked before */
bf93d4cc
GS
1650#ifdef DEBUGGING
1651 int did_match = 0;
1652#endif
6eb5f6b9
JH
1653
1654 if (s > PL_bostr)
1655 last1 = HOPc(s, -1);
1656 else
1657 last1 = s - 1; /* bogus */
1658
1659 /* XXXX check_substr already used to find `s', can optimize if
1660 check_substr==must. */
1661 scream_pos = -1;
1662 dontbother = end_shift;
1663 strend = HOPc(strend, -dontbother);
1664 while ( (s <= last) &&
9041c2e3 1665 ((flags & REXEC_SCREAM)
1aa99e6b 1666 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1667 end_shift, &scream_pos, 0))
1aa99e6b 1668 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1669 (unsigned char*)strend, must,
6eb5f6b9 1670 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1671 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1672 if (HOPc(s, -back_max) > last1) {
1673 last1 = HOPc(s, -back_min);
1674 s = HOPc(s, -back_max);
1675 }
1676 else {
1677 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1678
1679 last1 = HOPc(s, -back_min);
1680 s = t;
1681 }
1aa99e6b 1682 if (do_utf8) {
6eb5f6b9
JH
1683 while (s <= last1) {
1684 if (regtry(prog, s))
1685 goto got_it;
1686 s += UTF8SKIP(s);
1687 }
1688 }
1689 else {
1690 while (s <= last1) {
1691 if (regtry(prog, s))
1692 goto got_it;
1693 s++;
1694 }
1695 }
1696 }
b7953727
JH
1697 DEBUG_r(if (!did_match)
1698 PerlIO_printf(Perl_debug_log,
1699 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1700 ((must == prog->anchored_substr)
1701 ? "anchored" : "floating"),
1702 PL_colors[0],
1703 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1704 SvPVX(must),
b7953727
JH
1705 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1706 );
6eb5f6b9
JH
1707 goto phooey;
1708 }
155aba94 1709 else if ((c = prog->regstclass)) {
66e933ab
GS
1710 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1711 /* don't bother with what can't match */
6eb5f6b9 1712 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1713 DEBUG_r({
1714 SV *prop = sv_newmortal();
1715 regprop(prop, c);
1716 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s);
1717 });
6eb5f6b9
JH
1718 if (find_byclass(prog, c, s, strend, startpos, 0))
1719 goto got_it;
bf93d4cc 1720 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1721 }
1722 else {
1723 dontbother = 0;
1724 if (prog->float_substr != Nullsv) { /* Trim the end. */
1725 char *last;
d6a28714
JH
1726
1727 if (flags & REXEC_SCREAM) {
1728 last = screaminstr(sv, prog->float_substr, s - strbeg,
1729 end_shift, &scream_pos, 1); /* last one */
1730 if (!last)
ffc61ed2 1731 last = scream_olds; /* Only one occurrence. */
b8c5462f 1732 }
d6a28714
JH
1733 else {
1734 STRLEN len;
1735 char *little = SvPV(prog->float_substr, len);
1736
1737 if (SvTAIL(prog->float_substr)) {
1738 if (memEQ(strend - len + 1, little, len - 1))
1739 last = strend - len + 1;
1740 else if (!PL_multiline)
9041c2e3 1741 last = memEQ(strend - len, little, len)
d6a28714 1742 ? strend - len : Nullch;
b8c5462f 1743 else
d6a28714
JH
1744 goto find_last;
1745 } else {
1746 find_last:
9041c2e3 1747 if (len)
d6a28714 1748 last = rninstr(s, strend, little, little + len);
b8c5462f 1749 else
d6a28714 1750 last = strend; /* matching `$' */
b8c5462f 1751 }
b8c5462f 1752 }
bf93d4cc
GS
1753 if (last == NULL) {
1754 DEBUG_r(PerlIO_printf(Perl_debug_log,
1755 "%sCan't trim the tail, match fails (should not happen)%s\n",
1756 PL_colors[4],PL_colors[5]));
1757 goto phooey; /* Should not happen! */
1758 }
d6a28714
JH
1759 dontbother = strend - last + prog->float_min_offset;
1760 }
1761 if (minlen && (dontbother < minlen))
1762 dontbother = minlen - 1;
1763 strend -= dontbother; /* this one's always in bytes! */
1764 /* We don't know much -- general case. */
1aa99e6b 1765 if (do_utf8) {
d6a28714
JH
1766 for (;;) {
1767 if (regtry(prog, s))
1768 goto got_it;
1769 if (s >= strend)
1770 break;
b8c5462f 1771 s += UTF8SKIP(s);
d6a28714
JH
1772 };
1773 }
1774 else {
1775 do {
1776 if (regtry(prog, s))
1777 goto got_it;
1778 } while (s++ < strend);
1779 }
1780 }
1781
1782 /* Failure. */
1783 goto phooey;
1784
1785got_it:
1786 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1787
1788 if (PL_reg_eval_set) {
1789 /* Preserve the current value of $^R */
1790 if (oreplsv != GvSV(PL_replgv))
1791 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1792 restored, the value remains
1793 the same. */
acfe0abc 1794 restore_pos(aTHX_ 0);
d6a28714
JH
1795 }
1796
1797 /* make sure $`, $&, $', and $digit will work later */
1798 if ( !(flags & REXEC_NOT_FIRST) ) {
1799 if (RX_MATCH_COPIED(prog)) {
1800 Safefree(prog->subbeg);
1801 RX_MATCH_COPIED_off(prog);
1802 }
1803 if (flags & REXEC_COPY_STR) {
1804 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1805
1806 s = savepvn(strbeg, i);
1807 prog->subbeg = s;
1808 prog->sublen = i;
1809 RX_MATCH_COPIED_on(prog);
1810 }
1811 else {
1812 prog->subbeg = strbeg;
1813 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1814 }
1815 }
9041c2e3 1816
d6a28714
JH
1817 return 1;
1818
1819phooey:
bf93d4cc
GS
1820 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1821 PL_colors[4],PL_colors[5]));
d6a28714 1822 if (PL_reg_eval_set)
acfe0abc 1823 restore_pos(aTHX_ 0);
d6a28714
JH
1824 return 0;
1825}
1826
1827/*
1828 - regtry - try match at specific point
1829 */
1830STATIC I32 /* 0 failure, 1 success */
1831S_regtry(pTHX_ regexp *prog, char *startpos)
1832{
d6a28714
JH
1833 register I32 i;
1834 register I32 *sp;
1835 register I32 *ep;
1836 CHECKPOINT lastcp;
1837
02db2b7b
IZ
1838#ifdef DEBUGGING
1839 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1840#endif
d6a28714
JH
1841 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1842 MAGIC *mg;
1843
1844 PL_reg_eval_set = RS_init;
1845 DEBUG_r(DEBUG_s(
b900a521
JH
1846 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1847 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1848 ));
e8347627 1849 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1850 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1851 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1852 SAVETMPS;
1853 /* Apparently this is not needed, judging by wantarray. */
e8347627 1854 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1855 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1856
1857 if (PL_reg_sv) {
1858 /* Make $_ available to executed code. */
1859 if (PL_reg_sv != DEFSV) {
4d1ff10f 1860 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
d6a28714
JH
1861 SAVESPTR(DEFSV);
1862 DEFSV = PL_reg_sv;
b8c5462f 1863 }
d6a28714 1864
9041c2e3 1865 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1866 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1867 /* prepare for quick setting of pos */
14befaf4
DM
1868 sv_magic(PL_reg_sv, (SV*)0,
1869 PERL_MAGIC_regex_global, Nullch, 0);
1870 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1871 mg->mg_len = -1;
b8c5462f 1872 }
d6a28714
JH
1873 PL_reg_magic = mg;
1874 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1875 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 1876 }
09687e5a 1877 if (!PL_reg_curpm) {
0f79a09d 1878 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
1879#ifdef USE_ITHREADS
1880 {
1881 SV* repointer = newSViv(0);
577e12cc 1882 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 1883 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
1884 av_push(PL_regex_padav,repointer);
1885 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
1886 PL_regex_pad = AvARRAY(PL_regex_padav);
1887 }
1888#endif
1889 }
aaa362c4 1890 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
1891 PL_reg_oldcurpm = PL_curpm;
1892 PL_curpm = PL_reg_curpm;
1893 if (RX_MATCH_COPIED(prog)) {
1894 /* Here is a serious problem: we cannot rewrite subbeg,
1895 since it may be needed if this match fails. Thus
1896 $` inside (?{}) could fail... */
1897 PL_reg_oldsaved = prog->subbeg;
1898 PL_reg_oldsavedlen = prog->sublen;
1899 RX_MATCH_COPIED_off(prog);
1900 }
1901 else
1902 PL_reg_oldsaved = Nullch;
1903 prog->subbeg = PL_bostr;
1904 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1905 }
1906 prog->startp[0] = startpos - PL_bostr;
1907 PL_reginput = startpos;
1908 PL_regstartp = prog->startp;
1909 PL_regendp = prog->endp;
1910 PL_reglastparen = &prog->lastparen;
a01268b5 1911 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
1912 prog->lastparen = 0;
1913 PL_regsize = 0;
1914 DEBUG_r(PL_reg_starttry = startpos);
1915 if (PL_reg_start_tmpl <= prog->nparens) {
1916 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1917 if(PL_reg_start_tmp)
1918 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1919 else
1920 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1921 }
1922
1923 /* XXXX What this code is doing here?!!! There should be no need
1924 to do this again and again, PL_reglastparen should take care of
3dd2943c 1925 this! --ilya*/
dafc8851
JH
1926
1927 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1928 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1929 * PL_reglastparen), is not needed at all by the test suite
1930 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1931 * enough, for building DynaLoader, or otherwise this
1932 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1933 * will happen. Meanwhile, this code *is* needed for the
1934 * above-mentioned test suite tests to succeed. The common theme
1935 * on those tests seems to be returning null fields from matches.
1936 * --jhi */
dafc8851 1937#if 1
d6a28714
JH
1938 sp = prog->startp;
1939 ep = prog->endp;
1940 if (prog->nparens) {
09e8ae3b 1941 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1942 *++sp = -1;
1943 *++ep = -1;
1944 }
1945 }
dafc8851 1946#endif
02db2b7b 1947 REGCP_SET(lastcp);
d6a28714
JH
1948 if (regmatch(prog->program + 1)) {
1949 prog->endp[0] = PL_reginput - PL_bostr;
1950 return 1;
1951 }
02db2b7b 1952 REGCP_UNWIND(lastcp);
d6a28714
JH
1953 return 0;
1954}
1955
02db2b7b
IZ
1956#define RE_UNWIND_BRANCH 1
1957#define RE_UNWIND_BRANCHJ 2
1958
1959union re_unwind_t;
1960
1961typedef struct { /* XX: makes sense to enlarge it... */
1962 I32 type;
1963 I32 prev;
1964 CHECKPOINT lastcp;
1965} re_unwind_generic_t;
1966
1967typedef struct {
1968 I32 type;
1969 I32 prev;
1970 CHECKPOINT lastcp;
1971 I32 lastparen;
1972 regnode *next;
1973 char *locinput;
1974 I32 nextchr;
1975#ifdef DEBUGGING
1976 int regindent;
1977#endif
1978} re_unwind_branch_t;
1979
1980typedef union re_unwind_t {
1981 I32 type;
1982 re_unwind_generic_t generic;
1983 re_unwind_branch_t branch;
1984} re_unwind_t;
1985
8ba1375e
MJD
1986#define sayYES goto yes
1987#define sayNO goto no
1988#define sayYES_FINAL goto yes_final
1989#define sayYES_LOUD goto yes_loud
1990#define sayNO_FINAL goto no_final
1991#define sayNO_SILENT goto do_no
1992#define saySAME(x) if (x) goto yes; else goto no
1993
1994#define REPORT_CODE_OFF 24
1995
d6a28714
JH
1996/*
1997 - regmatch - main matching routine
1998 *
1999 * Conceptually the strategy is simple: check to see whether the current
2000 * node matches, call self recursively to see whether the rest matches,
2001 * and then act accordingly. In practice we make some effort to avoid
2002 * recursion, in particular by going through "ordinary" nodes (that don't
2003 * need to know whether the rest of the match failed) by a loop instead of
2004 * by recursion.
2005 */
2006/* [lwall] I've hoisted the register declarations to the outer block in order to
2007 * maybe save a little bit of pushing and popping on the stack. It also takes
2008 * advantage of machines that use a register save mask on subroutine entry.
2009 */
2010STATIC I32 /* 0 failure, 1 success */
2011S_regmatch(pTHX_ regnode *prog)
2012{
d6a28714
JH
2013 register regnode *scan; /* Current node. */
2014 regnode *next; /* Next node. */
2015 regnode *inner; /* Next node in internal branch. */
2016 register I32 nextchr; /* renamed nextchr - nextchar colides with
2017 function of same name */
2018 register I32 n; /* no or next */
b7953727
JH
2019 register I32 ln = 0; /* len or last */
2020 register char *s = Nullch; /* operand or save */
d6a28714 2021 register char *locinput = PL_reginput;
b7953727 2022 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2023 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2024 I32 unwind = 0;
b7953727 2025#if 0
02db2b7b 2026 I32 firstcp = PL_savestack_ix;
b7953727 2027#endif
53c4c00c 2028 register bool do_utf8 = PL_reg_match_utf8;
02db2b7b 2029
d6a28714
JH
2030#ifdef DEBUGGING
2031 PL_regindent++;
2032#endif
2033
2034 /* Note that nextchr is a byte even in UTF */
2035 nextchr = UCHARAT(locinput);
2036 scan = prog;
2037 while (scan != NULL) {
8ba1375e 2038
d6a28714
JH
2039 DEBUG_r( {
2040 SV *prop = sv_newmortal();
2041 int docolor = *PL_colors[0];
2042 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2043 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2044 /* The part of the string before starttry has one color
2045 (pref0_len chars), between starttry and current
2046 position another one (pref_len - pref0_len chars),
2047 after the current position the third one.
2048 We assume that pref0_len <= pref_len, otherwise we
2049 decrease pref0_len. */
9041c2e3 2050 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2051 ? (5 + taill) - l : locinput - PL_bostr;
2052 int pref0_len;
d6a28714 2053
1aa99e6b
IH
2054 while (UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2055 pref_len++;
2056 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2057 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2058 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2059 ? (5 + taill) - pref_len : PL_regeol - locinput);
1aa99e6b
IH
2060 while (UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2061 l--;
d6a28714
JH
2062 if (pref0_len < 0)
2063 pref0_len = 0;
2064 if (pref0_len > pref_len)
2065 pref0_len = pref_len;
2066 regprop(prop, scan);
9041c2e3 2067 PerlIO_printf(Perl_debug_log,
b900a521 2068 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
9041c2e3
NIS
2069 (IV)(locinput - PL_bostr),
2070 PL_colors[4], pref0_len,
d6a28714 2071 locinput - pref_len, PL_colors[5],
9041c2e3 2072 PL_colors[2], pref_len - pref0_len,
d6a28714
JH
2073 locinput - pref_len + pref0_len, PL_colors[3],
2074 (docolor ? "" : "> <"),
2075 PL_colors[0], l, locinput, PL_colors[1],
2076 15 - l - pref_len + 1,
2077 "",
b900a521 2078 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
2079 SvPVX(prop));
2080 } );
2081
2082 next = scan + NEXT_OFF(scan);
2083 if (next == scan)
2084 next = NULL;
2085
2086 switch (OP(scan)) {
2087 case BOL:
12d33761
HS
2088 if (locinput == PL_bostr || (PL_multiline &&
2089 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2090 {
2091 /* regtill = regbol; */
b8c5462f
JH
2092 break;
2093 }
d6a28714
JH
2094 sayNO;
2095 case MBOL:
12d33761
HS
2096 if (locinput == PL_bostr ||
2097 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2098 {
b8c5462f
JH
2099 break;
2100 }
d6a28714
JH
2101 sayNO;
2102 case SBOL:
c2a73568 2103 if (locinput == PL_bostr)
b8c5462f 2104 break;
d6a28714
JH
2105 sayNO;
2106 case GPOS:
2107 if (locinput == PL_reg_ganch)
2108 break;
2109 sayNO;
2110 case EOL:
2111 if (PL_multiline)
2112 goto meol;
2113 else
2114 goto seol;
2115 case MEOL:
2116 meol:
2117 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2118 sayNO;
b8c5462f 2119 break;
d6a28714
JH
2120 case SEOL:
2121 seol:
2122 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2123 sayNO;
d6a28714 2124 if (PL_regeol - locinput > 1)
b8c5462f 2125 sayNO;
b8c5462f 2126 break;
d6a28714
JH
2127 case EOS:
2128 if (PL_regeol != locinput)
b8c5462f 2129 sayNO;
d6a28714 2130 break;
ffc61ed2 2131 case SANY:
d6a28714 2132 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2133 sayNO;
f33976b4
DB
2134 if (do_utf8) {
2135 locinput += PL_utf8skip[nextchr];
2136 if (locinput > PL_regeol)
2137 sayNO;
2138 nextchr = UCHARAT(locinput);
2139 }
2140 else
2141 nextchr = UCHARAT(++locinput);
2142 break;
2143 case CANY:
2144 if (!nextchr && locinput >= PL_regeol)
2145 sayNO;
b8c5462f 2146 nextchr = UCHARAT(++locinput);
a0d0e21e 2147 break;
ffc61ed2 2148 case REG_ANY:
1aa99e6b
IH
2149 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2150 sayNO;
2151 if (do_utf8) {
b8c5462f 2152 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2153 if (locinput > PL_regeol)
2154 sayNO;
a0ed51b3 2155 nextchr = UCHARAT(locinput);
a0ed51b3 2156 }
1aa99e6b
IH
2157 else
2158 nextchr = UCHARAT(++locinput);
a0ed51b3 2159 break;
d6a28714 2160 case EXACT:
cd439c50
IZ
2161 s = STRING(scan);
2162 ln = STR_LEN(scan);
1aa99e6b
IH
2163 if (do_utf8 != (UTF!=0)) {
2164 char *l = locinput;
2165 char *e = s + ln;
2166 STRLEN len;
2167 if (do_utf8)
2168 while (s < e) {
2169 if (l >= PL_regeol)
2170 sayNO;
9041c2e3 2171 if (*((U8*)s) != utf8_to_uvchr((U8*)l, &len))
1aa99e6b
IH
2172 sayNO;
2173 s++;
2174 l += len;
2175 }
2176 else
2177 while (s < e) {
2178 if (l >= PL_regeol)
2179 sayNO;
9041c2e3 2180 if (*((U8*)l) != utf8_to_uvchr((U8*)s, &len))
1aa99e6b
IH
2181 sayNO;
2182 s += len;
2183 l++;
2184 }
2185 locinput = l;
2186 nextchr = UCHARAT(locinput);
2187 break;
2188 }
d6a28714
JH
2189 /* Inline the first character, for speed. */
2190 if (UCHARAT(s) != nextchr)
2191 sayNO;
2192 if (PL_regeol - locinput < ln)
2193 sayNO;
2194 if (ln > 1 && memNE(s, locinput, ln))
2195 sayNO;
2196 locinput += ln;
2197 nextchr = UCHARAT(locinput);
2198 break;
2199 case EXACTFL:
b8c5462f
JH
2200 PL_reg_flags |= RF_tainted;
2201 /* FALL THROUGH */
d6a28714 2202 case EXACTF:
cd439c50
IZ
2203 s = STRING(scan);
2204 ln = STR_LEN(scan);
d6a28714 2205
1aa99e6b 2206 if (do_utf8) {
d6a28714 2207 char *l = locinput;
1aa99e6b 2208 char *e;
a2a2844f
JH
2209 STRLEN ulen;
2210 U8 tmpbuf[UTF8_MAXLEN*2+1];
1aa99e6b 2211 e = s + ln;
d6a28714 2212 while (s < e) {
a2a2844f 2213 if (l >= PL_regeol)
d6a28714 2214 sayNO;
a2a2844f
JH
2215 toLOWER_utf8((U8*)l, tmpbuf, &ulen);
2216 if (memNE(s, tmpbuf, ulen))
2217 sayNO;
2218 s += UTF8SKIP(s);
2219 l += ulen;
b8c5462f 2220 }
d6a28714 2221 locinput = l;
a0ed51b3
LW
2222 nextchr = UCHARAT(locinput);
2223 break;
2224 }
d6a28714
JH
2225
2226 /* Inline the first character, for speed. */
2227 if (UCHARAT(s) != nextchr &&
2228 UCHARAT(s) != ((OP(scan) == EXACTF)
2229 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2230 sayNO;
d6a28714 2231 if (PL_regeol - locinput < ln)
b8c5462f 2232 sayNO;
d6a28714
JH
2233 if (ln > 1 && (OP(scan) == EXACTF
2234 ? ibcmp(s, locinput, ln)
2235 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2236 sayNO;
d6a28714
JH
2237 locinput += ln;
2238 nextchr = UCHARAT(locinput);
a0d0e21e 2239 break;
d6a28714 2240 case ANYOF:
ffc61ed2
JH
2241 if (do_utf8) {
2242 if (!reginclass(scan, (U8*)locinput, do_utf8))
2243 sayNO;
2244 if (locinput >= PL_regeol)
2245 sayNO;
2246 locinput += PL_utf8skip[nextchr];
b8c5462f 2247 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2248 }
2249 else {
2250 if (nextchr < 0)
2251 nextchr = UCHARAT(locinput);
2252 if (!reginclass(scan, (U8*)locinput, do_utf8))
2253 sayNO;
2254 if (!nextchr && locinput >= PL_regeol)
2255 sayNO;
2256 nextchr = UCHARAT(++locinput);
2257 }
b8c5462f 2258 break;
d6a28714 2259 case ALNUML:
b8c5462f
JH
2260 PL_reg_flags |= RF_tainted;
2261 /* FALL THROUGH */
d6a28714 2262 case ALNUM:
b8c5462f 2263 if (!nextchr)
4633a7c4 2264 sayNO;
ffc61ed2 2265 if (do_utf8) {
ad24be35 2266 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2267 if (!(OP(scan) == ALNUM
3568d838 2268 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2269 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2270 {
2271 sayNO;
a0ed51b3 2272 }
b8c5462f 2273 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2274 nextchr = UCHARAT(locinput);
2275 break;
2276 }
ffc61ed2 2277 if (!(OP(scan) == ALNUM
d6a28714 2278 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2279 sayNO;
b8c5462f 2280 nextchr = UCHARAT(++locinput);
a0d0e21e 2281 break;
d6a28714 2282 case NALNUML:
b8c5462f
JH
2283 PL_reg_flags |= RF_tainted;
2284 /* FALL THROUGH */
d6a28714
JH
2285 case NALNUM:
2286 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2287 sayNO;
ffc61ed2 2288 if (do_utf8) {
8269fa76 2289 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2290 if (OP(scan) == NALNUM
3568d838 2291 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2292 : isALNUM_LC_utf8((U8*)locinput))
2293 {
b8c5462f 2294 sayNO;
d6a28714 2295 }
b8c5462f
JH
2296 locinput += PL_utf8skip[nextchr];
2297 nextchr = UCHARAT(locinput);
2298 break;
2299 }
ffc61ed2 2300 if (OP(scan) == NALNUM
d6a28714 2301 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2302 sayNO;
76e3520e 2303 nextchr = UCHARAT(++locinput);
a0d0e21e 2304 break;
d6a28714
JH
2305 case BOUNDL:
2306 case NBOUNDL:
3280af22 2307 PL_reg_flags |= RF_tainted;
bbce6d69 2308 /* FALL THROUGH */
d6a28714
JH
2309 case BOUND:
2310 case NBOUND:
2311 /* was last char in word? */
ffc61ed2 2312 if (do_utf8) {
12d33761
HS
2313 if (locinput == PL_bostr)
2314 ln = '\n';
ffc61ed2
JH
2315 else {
2316 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2317
2b9d42f0 2318 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2319 }
2320 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2321 ln = isALNUM_uni(ln);
8269fa76 2322 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2323 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2324 }
2325 else {
9041c2e3 2326 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2327 n = isALNUM_LC_utf8((U8*)locinput);
2328 }
a0ed51b3 2329 }
d6a28714 2330 else {
12d33761
HS
2331 ln = (locinput != PL_bostr) ?
2332 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2333 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2334 ln = isALNUM(ln);
2335 n = isALNUM(nextchr);
2336 }
2337 else {
2338 ln = isALNUM_LC(ln);
2339 n = isALNUM_LC(nextchr);
2340 }
d6a28714 2341 }
ffc61ed2
JH
2342 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2343 OP(scan) == BOUNDL))
2344 sayNO;
a0ed51b3 2345 break;
d6a28714 2346 case SPACEL:
3280af22 2347 PL_reg_flags |= RF_tainted;
bbce6d69 2348 /* FALL THROUGH */
d6a28714 2349 case SPACE:
9442cb0e 2350 if (!nextchr)
4633a7c4 2351 sayNO;
1aa99e6b 2352 if (do_utf8) {
fd400ab9 2353 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2354 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2355 if (!(OP(scan) == SPACE
3568d838 2356 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2357 : isSPACE_LC_utf8((U8*)locinput)))
2358 {
2359 sayNO;
2360 }
2361 locinput += PL_utf8skip[nextchr];
2362 nextchr = UCHARAT(locinput);
2363 break;
d6a28714 2364 }
ffc61ed2
JH
2365 if (!(OP(scan) == SPACE
2366 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2367 sayNO;
2368 nextchr = UCHARAT(++locinput);
2369 }
2370 else {
2371 if (!(OP(scan) == SPACE
2372 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2373 sayNO;
2374 nextchr = UCHARAT(++locinput);
a0ed51b3 2375 }
a0ed51b3 2376 break;
d6a28714 2377 case NSPACEL:
3280af22 2378 PL_reg_flags |= RF_tainted;
bbce6d69 2379 /* FALL THROUGH */
d6a28714 2380 case NSPACE:
9442cb0e 2381 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2382 sayNO;
1aa99e6b 2383 if (do_utf8) {
8269fa76 2384 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2385 if (OP(scan) == NSPACE
3568d838 2386 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2387 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2388 {
2389 sayNO;
2390 }
2391 locinput += PL_utf8skip[nextchr];
2392 nextchr = UCHARAT(locinput);
2393 break;
a0ed51b3 2394 }
ffc61ed2 2395 if (OP(scan) == NSPACE
d6a28714 2396 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2397 sayNO;
76e3520e 2398 nextchr = UCHARAT(++locinput);
a0d0e21e 2399 break;
d6a28714 2400 case DIGITL:
a0ed51b3
LW
2401 PL_reg_flags |= RF_tainted;
2402 /* FALL THROUGH */
d6a28714 2403 case DIGIT:
9442cb0e 2404 if (!nextchr)
a0ed51b3 2405 sayNO;
1aa99e6b 2406 if (do_utf8) {
8269fa76 2407 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2408 if (!(OP(scan) == DIGIT
3568d838 2409 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2410 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2411 {
a0ed51b3 2412 sayNO;
dfe13c55 2413 }
6f06b55f 2414 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2415 nextchr = UCHARAT(locinput);
2416 break;
2417 }
ffc61ed2 2418 if (!(OP(scan) == DIGIT
9442cb0e 2419 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2420 sayNO;
2421 nextchr = UCHARAT(++locinput);
2422 break;
d6a28714 2423 case NDIGITL:
b8c5462f
JH
2424 PL_reg_flags |= RF_tainted;
2425 /* FALL THROUGH */
d6a28714 2426 case NDIGIT:
9442cb0e 2427 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2428 sayNO;
1aa99e6b 2429 if (do_utf8) {
8269fa76 2430 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2431 if (OP(scan) == NDIGIT
3568d838 2432 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2433 : isDIGIT_LC_utf8((U8*)locinput))
2434 {
a0ed51b3 2435 sayNO;
9442cb0e 2436 }
6f06b55f 2437 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2438 nextchr = UCHARAT(locinput);
2439 break;
2440 }
ffc61ed2 2441 if (OP(scan) == NDIGIT
9442cb0e 2442 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2443 sayNO;
2444 nextchr = UCHARAT(++locinput);
2445 break;
2446 case CLUMP:
8269fa76 2447 LOAD_UTF8_CHARCLASS(mark,"~");
3568d838
JH
2448 if (locinput >= PL_regeol ||
2449 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3 2450 sayNO;
6f06b55f 2451 locinput += PL_utf8skip[nextchr];
3568d838
JH
2452 while (locinput < PL_regeol &&
2453 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3
LW
2454 locinput += UTF8SKIP(locinput);
2455 if (locinput > PL_regeol)
2456 sayNO;
2457 nextchr = UCHARAT(locinput);
2458 break;
c8756f30 2459 case REFFL:
3280af22 2460 PL_reg_flags |= RF_tainted;
c8756f30 2461 /* FALL THROUGH */
c277df42 2462 case REF:
c8756f30 2463 case REFF:
c277df42 2464 n = ARG(scan); /* which paren pair */
cf93c79d 2465 ln = PL_regstartp[n];
2c2d71f5 2466 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2467 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2468 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2469 if (ln == PL_regendp[n])
a0d0e21e 2470 break;
a0ed51b3 2471
cf93c79d 2472 s = PL_bostr + ln;
1aa99e6b 2473 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2474 char *l = locinput;
cf93c79d 2475 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2476 /*
2477 * Note that we can't do the "other character" lookup trick as
2478 * in the 8-bit case (no pun intended) because in Unicode we
2479 * have to map both upper and title case to lower case.
2480 */
2481 if (OP(scan) == REFF) {
a2a2844f
JH
2482 STRLEN ulen1, ulen2;
2483 U8 tmpbuf1[UTF8_MAXLEN*2+1];
2484 U8 tmpbuf2[UTF8_MAXLEN*2+1];
a0ed51b3
LW
2485 while (s < e) {
2486 if (l >= PL_regeol)
2487 sayNO;
a2a2844f
JH
2488 toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
2489 toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
2490 if (ulen1 != ulen2 || memNE(tmpbuf1, tmpbuf2, ulen1))
a0ed51b3 2491 sayNO;
a2a2844f
JH
2492 s += ulen1;
2493 l += ulen2;
a0ed51b3
LW
2494 }
2495 }
2496 locinput = l;
2497 nextchr = UCHARAT(locinput);
2498 break;
2499 }
2500
a0d0e21e 2501 /* Inline the first character, for speed. */
76e3520e 2502 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2503 (OP(scan) == REF ||
2504 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2505 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2506 sayNO;
cf93c79d 2507 ln = PL_regendp[n] - ln;
3280af22 2508 if (locinput + ln > PL_regeol)
4633a7c4 2509 sayNO;
c8756f30
AK
2510 if (ln > 1 && (OP(scan) == REF
2511 ? memNE(s, locinput, ln)
2512 : (OP(scan) == REFF
2513 ? ibcmp(s, locinput, ln)
2514 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2515 sayNO;
a0d0e21e 2516 locinput += ln;
76e3520e 2517 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2518 break;
2519
2520 case NOTHING:
c277df42 2521 case TAIL:
a0d0e21e
LW
2522 break;
2523 case BACK:
2524 break;
c277df42
IZ
2525 case EVAL:
2526 {
2527 dSP;
533c011a 2528 OP_4tree *oop = PL_op;
3280af22
NIS
2529 COP *ocurcop = PL_curcop;
2530 SV **ocurpad = PL_curpad;
c277df42 2531 SV *ret;
9041c2e3 2532
c277df42 2533 n = ARG(scan);
533c011a 2534 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2535 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2536 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2537 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2538
8e5e9ebe
RGS
2539 {
2540 SV **before = SP;
2541 CALLRUNOPS(aTHX); /* Scalar context. */
2542 SPAGAIN;
2543 if (SP == before)
2544 ret = Nullsv; /* protect against empty (?{}) blocks. */
2545 else {
2546 ret = POPs;
2547 PUTBACK;
2548 }
2549 }
2550
0f5d15d6
IZ
2551 PL_op = oop;
2552 PL_curpad = ocurpad;
2553 PL_curcop = ocurcop;
c277df42 2554 if (logical) {
0f5d15d6
IZ
2555 if (logical == 2) { /* Postponed subexpression. */
2556 regexp *re;
22c35a8c 2557 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2558 re_cc_state state;
0f5d15d6
IZ
2559 CHECKPOINT cp, lastcp;
2560
2561 if(SvROK(ret) || SvRMAGICAL(ret)) {
2562 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2563
2564 if(SvMAGICAL(sv))
14befaf4 2565 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2566 }
2567 if (mg) {
2568 re = (regexp *)mg->mg_obj;
df0003d4 2569 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2570 }
2571 else {
2572 STRLEN len;
2573 char *t = SvPV(ret, len);
2574 PMOP pm;
2575 char *oprecomp = PL_regprecomp;
2576 I32 osize = PL_regsize;
2577 I32 onpar = PL_regnpar;
2578
5fcd1c1b 2579 Zero(&pm, 1, PMOP);
cea2e8a9 2580 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2581 if (!(SvFLAGS(ret)
0f5d15d6 2582 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2583 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2584 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2585 PL_regprecomp = oprecomp;
2586 PL_regsize = osize;
2587 PL_regnpar = onpar;
2588 }
2589 DEBUG_r(
9041c2e3 2590 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2591 "Entering embedded `%s%.60s%s%s'\n",
2592 PL_colors[0],
2593 re->precomp,
2594 PL_colors[1],
2595 (strlen(re->precomp) > 60 ? "..." : ""))
2596 );
2597 state.node = next;
2598 state.prev = PL_reg_call_cc;
2599 state.cc = PL_regcc;
2600 state.re = PL_reg_re;
2601
2ab05381 2602 PL_regcc = 0;
9041c2e3 2603
0f5d15d6 2604 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2605 REGCP_SET(lastcp);
0f5d15d6
IZ
2606 cache_re(re);
2607 state.ss = PL_savestack_ix;
2608 *PL_reglastparen = 0;
a01268b5 2609 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2610 PL_reg_call_cc = &state;
2611 PL_reginput = locinput;
2c2d71f5
JH
2612
2613 /* XXXX This is too dramatic a measure... */
2614 PL_reg_maxiter = 0;
2615
0f5d15d6 2616 if (regmatch(re->program + 1)) {
2c914db6
IZ
2617 /* Even though we succeeded, we need to restore
2618 global variables, since we may be wrapped inside
2619 SUSPEND, thus the match may be not finished yet. */
2620
2621 /* XXXX Do this only if SUSPENDed? */
2622 PL_reg_call_cc = state.prev;
2623 PL_regcc = state.cc;
2624 PL_reg_re = state.re;
2625 cache_re(PL_reg_re);
2626
2627 /* XXXX This is too dramatic a measure... */
2628 PL_reg_maxiter = 0;
2629
2630 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2631 ReREFCNT_dec(re);
2632 regcpblow(cp);
2633 sayYES;
2634 }
0f5d15d6 2635 ReREFCNT_dec(re);
02db2b7b 2636 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2637 regcppop();
2638 PL_reg_call_cc = state.prev;
2639 PL_regcc = state.cc;
2640 PL_reg_re = state.re;
d3790889 2641 cache_re(PL_reg_re);
2c2d71f5
JH
2642
2643 /* XXXX This is too dramatic a measure... */
2644 PL_reg_maxiter = 0;
2645
8e514ae6 2646 logical = 0;
0f5d15d6
IZ
2647 sayNO;
2648 }
c277df42 2649 sw = SvTRUE(ret);
0f5d15d6 2650 logical = 0;
a0ed51b3
LW
2651 }
2652 else
3280af22 2653 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2654 break;
2655 }
a0d0e21e 2656 case OPEN:
c277df42 2657 n = ARG(scan); /* which paren pair */
3280af22
NIS
2658 PL_reg_start_tmp[n] = locinput;
2659 if (n > PL_regsize)
2660 PL_regsize = n;
a0d0e21e
LW
2661 break;
2662 case CLOSE:
c277df42 2663 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2664 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2665 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2666 if (n > *PL_reglastparen)
2667 *PL_reglastparen = n;
a01268b5 2668 *PL_reglastcloseparen = n;
a0d0e21e 2669 break;
c277df42
IZ
2670 case GROUPP:
2671 n = ARG(scan); /* which paren pair */
cf93c79d 2672 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2673 break;
2674 case IFTHEN:
2c2d71f5 2675 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2676 if (sw)
2677 next = NEXTOPER(NEXTOPER(scan));
2678 else {
2679 next = scan + ARG(scan);
2680 if (OP(next) == IFTHEN) /* Fake one. */
2681 next = NEXTOPER(NEXTOPER(next));
2682 }
2683 break;
2684 case LOGICAL:
0f5d15d6 2685 logical = scan->flags;
c277df42 2686 break;
2ab05381
IZ
2687/*******************************************************************
2688 PL_regcc contains infoblock about the innermost (...)* loop, and
2689 a pointer to the next outer infoblock.
2690
2691 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2692
2693 1) After matching X, regnode for CURLYX is processed;
2694
9041c2e3 2695 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2696 regmatch() recursively with the starting point at WHILEM node;
2697
2698 3) Each hit of WHILEM node tries to match A and Z (in the order
2699 depending on the current iteration, min/max of {min,max} and
2700 greediness). The information about where are nodes for "A"
2701 and "Z" is read from the infoblock, as is info on how many times "A"
2702 was already matched, and greediness.
2703
2704 4) After A matches, the same WHILEM node is hit again.
2705
2706 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2707 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2708 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2709 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2710 of the external loop.
2711
2712 Currently present infoblocks form a tree with a stem formed by PL_curcc
2713 and whatever it mentions via ->next, and additional attached trees
2714 corresponding to temporarily unset infoblocks as in "5" above.
2715
9041c2e3 2716 In the following picture infoblocks for outer loop of
2ab05381
IZ
2717 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2718 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2719 infoblocks are drawn below the "reset" infoblock.
2720
2721 In fact in the picture below we do not show failed matches for Z and T
2722 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2723 more obvious *why* one needs to *temporary* unset infoblocks.]
2724
2725 Matched REx position InfoBlocks Comment
2726 (Y(A)*?Z)*?T x
2727 Y(A)*?Z)*?T x <- O
2728 Y (A)*?Z)*?T x <- O
2729 Y A)*?Z)*?T x <- O <- I
2730 YA )*?Z)*?T x <- O <- I
2731 YA A)*?Z)*?T x <- O <- I
2732 YAA )*?Z)*?T x <- O <- I
2733 YAA Z)*?T x <- O # Temporary unset I
2734 I
2735
2736 YAAZ Y(A)*?Z)*?T x <- O
2737 I
2738
2739 YAAZY (A)*?Z)*?T x <- O
2740 I
2741
2742 YAAZY A)*?Z)*?T x <- O <- I
2743 I
2744
2745 YAAZYA )*?Z)*?T x <- O <- I
2746 I
2747
2748 YAAZYA Z)*?T x <- O # Temporary unset I
2749 I,I
2750
2751 YAAZYAZ )*?T x <- O
2752 I,I
2753
2754 YAAZYAZ T x # Temporary unset O
2755 O
2756 I,I
2757
2758 YAAZYAZT x
2759 O
2760 I,I
2761 *******************************************************************/
a0d0e21e
LW
2762 case CURLYX: {
2763 CURCUR cc;
3280af22 2764 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2765 /* No need to save/restore up to this paren */
2766 I32 parenfloor = scan->flags;
c277df42
IZ
2767
2768 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2769 next += ARG(next);
3280af22
NIS
2770 cc.oldcc = PL_regcc;
2771 PL_regcc = &cc;
cb434fcc
IZ
2772 /* XXXX Probably it is better to teach regpush to support
2773 parenfloor > PL_regsize... */
2774 if (parenfloor > *PL_reglastparen)
2775 parenfloor = *PL_reglastparen; /* Pessimization... */
2776 cc.parenfloor = parenfloor;
a0d0e21e
LW
2777 cc.cur = -1;
2778 cc.min = ARG1(scan);
2779 cc.max = ARG2(scan);
c277df42 2780 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2781 cc.next = next;
2782 cc.minmod = minmod;
2783 cc.lastloc = 0;
3280af22 2784 PL_reginput = locinput;
a0d0e21e
LW
2785 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2786 regcpblow(cp);
3280af22 2787 PL_regcc = cc.oldcc;
4633a7c4 2788 saySAME(n);
a0d0e21e
LW
2789 }
2790 /* NOT REACHED */
2791 case WHILEM: {
2792 /*
2793 * This is really hard to understand, because after we match
2794 * what we're trying to match, we must make sure the rest of
2c2d71f5 2795 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2796 * to go back UP the parse tree by recursing ever deeper. And
2797 * if it fails, we have to reset our parent's current state
2798 * that we can try again after backing off.
2799 */
2800
c277df42 2801 CHECKPOINT cp, lastcp;
3280af22 2802 CURCUR* cc = PL_regcc;
c277df42
IZ
2803 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2804
4633a7c4 2805 n = cc->cur + 1; /* how many we know we matched */
3280af22 2806 PL_reginput = locinput;
a0d0e21e 2807
c277df42 2808 DEBUG_r(
9041c2e3
NIS
2809 PerlIO_printf(Perl_debug_log,
2810 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2811 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2812 (long)n, (long)cc->min,
c277df42
IZ
2813 (long)cc->max, (long)cc)
2814 );
4633a7c4 2815
a0d0e21e
LW
2816 /* If degenerate scan matches "", assume scan done. */
2817
579cf2c3 2818 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2819 PL_regcc = cc->oldcc;
2ab05381
IZ
2820 if (PL_regcc)
2821 ln = PL_regcc->cur;
c277df42 2822 DEBUG_r(
c3464db5
DD
2823 PerlIO_printf(Perl_debug_log,
2824 "%*s empty match detected, try continuation...\n",
3280af22 2825 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2826 );
a0d0e21e 2827 if (regmatch(cc->next))
4633a7c4 2828 sayYES;
2ab05381
IZ
2829 if (PL_regcc)
2830 PL_regcc->cur = ln;
3280af22 2831 PL_regcc = cc;
4633a7c4 2832 sayNO;
a0d0e21e
LW
2833 }
2834
2835 /* First just match a string of min scans. */
2836
2837 if (n < cc->min) {
2838 cc->cur = n;
2839 cc->lastloc = locinput;
4633a7c4
LW
2840 if (regmatch(cc->scan))
2841 sayYES;
2842 cc->cur = n - 1;
c277df42 2843 cc->lastloc = lastloc;
4633a7c4 2844 sayNO;
a0d0e21e
LW
2845 }
2846
2c2d71f5
JH
2847 if (scan->flags) {
2848 /* Check whether we already were at this position.
2849 Postpone detection until we know the match is not
2850 *that* much linear. */
2851 if (!PL_reg_maxiter) {
2852 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2853 PL_reg_leftiter = PL_reg_maxiter;
2854 }
2855 if (PL_reg_leftiter-- == 0) {
2856 I32 size = (PL_reg_maxiter + 7)/8;
2857 if (PL_reg_poscache) {
2858 if (PL_reg_poscache_size < size) {
2859 Renew(PL_reg_poscache, size, char);
2860 PL_reg_poscache_size = size;
2861 }
2862 Zero(PL_reg_poscache, size, char);
2863 }
2864 else {
2865 PL_reg_poscache_size = size;
2866 Newz(29, PL_reg_poscache, size, char);
2867 }
2868 DEBUG_r(
2869 PerlIO_printf(Perl_debug_log,
2870 "%sDetected a super-linear match, switching on caching%s...\n",
2871 PL_colors[4], PL_colors[5])
2872 );
2873 }
2874 if (PL_reg_leftiter < 0) {
2875 I32 o = locinput - PL_bostr, b;
2876
2877 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2878 b = o % 8;
2879 o /= 8;
2880 if (PL_reg_poscache[o] & (1<<b)) {
2881 DEBUG_r(
2882 PerlIO_printf(Perl_debug_log,
2883 "%*s already tried at this position...\n",
2884 REPORT_CODE_OFF+PL_regindent*2, "")
2885 );
7821416a 2886 sayNO_SILENT;
2c2d71f5
JH
2887 }
2888 PL_reg_poscache[o] |= (1<<b);
2889 }
2890 }
2891
a0d0e21e
LW
2892 /* Prefer next over scan for minimal matching. */
2893
2894 if (cc->minmod) {
3280af22 2895 PL_regcc = cc->oldcc;
2ab05381
IZ
2896 if (PL_regcc)
2897 ln = PL_regcc->cur;
5f05dabc 2898 cp = regcppush(cc->parenfloor);
02db2b7b 2899 REGCP_SET(lastcp);
5f05dabc 2900 if (regmatch(cc->next)) {
c277df42 2901 regcpblow(cp);
4633a7c4 2902 sayYES; /* All done. */
5f05dabc 2903 }
02db2b7b 2904 REGCP_UNWIND(lastcp);
5f05dabc 2905 regcppop();
2ab05381
IZ
2906 if (PL_regcc)
2907 PL_regcc->cur = ln;
3280af22 2908 PL_regcc = cc;
a0d0e21e 2909
c277df42 2910 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 2911 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2912 && !(PL_reg_flags & RF_warned)) {
2913 PL_reg_flags |= RF_warned;
e476b1b5 2914 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2915 "Complex regular subexpression recursion",
2916 REG_INFTY - 1);
c277df42 2917 }
4633a7c4 2918 sayNO;
c277df42 2919 }
a687059c 2920
c277df42 2921 DEBUG_r(
c3464db5
DD
2922 PerlIO_printf(Perl_debug_log,
2923 "%*s trying longer...\n",
3280af22 2924 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2925 );
a0d0e21e 2926 /* Try scanning more and see if it helps. */
3280af22 2927 PL_reginput = locinput;
a0d0e21e
LW
2928 cc->cur = n;
2929 cc->lastloc = locinput;
5f05dabc 2930 cp = regcppush(cc->parenfloor);
02db2b7b 2931 REGCP_SET(lastcp);
5f05dabc 2932 if (regmatch(cc->scan)) {
c277df42 2933 regcpblow(cp);
4633a7c4 2934 sayYES;
5f05dabc 2935 }
02db2b7b 2936 REGCP_UNWIND(lastcp);
5f05dabc 2937 regcppop();
4633a7c4 2938 cc->cur = n - 1;
c277df42 2939 cc->lastloc = lastloc;
4633a7c4 2940 sayNO;
a0d0e21e
LW
2941 }
2942
2943 /* Prefer scan over next for maximal matching. */
2944
2945 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2946 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2947 cc->cur = n;
2948 cc->lastloc = locinput;
02db2b7b 2949 REGCP_SET(lastcp);
5f05dabc 2950 if (regmatch(cc->scan)) {
c277df42 2951 regcpblow(cp);
4633a7c4 2952 sayYES;
5f05dabc 2953 }
02db2b7b 2954 REGCP_UNWIND(lastcp);
a0d0e21e 2955 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2956 PL_reginput = locinput;
c277df42 2957 DEBUG_r(
c3464db5
DD
2958 PerlIO_printf(Perl_debug_log,
2959 "%*s failed, try continuation...\n",
3280af22 2960 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2961 );
2962 }
9041c2e3 2963 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2964 && !(PL_reg_flags & RF_warned)) {
3280af22 2965 PL_reg_flags |= RF_warned;
e476b1b5 2966 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2967 "Complex regular subexpression recursion",
2968 REG_INFTY - 1);
a0d0e21e
LW
2969 }
2970
2971 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2972 PL_regcc = cc->oldcc;
2ab05381
IZ
2973 if (PL_regcc)
2974 ln = PL_regcc->cur;
a0d0e21e 2975 if (regmatch(cc->next))
4633a7c4 2976 sayYES;
2ab05381
IZ
2977 if (PL_regcc)
2978 PL_regcc->cur = ln;
3280af22 2979 PL_regcc = cc;
4633a7c4 2980 cc->cur = n - 1;
c277df42 2981 cc->lastloc = lastloc;
4633a7c4 2982 sayNO;
a0d0e21e
LW
2983 }
2984 /* NOT REACHED */
9041c2e3 2985 case BRANCHJ:
c277df42
IZ
2986 next = scan + ARG(scan);
2987 if (next == scan)
2988 next = NULL;
2989 inner = NEXTOPER(NEXTOPER(scan));
2990 goto do_branch;
9041c2e3 2991 case BRANCH:
c277df42
IZ
2992 inner = NEXTOPER(scan);
2993 do_branch:
2994 {
c277df42
IZ
2995 c1 = OP(scan);
2996 if (OP(next) != c1) /* No choice. */
2997 next = inner; /* Avoid recursion. */
a0d0e21e 2998 else {
02db2b7b
IZ
2999 I32 lastparen = *PL_reglastparen;
3000 I32 unwind1;
3001 re_unwind_branch_t *uw;
3002
3003 /* Put unwinding data on stack */
3004 unwind1 = SSNEWt(1,re_unwind_branch_t);
3005 uw = SSPTRt(unwind1,re_unwind_branch_t);
3006 uw->prev = unwind;
3007 unwind = unwind1;
3008 uw->type = ((c1 == BRANCH)
3009 ? RE_UNWIND_BRANCH
3010 : RE_UNWIND_BRANCHJ);
3011 uw->lastparen = lastparen;
3012 uw->next = next;
3013 uw->locinput = locinput;
3014 uw->nextchr = nextchr;
3015#ifdef DEBUGGING
3016 uw->regindent = ++PL_regindent;
3017#endif
c277df42 3018
02db2b7b
IZ
3019 REGCP_SET(uw->lastcp);
3020
3021 /* Now go into the first branch */
3022 next = inner;
a687059c 3023 }
a0d0e21e
LW
3024 }
3025 break;
3026 case MINMOD:
3027 minmod = 1;
3028 break;
c277df42
IZ
3029 case CURLYM:
3030 {
00db4c45 3031 I32 l = 0;
c277df42 3032 CHECKPOINT lastcp;
9041c2e3 3033
c277df42
IZ
3034 /* We suppose that the next guy does not need
3035 backtracking: in particular, it is of constant length,
3036 and has no parenths to influence future backrefs. */
3037 ln = ARG1(scan); /* min to match */
3038 n = ARG2(scan); /* max to match */
c277df42
IZ
3039 paren = scan->flags;
3040 if (paren) {
3280af22
NIS
3041 if (paren > PL_regsize)
3042 PL_regsize = paren;
3043 if (paren > *PL_reglastparen)
3044 *PL_reglastparen = paren;
c277df42 3045 }
dc45a647 3046 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3047 if (paren)
3048 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3049 PL_reginput = locinput;
c277df42
IZ
3050 if (minmod) {
3051 minmod = 0;
3052 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3053 sayNO;
f31a99c8
HS
3054 /* if we matched something zero-length we don't need to
3055 backtrack - capturing parens are already defined, so
3056 the caveat in the maximal case doesn't apply
3057
3058 XXXX if ln == 0, we can redo this check first time
3059 through the following loop
3060 */
3061 if (ln && l == 0)
3062 n = ln; /* don't backtrack */
3280af22 3063 locinput = PL_reginput;
e2d8ce26 3064 if (NEAR_EXACT(next)) {
5f80c4cf
JP
3065 regnode *text_node = next;
3066
3067 if (PL_regkind[(U8)OP(next)] != EXACT)
74750237 3068 NEXT_IMPT(text_node);
5f80c4cf
JP
3069
3070 if (PL_regkind[(U8)OP(text_node)] != EXACT) {
3071 c1 = c2 = -1000;
3072 }
3073 else {
3074 c1 = (U8)*STRING(text_node);
3075 if (OP(next) == EXACTF)
3076 c2 = PL_fold[c1];
3077 else if (OP(text_node) == EXACTFL)
3078 c2 = PL_fold_locale[c1];
3079 else
3080 c2 = c1;
3081 }
a0ed51b3
LW
3082 }
3083 else
c277df42 3084 c1 = c2 = -1000;
02db2b7b 3085 REGCP_SET(lastcp);
5f4b28b2 3086 /* This may be improved if l == 0. */
c277df42
IZ
3087 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3088 /* If it could work, try it. */
3089 if (c1 == -1000 ||
3280af22
NIS
3090 UCHARAT(PL_reginput) == c1 ||
3091 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3092 {
3093 if (paren) {
f31a99c8 3094 if (ln) {
cf93c79d
IZ
3095 PL_regstartp[paren] =
3096 HOPc(PL_reginput, -l) - PL_bostr;
3097 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3098 }
3099 else
cf93c79d 3100 PL_regendp[paren] = -1;
c277df42
IZ
3101 }
3102 if (regmatch(next))
3103 sayYES;
02db2b7b 3104 REGCP_UNWIND(lastcp);
c277df42
IZ
3105 }
3106 /* Couldn't or didn't -- move forward. */
3280af22 3107 PL_reginput = locinput;
c277df42
IZ
3108 if (regrepeat_hard(scan, 1, &l)) {
3109 ln++;
3280af22 3110 locinput = PL_reginput;
c277df42
IZ
3111 }
3112 else
3113 sayNO;
3114 }
a0ed51b3
LW
3115 }
3116 else {
c277df42 3117 n = regrepeat_hard(scan, n, &l);
f31a99c8
HS
3118 /* if we matched something zero-length we don't need to
3119 backtrack, unless the minimum count is zero and we
3120 are capturing the result - in that case the capture
3121 being defined or not may affect later execution
3122 */
3123 if (n != 0 && l == 0 && !(paren && ln == 0))
3124 ln = n; /* don't backtrack */
3280af22 3125 locinput = PL_reginput;
c277df42 3126 DEBUG_r(
5c0ca799 3127 PerlIO_printf(Perl_debug_log,
faccc32b 3128 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3129 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3130 (IV) n, (IV)l)
c277df42
IZ
3131 );
3132 if (n >= ln) {
e2d8ce26 3133 if (NEAR_EXACT(next)) {
5f80c4cf
JP
3134 regnode *text_node = next;
3135
3136 if (PL_regkind[(U8)OP(next)] != EXACT)
74750237 3137 NEXT_IMPT(text_node);
5f80c4cf
JP
3138
3139 if (PL_regkind[(U8)OP(text_node)] != EXACT) {
3140 c1 = c2 = -1000;
3141 }
3142 else {
3143 c1 = (U8)*STRING(text_node);
3144 if (OP(text_node) == EXACTF)
3145 c2 = PL_fold[c1];
3146 else if (OP(text_node) == EXACTFL)
3147 c2 = PL_fold_locale[c1];
3148 else
3149 c2 = c1;
3150 }
a0ed51b3
LW
3151 }
3152 else
c277df42
IZ
3153 c1 = c2 = -1000;
3154 }
02db2b7b 3155 REGCP_SET(lastcp);
c277df42
IZ
3156 while (n >= ln) {
3157 /* If it could work, try it. */
3158 if (c1 == -1000 ||
3280af22
NIS
3159 UCHARAT(PL_reginput) == c1 ||
3160 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3161 {
3162 DEBUG_r(
c3464db5 3163 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3164 "%*s trying tail with n=%"IVdf"...\n",
3165 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3166 );
3167 if (paren) {
3168 if (n) {
cf93c79d
IZ
3169 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3170 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3171 }
a0ed51b3 3172 else
cf93c79d 3173 PL_regendp[paren] = -1;
c277df42 3174 }
a0ed51b3
LW
3175 if (regmatch(next))
3176 sayYES;
02db2b7b 3177 REGCP_UNWIND(lastcp);
a0ed51b3 3178 }
c277df42
IZ
3179 /* Couldn't or didn't -- back up. */
3180 n--;
dfe13c55 3181 locinput = HOPc(locinput, -l);
3280af22 3182 PL_reginput = locinput;
c277df42
IZ
3183 }
3184 }
3185 sayNO;
3186 break;
3187 }
3188 case CURLYN:
3189 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3190 if (paren > PL_regsize)
3191 PL_regsize = paren;
3192 if (paren > *PL_reglastparen)
3193 *PL_reglastparen = paren;
c277df42
IZ
3194 ln = ARG1(scan); /* min to match */
3195 n = ARG2(scan); /* max to match */
dc45a647 3196 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3197 goto repeat;
a0d0e21e 3198 case CURLY:
c277df42 3199 paren = 0;
a0d0e21e
LW
3200 ln = ARG1(scan); /* min to match */
3201 n = ARG2(scan); /* max to match */
dc45a647 3202 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3203 goto repeat;
3204 case STAR:
3205 ln = 0;
c277df42 3206 n = REG_INFTY;
a0d0e21e 3207 scan = NEXTOPER(scan);
c277df42 3208 paren = 0;
a0d0e21e
LW
3209 goto repeat;
3210 case PLUS:
c277df42
IZ
3211 ln = 1;
3212 n = REG_INFTY;
3213 scan = NEXTOPER(scan);
3214 paren = 0;
3215 repeat:
a0d0e21e
LW
3216 /*
3217 * Lookahead to avoid useless match attempts
3218 * when we know what character comes next.
3219 */
5f80c4cf
JP
3220
3221 /*
3222 * Used to only do .*x and .*?x, but now it allows
3223 * for )'s, ('s and (?{ ... })'s to be in the way
3224 * of the quantifier and the EXACT-like node. -- japhy
3225 */
3226
e2d8ce26 3227 if (NEAR_EXACT(next)) {
5f80c4cf
JP
3228 U8 *s;
3229 regnode *text_node = next;
3230
3231 if (PL_regkind[(U8)OP(next)] != EXACT)
74750237 3232 NEXT_IMPT(text_node);
5f80c4cf
JP
3233
3234 if (PL_regkind[(U8)OP(text_node)] != EXACT) {
3235 c1 = c2 = -1000;
3236 }
3237 else {
3238 s = (U8*)STRING(text_node);
3239
3240 if (!UTF) {
3241 c2 = c1 = *s;
3242 if (OP(text_node) == EXACTF)
3243 c2 = PL_fold[c1];
3244 else if (OP(text_node) == EXACTFL)
3245 c2 = PL_fold_locale[c1];
1aa99e6b 3246 }
5f80c4cf
JP
3247 else { /* UTF */
3248 if (OP(text_node) == EXACTF) {
a2a2844f
JH
3249 STRLEN ulen1, ulen2;
3250 U8 tmpbuf1[UTF8_MAXLEN*2+1];
3251 U8 tmpbuf2[UTF8_MAXLEN*2+1];
3252
3253 to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
3254 to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
3255
3256 c1 = utf8_to_uvuni(tmpbuf1, 0);
3257 c2 = utf8_to_uvuni(tmpbuf2, 0);
5f80c4cf
JP
3258 }
3259 else {
3260 c2 = c1 = utf8_to_uvchr(s, NULL);
3261 }
1aa99e6b
IH
3262 }
3263 }
bbce6d69 3264 }
a0d0e21e 3265 else
bbce6d69 3266 c1 = c2 = -1000;
3280af22 3267 PL_reginput = locinput;
a0d0e21e 3268 if (minmod) {
c277df42 3269 CHECKPOINT lastcp;
a0d0e21e
LW
3270 minmod = 0;
3271 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3272 sayNO;
a0ed51b3 3273 locinput = PL_reginput;
02db2b7b 3274 REGCP_SET(lastcp);
0fe9bf95 3275 if (c1 != -1000) {
1aa99e6b 3276 char *e; /* Should not check after this */
0fe9bf95
IZ
3277 char *old = locinput;
3278
1aa99e6b 3279 if (n == REG_INFTY) {
0fe9bf95 3280 e = PL_regeol - 1;
1aa99e6b
IH
3281 if (do_utf8)
3282 while (UTF8_IS_CONTINUATION(*(U8*)e))
3283 e--;
3284 }
3285 else if (do_utf8) {
3286 int m = n - ln;
3287 for (e = locinput;
3288 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3289 e += UTF8SKIP(e);
3290 }
3291 else {
3292 e = locinput + n - ln;
3293 if (e >= PL_regeol)
3294 e = PL_regeol - 1;
3295 }
0fe9bf95 3296 while (1) {
1aa99e6b 3297 int count;
0fe9bf95 3298 /* Find place 'next' could work */
1aa99e6b
IH
3299 if (!do_utf8) {
3300 if (c1 == c2) {
a8e8ab15
JH
3301 while (locinput <= e &&
3302 UCHARAT(locinput) != c1)
1aa99e6b
IH
3303 locinput++;
3304 } else {
9041c2e3 3305 while (locinput <= e
a8e8ab15
JH
3306 && UCHARAT(locinput) != c1
3307 && UCHARAT(locinput) != c2)
1aa99e6b
IH
3308 locinput++;
3309 }
3310 count = locinput - old;
3311 }
3312 else {
3313 STRLEN len;
3314 if (c1 == c2) {
3315 for (count = 0;
3316 locinput <= e &&
9041c2e3 3317 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3318 count++)
3319 locinput += len;
3320
3321 } else {
3322 for (count = 0; locinput <= e; count++) {
9041c2e3 3323 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3324 if (c == c1 || c == c2)
3325 break;
9041c2e3 3326 locinput += len;
1aa99e6b
IH
3327 }
3328 }
0fe9bf95 3329 }
9041c2e3 3330 if (locinput > e)
0fe9bf95
IZ
3331 sayNO;
3332 /* PL_reginput == old now */
3333 if (locinput != old) {
3334 ln = 1; /* Did some */
1aa99e6b 3335 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3336 sayNO;
3337 }
3338 /* PL_reginput == locinput now */
29d1e993 3339 TRYPAREN(paren, ln, locinput);
0fe9bf95 3340 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3341 REGCP_UNWIND(lastcp);
0fe9bf95 3342 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3343 old = locinput;
3344 if (do_utf8)
3345 locinput += UTF8SKIP(locinput);
3346 else
3347 locinput++;
0fe9bf95
IZ
3348 }
3349 }
3350 else
c277df42 3351 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3352 UV c;
3353 if (c1 != -1000) {
3354 if (do_utf8)
9041c2e3 3355 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3356 else
9041c2e3 3357 c = UCHARAT(PL_reginput);
2390ecbc
PP
3358 /* If it could work, try it. */
3359 if (c == c1 || c == c2)
3360 {
3361 TRYPAREN(paren, n, PL_reginput);
3362 REGCP_UNWIND(lastcp);
3363 }
1aa99e6b 3364 }
a0d0e21e 3365 /* If it could work, try it. */
2390ecbc 3366 else if (c1 == -1000)
bbce6d69 3367 {
29d1e993 3368 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3369 REGCP_UNWIND(lastcp);
bbce6d69 3370 }
c277df42 3371 /* Couldn't or didn't -- move forward. */
a0ed51b3 3372 PL_reginput = locinput;
a0d0e21e
LW
3373 if (regrepeat(scan, 1)) {
3374 ln++;
a0ed51b3
LW
3375 locinput = PL_reginput;
3376 }
3377 else
4633a7c4 3378 sayNO;
a0d0e21e
LW
3379 }
3380 }
3381 else {
c277df42 3382 CHECKPOINT lastcp;
a0d0e21e 3383 n = regrepeat(scan, n);
a0ed51b3 3384 locinput = PL_reginput;
22c35a8c 3385 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3386 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3387 ln = n; /* why back off? */
1aeab75a
GS
3388 /* ...because $ and \Z can match before *and* after
3389 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3390 We should back off by one in this case. */
3391 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3392 ln--;
3393 }
02db2b7b 3394 REGCP_SET(lastcp);
c277df42 3395 if (paren) {
8fa7f367 3396 UV c = 0;
c277df42 3397 while (n >= ln) {
1aa99e6b
IH
3398 if (c1 != -1000) {
3399 if (do_utf8)
9041c2e3 3400 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3401 else
9041c2e3 3402 c = UCHARAT(PL_reginput);
1aa99e6b 3403 }
c277df42 3404 /* If it could work, try it. */
1aa99e6b 3405 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3406 {
29d1e993 3407 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3408 REGCP_UNWIND(lastcp);
c277df42
IZ
3409 }
3410 /* Couldn't or didn't -- back up. */
3411 n--;
dfe13c55 3412 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3413 }
a0ed51b3
LW
3414 }
3415 else {
8fa7f367 3416 UV c = 0;
c277df42 3417 while (n >= ln) {
1aa99e6b
IH
3418 if (c1 != -1000) {
3419 if (do_utf8)
9041c2e3 3420 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3421 else
9041c2e3 3422 c = UCHARAT(PL_reginput);
1aa99e6b 3423 }
c277df42 3424 /* If it could work, try it. */
1aa99e6b 3425 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3426 {
29d1e993 3427 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3428 REGCP_UNWIND(lastcp);
c277df42
IZ
3429 }
3430 /* Couldn't or didn't -- back up. */
3431 n--;
dfe13c55 3432 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3433 }
a0d0e21e
LW
3434 }
3435 }
4633a7c4 3436 sayNO;
c277df42 3437 break;
a0d0e21e 3438 case END:
0f5d15d6
IZ
3439 if (PL_reg_call_cc) {
3440 re_cc_state *cur_call_cc = PL_reg_call_cc;
3441 CURCUR *cctmp = PL_regcc;
3442 regexp *re = PL_reg_re;
3443 CHECKPOINT cp, lastcp;
3444
3445 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3446 REGCP_SET(lastcp);
0f5d15d6
IZ
3447 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3448 the caller. */
3449 PL_reginput = locinput; /* Make position available to
3450 the callcc. */
3451 cache_re(PL_reg_call_cc->re);
3452 PL_regcc = PL_reg_call_cc->cc;
3453 PL_reg_call_cc = PL_reg_call_cc->prev;
3454 if (regmatch(cur_call_cc->node)) {
3455 PL_reg_call_cc = cur_call_cc;
3456 regcpblow(cp);
3457 sayYES;
3458 }
02db2b7b 3459 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3460 regcppop();
3461 PL_reg_call_cc = cur_call_cc;
3462 PL_regcc = cctmp;
3463 PL_reg_re = re;
3464 cache_re(re);
3465
3466 DEBUG_r(
3467 PerlIO_printf(Perl_debug_log,
3468 "%*s continuation failed...\n",
3469 REPORT_CODE_OFF+PL_regindent*2, "")
3470 );
7821416a 3471 sayNO_SILENT;
0f5d15d6 3472 }
7821416a
IZ
3473 if (locinput < PL_regtill) {
3474 DEBUG_r(PerlIO_printf(Perl_debug_log,
3475 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3476 PL_colors[4],
3477 (long)(locinput - PL_reg_starttry),
3478 (long)(PL_regtill - PL_reg_starttry),
3479 PL_colors[5]));
3480 sayNO_FINAL; /* Cannot match: too short. */
3481 }
3482 PL_reginput = locinput; /* put where regtry can find it */
3483 sayYES_FINAL; /* Success! */
7e5428c5 3484 case SUCCEED:
3280af22 3485 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3486 sayYES_LOUD; /* Success! */
c277df42
IZ
3487 case SUSPEND:
3488 n = 1;
9fe1d20c 3489 PL_reginput = locinput;
9041c2e3 3490 goto do_ifmatch;
a0d0e21e 3491 case UNLESSM:
c277df42 3492 n = 0;
a0ed51b3 3493 if (scan->flags) {
efb30f32
HS
3494 s = HOPBACKc(locinput, scan->flags);
3495 if (!s)
3496 goto say_yes;
3497 PL_reginput = s;
a0ed51b3
LW
3498 }
3499 else
3500 PL_reginput = locinput;
c277df42
IZ
3501 goto do_ifmatch;
3502 case IFMATCH:
3503 n = 1;
a0ed51b3 3504 if (scan->flags) {
efb30f32
HS
3505 s = HOPBACKc(locinput, scan->flags);
3506 if (!s)
3507 goto say_no;
3508 PL_reginput = s;
a0ed51b3
LW
3509 }
3510 else
3511 PL_reginput = locinput;
3512
c277df42 3513 do_ifmatch:
c277df42
IZ
3514 inner = NEXTOPER(NEXTOPER(scan));
3515 if (regmatch(inner) != n) {
3516 say_no:
3517 if (logical) {
3518 logical = 0;
3519 sw = 0;
3520 goto do_longjump;
a0ed51b3
LW
3521 }
3522 else
c277df42
IZ
3523 sayNO;
3524 }
3525 say_yes:
3526 if (logical) {
3527 logical = 0;
3528 sw = 1;
3529 }
fe44a5e8 3530 if (OP(scan) == SUSPEND) {
3280af22 3531 locinput = PL_reginput;
565764a8 3532 nextchr = UCHARAT(locinput);
fe44a5e8 3533 }
c277df42
IZ
3534 /* FALL THROUGH. */
3535 case LONGJMP:
3536 do_longjump:
3537 next = scan + ARG(scan);
3538 if (next == scan)
3539 next = NULL;
a0d0e21e
LW
3540 break;
3541 default:
b900a521 3542 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3543 PTR2UV(scan), OP(scan));
cea2e8a9 3544 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3545 }
02db2b7b 3546 reenter:
a0d0e21e
LW
3547 scan = next;
3548 }
a687059c 3549
a0d0e21e
LW
3550 /*
3551 * We get here only if there's trouble -- normally "case END" is
3552 * the terminating point.
3553 */
cea2e8a9 3554 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3555 /*NOTREACHED*/
4633a7c4
LW
3556 sayNO;
3557
7821416a
IZ
3558yes_loud:
3559 DEBUG_r(
3560 PerlIO_printf(Perl_debug_log,
3561 "%*s %scould match...%s\n",
3562 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3563 );
3564 goto yes;
3565yes_final:
3566 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3567 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3568yes:
3569#ifdef DEBUGGING
3280af22 3570 PL_regindent--;
4633a7c4 3571#endif
02db2b7b
IZ
3572
3573#if 0 /* Breaks $^R */
3574 if (unwind)
3575 regcpblow(firstcp);
3576#endif
4633a7c4
LW
3577 return 1;
3578
3579no:
7821416a
IZ
3580 DEBUG_r(
3581 PerlIO_printf(Perl_debug_log,
3582 "%*s %sfailed...%s\n",
3583 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3584 );
3585 goto do_no;
3586no_final:
3587do_no:
02db2b7b
IZ
3588 if (unwind) {
3589 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3590
3591 switch (uw->type) {
3592 case RE_UNWIND_BRANCH:
3593 case RE_UNWIND_BRANCHJ:
3594 {
3595 re_unwind_branch_t *uwb = &(uw->branch);
3596 I32 lastparen = uwb->lastparen;
9041c2e3 3597
02db2b7b
IZ
3598 REGCP_UNWIND(uwb->lastcp);
3599 for (n = *PL_reglastparen; n > lastparen; n--)
3600 PL_regendp[n] = -1;
3601 *PL_reglastparen = n;
3602 scan = next = uwb->next;
9041c2e3
NIS
3603 if ( !scan ||
3604 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
3605 ? BRANCH : BRANCHJ) ) { /* Failure */
3606 unwind = uwb->prev;
3607#ifdef DEBUGGING
3608 PL_regindent--;
3609#endif
3610 goto do_no;
3611 }
3612 /* Have more choice yet. Reuse the same uwb. */
3613 /*SUPPRESS 560*/
3614 if ((n = (uwb->type == RE_UNWIND_BRANCH
3615 ? NEXT_OFF(next) : ARG(next))))
3616 next += n;
3617 else
3618 next = NULL; /* XXXX Needn't unwinding in this case... */
3619 uwb->next = next;
3620 next = NEXTOPER(scan);
3621 if (uwb->type == RE_UNWIND_BRANCHJ)
3622 next = NEXTOPER(next);
3623 locinput = uwb->locinput;
3624 nextchr = uwb->nextchr;
3625#ifdef DEBUGGING
3626 PL_regindent = uwb->regindent;
3627#endif
3628
3629 goto reenter;
3630 }
3631 /* NOT REACHED */
3632 default:
3633 Perl_croak(aTHX_ "regexp unwind memory corruption");
3634 }
3635 /* NOT REACHED */
3636 }
4633a7c4 3637#ifdef DEBUGGING
3280af22 3638 PL_regindent--;
4633a7c4 3639#endif
a0d0e21e 3640 return 0;
a687059c
LW
3641}
3642
3643/*
3644 - regrepeat - repeatedly match something simple, report how many
3645 */
3646/*
3647 * [This routine now assumes that it will only match on things of length 1.
3648 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3649 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3650 */
76e3520e 3651STATIC I32
cea2e8a9 3652S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3653{
a0d0e21e 3654 register char *scan;
a0d0e21e 3655 register I32 c;
3280af22 3656 register char *loceol = PL_regeol;
a0ed51b3 3657 register I32 hardcount = 0;
53c4c00c 3658 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 3659
3280af22 3660 scan = PL_reginput;
c277df42 3661 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3662 loceol = scan + max;
a0d0e21e 3663 switch (OP(p)) {
22c35a8c 3664 case REG_ANY:
1aa99e6b 3665 if (do_utf8) {
ffc61ed2 3666 loceol = PL_regeol;
1aa99e6b 3667 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3668 scan += UTF8SKIP(scan);
3669 hardcount++;
3670 }
3671 } else {
3672 while (scan < loceol && *scan != '\n')
3673 scan++;
a0ed51b3
LW
3674 }
3675 break;
ffc61ed2 3676 case SANY:
3baa4c62 3677 scan = loceol;
a0ed51b3 3678 break;
f33976b4
DB
3679 case CANY:
3680 scan = loceol;
3681 break;
bbce6d69 3682 case EXACT: /* length of string is 1 */
cd439c50 3683 c = (U8)*STRING(p);
bbce6d69 3684 while (scan < loceol && UCHARAT(scan) == c)
3685 scan++;
3686 break;
3687 case EXACTF: /* length of string is 1 */
cd439c50 3688 c = (U8)*STRING(p);
bbce6d69 3689 while (scan < loceol &&
22c35a8c 3690 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3691 scan++;
3692 break;
3693 case EXACTFL: /* length of string is 1 */
3280af22 3694 PL_reg_flags |= RF_tainted;
cd439c50 3695 c = (U8)*STRING(p);
bbce6d69 3696 while (scan < loceol &&
22c35a8c 3697 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3698 scan++;
3699 break;
3700 case ANYOF:
ffc61ed2
JH
3701 if (do_utf8) {
3702 loceol = PL_regeol;
1aa99e6b
IH
3703 while (hardcount < max && scan < loceol &&
3704 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3705 scan += UTF8SKIP(scan);
3706 hardcount++;
3707 }
3708 } else {
3709 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3710 scan++;
3711 }
a0d0e21e
LW
3712 break;
3713 case ALNUM:
1aa99e6b 3714 if (do_utf8) {
ffc61ed2 3715 loceol = PL_regeol;
8269fa76 3716 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3717 while (hardcount < max && scan < loceol &&
3568d838 3718 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3719 scan += UTF8SKIP(scan);
3720 hardcount++;
3721 }
3722 } else {
3723 while (scan < loceol && isALNUM(*scan))
3724 scan++;
a0ed51b3
LW
3725 }
3726 break;
bbce6d69 3727 case ALNUML:
3280af22 3728 PL_reg_flags |= RF_tainted;
1aa99e6b 3729 if (do_utf8) {
ffc61ed2 3730 loceol = PL_regeol;
1aa99e6b
IH
3731 while (hardcount < max && scan < loceol &&
3732 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3733 scan += UTF8SKIP(scan);
3734 hardcount++;
3735 }
3736 } else {
3737 while (scan < loceol && isALNUM_LC(*scan))
3738 scan++;
a0ed51b3
LW
3739 }
3740 break;
a0d0e21e 3741 case NALNUM:
1aa99e6b 3742 if (do_utf8) {
ffc61ed2 3743 loceol = PL_regeol;
8269fa76 3744 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3745 while (hardcount < max && scan < loceol &&
3568d838 3746 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3747 scan += UTF8SKIP(scan);
3748 hardcount++;
3749 }
3750 } else {
3751 while (scan < loceol && !isALNUM(*scan))
3752 scan++;
a0ed51b3
LW
3753 }
3754 break;
bbce6d69 3755 case NALNUML:
3280af22 3756 PL_reg_flags |= RF_tainted;
1aa99e6b 3757 if (do_utf8) {
ffc61ed2 3758 loceol = PL_regeol;
1aa99e6b
IH
3759 while (hardcount < max && scan < loceol &&
3760 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3761 scan += UTF8SKIP(scan);
3762 hardcount++;
3763 }
3764 } else {
3765 while (scan < loceol && !isALNUM_LC(*scan))
3766 scan++;
a0ed51b3
LW
3767 }
3768 break;
a0d0e21e 3769 case SPACE:
1aa99e6b 3770 if (do_utf8) {
ffc61ed2 3771 loceol = PL_regeol;
8269fa76 3772 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3773 while (hardcount < max && scan < loceol &&
3568d838
JH
3774 (*scan == ' ' ||
3775 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3776 scan += UTF8SKIP(scan);
3777 hardcount++;
3778 }
3779 } else {
3780 while (scan < loceol && isSPACE(*scan))
3781 scan++;
a0ed51b3
LW
3782 }
3783 break;
bbce6d69 3784 case SPACEL:
3280af22 3785 PL_reg_flags |= RF_tainted;
1aa99e6b 3786 if (do_utf8) {
ffc61ed2 3787 loceol = PL_regeol;
1aa99e6b 3788 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3789 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3790 scan += UTF8SKIP(scan);
3791 hardcount++;
3792 }
3793 } else {
3794 while (scan < loceol && isSPACE_LC(*scan))
3795 scan++;
a0ed51b3
LW
3796 }
3797 break;
a0d0e21e 3798 case NSPACE:
1aa99e6b 3799 if (do_utf8) {
ffc61ed2 3800 loceol = PL_regeol;
8269fa76 3801 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3802 while (hardcount < max && scan < loceol &&
3568d838
JH
3803 !(*scan == ' ' ||
3804 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3805 scan += UTF8SKIP(scan);
3806 hardcount++;
3807 }
3808 } else {
3809 while (scan < loceol && !isSPACE(*scan))
3810 scan++;
3811 break;
a0ed51b3 3812 }
bbce6d69 3813 case NSPACEL:
3280af22 3814 PL_reg_flags |= RF_tainted;
1aa99e6b 3815 if (do_utf8) {
ffc61ed2 3816 loceol = PL_regeol;
1aa99e6b 3817 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3818 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3819 scan += UTF8SKIP(scan);
3820 hardcount++;
3821 }
3822 } else {
3823 while (scan < loceol && !isSPACE_LC(*scan))
3824 scan++;
a0ed51b3
LW
3825 }
3826 break;
a0d0e21e 3827 case DIGIT:
1aa99e6b 3828 if (do_utf8) {
ffc61ed2 3829 loceol = PL_regeol;
8269fa76 3830 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3831 while (hardcount < max && scan < loceol &&
3568d838 3832 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3833 scan += UTF8SKIP(scan);
3834 hardcount++;
3835 }
3836 } else {
3837 while (scan < loceol && isDIGIT(*scan))
3838 scan++;
a0ed51b3
LW
3839 }
3840 break;
a0d0e21e 3841 case NDIGIT:
1aa99e6b 3842 if (do_utf8) {
ffc61ed2 3843 loceol = PL_regeol;
8269fa76 3844 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3845 while (hardcount < max && scan < loceol &&
3568d838 3846 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3847 scan += UTF8SKIP(scan);
3848 hardcount++;
3849 }
3850 } else {
3851 while (scan < loceol && !isDIGIT(*scan))
3852 scan++;
a0ed51b3
LW
3853 }
3854 break;
a0d0e21e
LW
3855 default: /* Called on something of 0 width. */
3856 break; /* So match right here or not at all. */
3857 }
a687059c 3858
a0ed51b3
LW
3859 if (hardcount)
3860 c = hardcount;
3861 else
3862 c = scan - PL_reginput;
3280af22 3863 PL_reginput = scan;
a687059c 3864
9041c2e3 3865 DEBUG_r(
c277df42
IZ
3866 {
3867 SV *prop = sv_newmortal();
3868
3869 regprop(prop, p);
9041c2e3
NIS
3870 PerlIO_printf(Perl_debug_log,
3871 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
7b0972df 3872 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42 3873 });
9041c2e3 3874
a0d0e21e 3875 return(c);
a687059c
LW
3876}
3877
3878/*
c277df42 3879 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 3880 *
c277df42
IZ
3881 * The repeater is supposed to have constant length.
3882 */
3883
76e3520e 3884STATIC I32
cea2e8a9 3885S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3886{
b7953727 3887 register char *scan = Nullch;
c277df42 3888 register char *start;
3280af22 3889 register char *loceol = PL_regeol;
a0ed51b3 3890 I32 l = 0;
708e3b05 3891 I32 count = 0, res = 1;
a0ed51b3
LW
3892
3893 if (!max)
3894 return 0;
c277df42 3895
3280af22 3896 start = PL_reginput;
53c4c00c 3897 if (PL_reg_match_utf8) {
708e3b05 3898 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3899 if (!count++) {
3900 l = 0;
3901 while (start < PL_reginput) {
3902 l++;
3903 start += UTF8SKIP(start);
3904 }
3905 *lp = l;
3906 if (l == 0)
3907 return max;
3908 }
3909 if (count == max)
3910 return count;
3911 }
3912 }
3913 else {
708e3b05 3914 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3915 if (!count++) {
3916 *lp = l = PL_reginput - start;
3917 if (max != REG_INFTY && l*max < loceol - scan)
3918 loceol = scan + l*max;
3919 if (l == 0)
3920 return max;
c277df42
IZ
3921 }
3922 }
3923 }
708e3b05 3924 if (!res)
3280af22 3925 PL_reginput = scan;
9041c2e3 3926
a0ed51b3 3927 return count;
c277df42
IZ
3928}
3929
3930/*
ffc61ed2
JH
3931- regclass_swash - prepare the utf8 swash
3932*/
3933
3934SV *
3935Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
3936{
3937 SV *sw = NULL;
3938 SV *si = NULL;
3939
3940 if (PL_regdata && PL_regdata->count) {
3941 U32 n = ARG(node);
3942
3943 if (PL_regdata->what[n] == 's') {
3944 SV *rv = (SV*)PL_regdata->data[n];
3945 AV *av = (AV*)SvRV((SV*)rv);
3946 SV **a;
9041c2e3 3947
ffc61ed2
JH
3948 si = *av_fetch(av, 0, FALSE);
3949 a = av_fetch(av, 1, FALSE);
9041c2e3 3950
ffc61ed2
JH
3951 if (a)
3952 sw = *a;
3953 else if (si && doinit) {
3954 sw = swash_init("utf8", "", si, 1, 0);
3955 (void)av_store(av, 1, sw);
3956 }
3957 }
3958 }
3959
3960 if (initsvp)
3961 *initsvp = si;
3962
3963 return sw;
3964}
3965
3966/*
cb8d8820 3967 - reginclass - determine if a character falls into a character class
bbce6d69 3968 */
3969
76e3520e 3970STATIC bool
ffc61ed2 3971S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
bbce6d69 3972{
ffc61ed2 3973 char flags = ANYOF_FLAGS(n);
bbce6d69 3974 bool match = FALSE;
1aa99e6b 3975 UV c;
3568d838 3976 STRLEN len = 0;
1aa99e6b 3977
3568d838 3978 c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
bbce6d69 3979
ffc61ed2
JH
3980 if (do_utf8 || (flags & ANYOF_UNICODE)) {
3981 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
3982 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
3983 match = TRUE;
bbce6d69 3984 }
3568d838 3985 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 3986 match = TRUE;
ffc61ed2
JH
3987 if (!match) {
3988 SV *sw = regclass_swash(n, TRUE, 0);
3989
3990 if (sw) {
3568d838 3991 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
3992 match = TRUE;
3993 else if (flags & ANYOF_FOLD) {
a2a2844f
JH
3994 STRLEN ulen;
3995 U8 tmpbuf[UTF8_MAXLEN*2+1];
3996
3997 toLOWER_utf8(p, tmpbuf, &ulen);
3568d838 3998 if (swash_fetch(sw, tmpbuf, do_utf8))
ffc61ed2
JH
3999 match = TRUE;
4000 }
4001 }
bbce6d69 4002 }
4003 }
1aa99e6b 4004 if (!match && c < 256) {
ffc61ed2
JH
4005 if (ANYOF_BITMAP_TEST(n, c))
4006 match = TRUE;
4007 else if (flags & ANYOF_FOLD) {
3568d838 4008 I32 f;
a0ed51b3 4009
ffc61ed2
JH
4010 if (flags & ANYOF_LOCALE) {
4011 PL_reg_flags |= RF_tainted;
4012 f = PL_fold_locale[c];
4013 }
4014 else
4015 f = PL_fold[c];
4016 if (f != c && ANYOF_BITMAP_TEST(n, f))
4017 match = TRUE;
4018 }
4019
4020 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 4021 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
4022 if (
4023 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4024 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4025 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
4026 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4027 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4028 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4029 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4030 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4031 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4032 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4033 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
4034 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
4035 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4036 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4037 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4038 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4039 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
4040 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4041 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
4042 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4043 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4044 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4045 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
4046 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4047 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4048 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
4049 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
4050 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
4051 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
4052 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
4053 ) /* How's that for a conditional? */
4054 {
4055 match = TRUE;
4056 }
a0ed51b3 4057 }
a0ed51b3
LW
4058 }
4059
a0ed51b3
LW
4060 return (flags & ANYOF_INVERT) ? !match : match;
4061}
161b471a 4062
dfe13c55 4063STATIC U8 *
cea2e8a9 4064S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 4065{
1aa99e6b
IH
4066 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4067}
4068
4069STATIC U8 *
4070S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 4071{
a0ed51b3 4072 if (off >= 0) {
1aa99e6b 4073 while (off-- && s < lim) {
ffc61ed2 4074 /* XXX could check well-formedness here */
a0ed51b3 4075 s += UTF8SKIP(s);
ffc61ed2 4076 }
a0ed51b3
LW
4077 }
4078 else {
4079 while (off++) {
1aa99e6b 4080 if (s > lim) {
a0ed51b3 4081 s--;
ffc61ed2 4082 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4083 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4084 s--;
ffc61ed2
JH
4085 }
4086 /* XXX could check well-formedness here */
a0ed51b3
LW
4087 }
4088 }
4089 }
4090 return s;
4091}
161b471a 4092
dfe13c55 4093STATIC U8 *
1aa99e6b 4094S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 4095{
1aa99e6b
IH
4096 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4097}
4098
4099STATIC U8 *
4100S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
4101{
4102 if (off >= 0) {
1aa99e6b 4103 while (off-- && s < lim) {
ffc61ed2 4104 /* XXX could check well-formedness here */
a0ed51b3 4105 s += UTF8SKIP(s);
ffc61ed2 4106 }
a0ed51b3
LW
4107 if (off >= 0)
4108 return 0;
4109 }
4110 else {
4111 while (off++) {
1aa99e6b 4112 if (s > lim) {
a0ed51b3 4113 s--;
ffc61ed2 4114 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4115 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4116 s--;
ffc61ed2
JH
4117 }
4118 /* XXX could check well-formedness here */
a0ed51b3
LW
4119 }
4120 else
4121 break;
4122 }
4123 if (off <= 0)
4124 return 0;
4125 }
4126 return s;
4127}
51371543 4128
51371543 4129static void
acfe0abc 4130restore_pos(pTHX_ void *arg)
51371543 4131{
51371543
GS
4132 if (PL_reg_eval_set) {
4133 if (PL_reg_oldsaved) {
4134 PL_reg_re->subbeg = PL_reg_oldsaved;
4135 PL_reg_re->sublen = PL_reg_oldsavedlen;
4136 RX_MATCH_COPIED_on(PL_reg_re);
4137 }
4138 PL_reg_magic->mg_len = PL_reg_oldpos;
4139 PL_reg_eval_set = 0;
4140 PL_curpm = PL_reg_oldcurpm;
4141 }
4142}