This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: [PATCH mg.c gv.c and others] ${^TAINT}
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
a687059c
LW
83#include "regcomp.h"
84
c277df42
IZ
85#define RF_tainted 1 /* tainted information used? */
86#define RF_warned 2 /* warned about big count? */
ce862d02 87#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
88#define RF_utf8 8 /* String contains multibyte chars? */
89
90#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
91
92#define RS_init 1 /* eval environment created */
93#define RS_set 2 /* replsv value is set */
c277df42 94
a687059c
LW
95#ifndef STATIC
96#define STATIC static
97#endif
98
c277df42
IZ
99/*
100 * Forwards.
101 */
102
a0ed51b3 103#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 104#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 105
dfe13c55
GS
106#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
108#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
109#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
110#define HOPc(pos,off) ((char*)HOP(pos,off))
111#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 112
efb30f32 113#define HOPBACK(pos, off) ( \
53c4c00c 114 (UTF && PL_reg_match_utf8) \
efb30f32
HS
115 ? reghopmaybe((U8*)pos, -off) \
116 : (pos - off >= PL_bostr) \
117 ? (U8*)(pos - off) \
118 : (U8*)NULL \
119)
120#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
121
1aa99e6b
IH
122#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
123#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
124#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
125#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
126#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
127#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
128
8269fa76 129#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 130
5f80c4cf 131/* for use after a quantifier and before an EXACT-like node -- japhy */
e2d8ce26
JP
132#define JUMPABLE(rn) ( \
133 OP(rn) == OPEN || OP(rn) == CLOSE || OP(rn) == EVAL || \
134 OP(rn) == SUSPEND || OP(rn) == IFMATCH \
135)
136
137#define NEAR_EXACT(rn) (PL_regkind[(U8)OP(rn)] == EXACT || JUMPABLE(rn))
138
139#define NEXT_IMPT(rn) STMT_START { \
140 while (JUMPABLE(rn)) \
141 if (OP(rn) == SUSPEND || OP(rn) == IFMATCH) \
142 rn = NEXTOPER(NEXTOPER(rn)); \
143 else rn += NEXT_OFF(rn); \
5f80c4cf 144} STMT_END
74750237 145
acfe0abc 146static void restore_pos(pTHX_ void *arg);
51371543 147
76e3520e 148STATIC CHECKPOINT
cea2e8a9 149S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 150{
3280af22 151 int retval = PL_savestack_ix;
b1ce53c5
JH
152#define REGCP_PAREN_ELEMS 4
153 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
154 int p;
155
e49a9654
IH
156 if (paren_elems_to_push < 0)
157 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
158
a01268b5 159#define REGCP_OTHER_ELEMS 6
b1ce53c5 160 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 161 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 162/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
163 SSPUSHINT(PL_regendp[p]);
164 SSPUSHINT(PL_regstartp[p]);
3280af22 165 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
166 SSPUSHINT(p);
167 }
b1ce53c5 168/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
169 SSPUSHINT(PL_regsize);
170 SSPUSHINT(*PL_reglastparen);
a01268b5 171 SSPUSHINT(*PL_reglastcloseparen);
3280af22 172 SSPUSHPTR(PL_reginput);
41123dfd
JH
173#define REGCP_FRAME_ELEMS 2
174/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
175 * are needed for the regexp context stack bookkeeping. */
176 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 177 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 178
a0d0e21e
LW
179 return retval;
180}
181
c277df42 182/* These are needed since we do not localize EVAL nodes: */
02db2b7b 183# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 184 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 185 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 186
02db2b7b 187# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 188 PerlIO_printf(Perl_debug_log, \
faccc32b 189 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 190 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 191
76e3520e 192STATIC char *
cea2e8a9 193S_regcppop(pTHX)
a0d0e21e 194{
b1ce53c5 195 I32 i;
a0d0e21e
LW
196 U32 paren = 0;
197 char *input;
cf93c79d 198 I32 tmps;
b1ce53c5
JH
199
200 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 201 i = SSPOPINT;
b1ce53c5
JH
202 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
203 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 204 input = (char *) SSPOPPTR;
a01268b5 205 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
206 *PL_reglastparen = SSPOPINT;
207 PL_regsize = SSPOPINT;
b1ce53c5
JH
208
209 /* Now restore the parentheses context. */
41123dfd
JH
210 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
211 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 212 paren = (U32)SSPOPINT;
3280af22 213 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
214 PL_regstartp[paren] = SSPOPINT;
215 tmps = SSPOPINT;
3280af22
NIS
216 if (paren <= *PL_reglastparen)
217 PL_regendp[paren] = tmps;
c277df42 218 DEBUG_r(
c3464db5 219 PerlIO_printf(Perl_debug_log,
b900a521 220 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 221 (UV)paren, (IV)PL_regstartp[paren],
b900a521 222 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 223 (IV)PL_regendp[paren],
3280af22 224 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 225 );
a0d0e21e 226 }
c277df42 227 DEBUG_r(
3280af22 228 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 229 PerlIO_printf(Perl_debug_log,
faccc32b
JH
230 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
231 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
232 }
233 );
daf18116 234#if 1
dafc8851
JH
235 /* It would seem that the similar code in regtry()
236 * already takes care of this, and in fact it is in
237 * a better location to since this code can #if 0-ed out
238 * but the code in regtry() is needed or otherwise tests
239 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
240 * (as of patchlevel 7877) will fail. Then again,
241 * this code seems to be necessary or otherwise
242 * building DynaLoader will fail:
243 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
244 * --jhi */
3280af22
NIS
245 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
246 if (paren > PL_regsize)
cf93c79d
IZ
247 PL_regstartp[paren] = -1;
248 PL_regendp[paren] = -1;
a0d0e21e 249 }
dafc8851 250#endif
a0d0e21e
LW
251 return input;
252}
253
0f5d15d6 254STATIC char *
cea2e8a9 255S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
256{
257 I32 tmp = PL_savestack_ix;
258
259 PL_savestack_ix = ss;
260 regcppop();
261 PL_savestack_ix = tmp;
942e002e 262 return Nullch;
0f5d15d6
IZ
263}
264
265typedef struct re_cc_state
266{
267 I32 ss;
268 regnode *node;
269 struct re_cc_state *prev;
270 CURCUR *cc;
271 regexp *re;
272} re_cc_state;
273
02db2b7b 274#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 275
29d1e993
HS
276#define TRYPAREN(paren, n, input) { \
277 if (paren) { \
278 if (n) { \
279 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
280 PL_regendp[paren] = input - PL_bostr; \
281 } \
282 else \
283 PL_regendp[paren] = -1; \
284 } \
285 if (regmatch(next)) \
286 sayYES; \
287 if (paren && n) \
288 PL_regendp[paren] = -1; \
289}
290
291
a687059c 292/*
e50aee73 293 * pregexec and friends
a687059c
LW
294 */
295
296/*
c277df42 297 - pregexec - match a regexp against a string
a687059c 298 */
c277df42 299I32
864dbfa3 300Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 301 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
302/* strend: pointer to null at end of string */
303/* strbeg: real beginning of string */
304/* minend: end of match must be >=minend after stringarg. */
305/* nosave: For optimizations. */
306{
307 return
9041c2e3 308 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
309 nosave ? 0 : REXEC_COPY_STR);
310}
0f5d15d6
IZ
311
312STATIC void
cea2e8a9 313S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
314{
315 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
316#ifdef DEBUGGING
317 PL_regprogram = prog->program;
318#endif
319 PL_regnpar = prog->nparens;
9041c2e3
NIS
320 PL_regdata = prog->data;
321 PL_reg_re = prog;
0f5d15d6 322}
22e551b9 323
9041c2e3 324/*
cad2e5aa
JH
325 * Need to implement the following flags for reg_anch:
326 *
327 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
328 * USE_INTUIT_ML
329 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
330 * INTUIT_AUTORITATIVE_ML
331 * INTUIT_ONCE_NOML - Intuit can match in one location only.
332 * INTUIT_ONCE_ML
333 *
334 * Another flag for this function: SECOND_TIME (so that float substrs
335 * with giant delta may be not rechecked).
336 */
337
338/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
339
2c2d71f5 340/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
341 Otherwise, only SvCUR(sv) is used to get strbeg. */
342
343/* XXXX We assume that strpos is strbeg unless sv. */
344
6eb5f6b9
JH
345/* XXXX Some places assume that there is a fixed substring.
346 An update may be needed if optimizer marks as "INTUITable"
347 RExen without fixed substrings. Similarly, it is assumed that
348 lengths of all the strings are no more than minlen, thus they
349 cannot come from lookahead.
350 (Or minlen should take into account lookahead.) */
351
2c2d71f5
JH
352/* A failure to find a constant substring means that there is no need to make
353 an expensive call to REx engine, thus we celebrate a failure. Similarly,
354 finding a substring too deep into the string means that less calls to
30944b6d
IZ
355 regtry() should be needed.
356
357 REx compiler's optimizer found 4 possible hints:
358 a) Anchored substring;
359 b) Fixed substring;
360 c) Whether we are anchored (beginning-of-line or \G);
361 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 362 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
363 string which does not contradict any of them.
364 */
2c2d71f5 365
6eb5f6b9
JH
366/* Most of decisions we do here should have been done at compile time.
367 The nodes of the REx which we used for the search should have been
368 deleted from the finite automaton. */
369
cad2e5aa
JH
370char *
371Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
372 char *strend, U32 flags, re_scream_pos_data *data)
373{
b7953727 374 register I32 start_shift = 0;
cad2e5aa 375 /* Should be nonnegative! */
b7953727 376 register I32 end_shift = 0;
2c2d71f5
JH
377 register char *s;
378 register SV *check;
a1933d95 379 char *strbeg;
cad2e5aa
JH
380 char *t;
381 I32 ml_anch;
6eb5f6b9 382 register char *other_last = Nullch; /* other substr checked before this */
b7953727 383 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
384#ifdef DEBUGGING
385 char *i_strpos = strpos;
386#endif
cad2e5aa
JH
387
388 DEBUG_r( if (!PL_colorset) reginitcolors() );
389 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 390 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
391 PL_colors[4],PL_colors[5],PL_colors[0],
392 prog->precomp,
393 PL_colors[1],
394 (strlen(prog->precomp) > 60 ? "..." : ""),
395 PL_colors[0],
b900a521 396 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
397 strpos, PL_colors[1],
398 (strend - strpos > 60 ? "..." : ""))
399 );
400
1aa99e6b
IH
401 if (prog->reganch & ROPT_UTF8)
402 PL_reg_flags |= RF_utf8;
403
404 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
2c2d71f5 405 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 406 goto fail;
2c2d71f5 407 }
a1933d95 408 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 409 PL_regeol = strend;
653099ff 410 check = prog->check_substr;
2c2d71f5 411 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
412 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
413 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 414 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 415
7e25d62c
JH
416 if (!ml_anch) {
417 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
418 | ROPT_IMPLICIT)) /* not a real BOL */
419 /* SvCUR is not set on references: SvRV and SvPVX overlap */
420 && sv && !SvROK(sv)
421 && (strpos != strbeg)) {
422 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
423 goto fail;
424 }
425 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 426 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 427 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
428 I32 slen;
429
1aa99e6b 430 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
431 if (SvTAIL(check)) {
432 slen = SvCUR(check); /* >= 1 */
cad2e5aa 433
9041c2e3 434 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
435 || (strend - s == slen && strend[-1] != '\n')) {
436 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
437 goto fail_finish;
cad2e5aa
JH
438 }
439 /* Now should match s[0..slen-2] */
440 slen--;
653099ff 441 if (slen && (*SvPVX(check) != *s
cad2e5aa 442 || (slen > 1
653099ff 443 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
444 report_neq:
445 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
446 goto fail_finish;
447 }
cad2e5aa 448 }
653099ff
GS
449 else if (*SvPVX(check) != *s
450 || ((slen = SvCUR(check)) > 1
451 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
452 goto report_neq;
453 goto success_at_start;
7e25d62c 454 }
cad2e5aa 455 }
2c2d71f5 456 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 457 s = strpos;
2c2d71f5 458 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 459 end_shift = prog->minlen - start_shift -
653099ff 460 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 461 if (!ml_anch) {
653099ff
GS
462 I32 end = prog->check_offset_max + CHR_SVLEN(check)
463 - (SvTAIL(check) != 0);
1aa99e6b 464 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
465
466 if (end_shift < eshift)
467 end_shift = eshift;
468 }
cad2e5aa 469 }
2c2d71f5 470 else { /* Can match at random position */
cad2e5aa
JH
471 ml_anch = 0;
472 s = strpos;
2c2d71f5
JH
473 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
474 /* Should be nonnegative! */
475 end_shift = prog->minlen - start_shift -
653099ff 476 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
477 }
478
2c2d71f5 479#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 480 if (end_shift < 0)
6bbae5e6 481 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
482#endif
483
2c2d71f5
JH
484 restart:
485 /* Find a possible match in the region s..strend by looking for
486 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 487 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
488 I32 p = -1; /* Internal iterator of scream. */
489 I32 *pp = data ? data->scream_pos : &p;
490
2c2d71f5
JH
491 if (PL_screamfirst[BmRARE(check)] >= 0
492 || ( BmRARE(check) == '\n'
493 && (BmPREVIOUS(check) == SvCUR(check) - 1)
494 && SvTAIL(check) ))
9041c2e3 495 s = screaminstr(sv, check,
2c2d71f5 496 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 497 else
2c2d71f5 498 goto fail_finish;
cad2e5aa
JH
499 if (data)
500 *data->scream_olds = s;
501 }
f33976b4 502 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
503 s = fbm_instr((U8*)(s + start_shift),
504 (U8*)(strend - end_shift),
505 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 506 else
1aa99e6b
IH
507 s = fbm_instr(HOP3(s, start_shift, strend),
508 HOP3(strend, -end_shift, strbeg),
2c2d71f5 509 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
510
511 /* Update the count-of-usability, remove useless subpatterns,
512 unshift s. */
2c2d71f5
JH
513
514 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
515 (s ? "Found" : "Did not find"),
516 ((check == prog->anchored_substr) ? "anchored" : "floating"),
517 PL_colors[0],
7b0972df
JH
518 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
519 SvPVX(check),
2c2d71f5
JH
520 PL_colors[1], (SvTAIL(check) ? "$" : ""),
521 (s ? " at offset " : "...\n") ) );
522
523 if (!s)
524 goto fail_finish;
525
6eb5f6b9
JH
526 check_at = s;
527
2c2d71f5 528 /* Finish the diagnostic message */
30944b6d 529 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
530
531 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
532 Start with the other substr.
533 XXXX no SCREAM optimization yet - and a very coarse implementation
534 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
535 *always* match. Probably should be marked during compile...
536 Probably it is right to do no SCREAM here...
537 */
538
539 if (prog->float_substr && prog->anchored_substr) {
30944b6d 540 /* Take into account the "other" substring. */
2c2d71f5
JH
541 /* XXXX May be hopelessly wrong for UTF... */
542 if (!other_last)
6eb5f6b9 543 other_last = strpos;
2c2d71f5 544 if (check == prog->float_substr) {
30944b6d
IZ
545 do_other_anchored:
546 {
1aa99e6b 547 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
548 char *s1 = s;
549
2c2d71f5
JH
550 t = s - prog->check_offset_max;
551 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
552 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 553 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 554 && t > strpos)))
30944b6d 555 /* EMPTY */;
2c2d71f5
JH
556 else
557 t = strpos;
1aa99e6b 558 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
559 if (t < other_last) /* These positions already checked */
560 t = other_last;
1aa99e6b 561 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
562 if (last < last1)
563 last1 = last;
564 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
565 /* On end-of-str: see comment below. */
566 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
567 HOP3(HOP3(last1, prog->anchored_offset, strend)
568 + SvCUR(prog->anchored_substr),
569 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
570 prog->anchored_substr,
571 PL_multiline ? FBMrf_MULTILINE : 0);
572 DEBUG_r(PerlIO_printf(Perl_debug_log,
573 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
574 (s ? "Found" : "Contradicts"),
575 PL_colors[0],
7b0972df
JH
576 (int)(SvCUR(prog->anchored_substr)
577 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
578 SvPVX(prog->anchored_substr),
579 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
580 if (!s) {
581 if (last1 >= last2) {
582 DEBUG_r(PerlIO_printf(Perl_debug_log,
583 ", giving up...\n"));
584 goto fail_finish;
585 }
586 DEBUG_r(PerlIO_printf(Perl_debug_log,
587 ", trying floating at offset %ld...\n",
1aa99e6b
IH
588 (long)(HOP3c(s1, 1, strend) - i_strpos)));
589 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
590 s = HOP3c(last, 1, strend);
2c2d71f5
JH
591 goto restart;
592 }
593 else {
594 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 595 (long)(s - i_strpos)));
1aa99e6b
IH
596 t = HOP3c(s, -prog->anchored_offset, strbeg);
597 other_last = HOP3c(s, 1, strend);
30944b6d 598 s = s1;
2c2d71f5
JH
599 if (t == strpos)
600 goto try_at_start;
2c2d71f5
JH
601 goto try_at_offset;
602 }
30944b6d 603 }
2c2d71f5
JH
604 }
605 else { /* Take into account the floating substring. */
606 char *last, *last1;
607 char *s1 = s;
608
1aa99e6b
IH
609 t = HOP3c(s, -start_shift, strbeg);
610 last1 = last =
611 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
612 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
613 last = HOP3c(t, prog->float_max_offset, strend);
614 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
615 if (s < other_last)
616 s = other_last;
2c2d71f5
JH
617 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
618 /* fbm_instr() takes into account exact value of end-of-str
619 if the check is SvTAIL(ed). Since false positives are OK,
620 and end-of-str is not later than strend we are OK. */
621 s = fbm_instr((unsigned char*)s,
622 (unsigned char*)last + SvCUR(prog->float_substr)
623 - (SvTAIL(prog->float_substr)!=0),
624 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
625 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
626 (s ? "Found" : "Contradicts"),
627 PL_colors[0],
7b0972df
JH
628 (int)(SvCUR(prog->float_substr)
629 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
630 SvPVX(prog->float_substr),
631 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
632 if (!s) {
633 if (last1 == last) {
634 DEBUG_r(PerlIO_printf(Perl_debug_log,
635 ", giving up...\n"));
636 goto fail_finish;
637 }
638 DEBUG_r(PerlIO_printf(Perl_debug_log,
639 ", trying anchored starting at offset %ld...\n",
30944b6d 640 (long)(s1 + 1 - i_strpos)));
803ff556 641 other_last = last;
1aa99e6b 642 s = HOP3c(t, 1, strend);
2c2d71f5
JH
643 goto restart;
644 }
645 else {
646 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 647 (long)(s - i_strpos)));
803ff556 648 other_last = s; /* Fix this later. --Hugo */
30944b6d 649 s = s1;
2c2d71f5
JH
650 if (t == strpos)
651 goto try_at_start;
2c2d71f5
JH
652 goto try_at_offset;
653 }
654 }
cad2e5aa 655 }
2c2d71f5
JH
656
657 t = s - prog->check_offset_max;
2c2d71f5
JH
658 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
659 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
660 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
661 && t > strpos))) {
2c2d71f5
JH
662 /* Fixed substring is found far enough so that the match
663 cannot start at strpos. */
664 try_at_offset:
cad2e5aa 665 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
666 /* Eventually fbm_*() should handle this, but often
667 anchored_offset is not 0, so this check will not be wasted. */
668 /* XXXX In the code below we prefer to look for "^" even in
669 presence of anchored substrings. And we search even
670 beyond the found float position. These pessimizations
671 are historical artefacts only. */
672 find_anchor:
2c2d71f5 673 while (t < strend - prog->minlen) {
cad2e5aa 674 if (*t == '\n') {
4ee3650e 675 if (t < check_at - prog->check_offset_min) {
30944b6d 676 if (prog->anchored_substr) {
4ee3650e
GS
677 /* Since we moved from the found position,
678 we definitely contradict the found anchored
30944b6d
IZ
679 substr. Due to the above check we do not
680 contradict "check" substr.
681 Thus we can arrive here only if check substr
682 is float. Redo checking for "other"=="fixed".
683 */
9041c2e3 684 strpos = t + 1;
30944b6d
IZ
685 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
686 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
687 goto do_other_anchored;
688 }
4ee3650e
GS
689 /* We don't contradict the found floating substring. */
690 /* XXXX Why not check for STCLASS? */
cad2e5aa 691 s = t + 1;
2c2d71f5 692 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 693 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
694 goto set_useful;
695 }
4ee3650e
GS
696 /* Position contradicts check-string */
697 /* XXXX probably better to look for check-string
698 than for "\n", so one should lower the limit for t? */
699 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 700 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 701 other_last = strpos = s = t + 1;
cad2e5aa
JH
702 goto restart;
703 }
704 t++;
705 }
2c2d71f5
JH
706 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
707 PL_colors[0],PL_colors[1]));
708 goto fail_finish;
cad2e5aa 709 }
f5952150
GS
710 else {
711 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
712 PL_colors[0],PL_colors[1]));
713 }
cad2e5aa
JH
714 s = t;
715 set_useful:
2c2d71f5 716 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
717 }
718 else {
f5952150 719 /* The found string does not prohibit matching at strpos,
2c2d71f5 720 - no optimization of calling REx engine can be performed,
f5952150
GS
721 unless it was an MBOL and we are not after MBOL,
722 or a future STCLASS check will fail this. */
2c2d71f5
JH
723 try_at_start:
724 /* Even in this situation we may use MBOL flag if strpos is offset
725 wrt the start of the string. */
05b4157f 726 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 727 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
728 /* May be due to an implicit anchor of m{.*foo} */
729 && !(prog->reganch & ROPT_IMPLICIT))
730 {
cad2e5aa
JH
731 t = strpos;
732 goto find_anchor;
733 }
30944b6d 734 DEBUG_r( if (ml_anch)
f5952150
GS
735 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
736 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 737 );
2c2d71f5 738 success_at_start:
30944b6d 739 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 740 && prog->check_substr /* Could be deleted already */
cad2e5aa 741 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
742 && prog->check_substr == prog->float_substr)
743 {
cad2e5aa 744 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 745 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
746 SvREFCNT_dec(prog->check_substr);
747 prog->check_substr = Nullsv; /* disable */
748 prog->float_substr = Nullsv; /* clear */
5e39e1e5 749 check = Nullsv; /* abort */
cad2e5aa 750 s = strpos;
3cf5c195
IZ
751 /* XXXX This is a remnant of the old implementation. It
752 looks wasteful, since now INTUIT can use many
6eb5f6b9 753 other heuristics. */
cad2e5aa
JH
754 prog->reganch &= ~RE_USE_INTUIT;
755 }
756 else
757 s = strpos;
758 }
759
6eb5f6b9
JH
760 /* Last resort... */
761 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
762 if (prog->regstclass) {
763 /* minlen == 0 is possible if regstclass is \b or \B,
764 and the fixed substr is ''$.
765 Since minlen is already taken into account, s+1 is before strend;
766 accidentally, minlen >= 1 guaranties no false positives at s + 1
767 even for \b or \B. But (minlen? 1 : 0) below assumes that
768 regstclass does not come from lookahead... */
769 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
770 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 771 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 772 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 773 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 774 : 1);
6eb5f6b9 775 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
776 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
777 : (prog->float_substr
778 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
779 cl_l, strend)
780 : strend);
a1933d95 781 char *startpos = strbeg;
6eb5f6b9
JH
782
783 t = s;
76384e4a 784 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 785 PL_regdata = prog->data;
76384e4a
GS
786 PL_bostr = startpos;
787 }
f33976b4 788 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
6eb5f6b9
JH
789 if (!s) {
790#ifdef DEBUGGING
b7953727 791 char *what = 0;
6eb5f6b9
JH
792#endif
793 if (endpos == strend) {
794 DEBUG_r( PerlIO_printf(Perl_debug_log,
795 "Could not match STCLASS...\n") );
796 goto fail;
797 }
66e933ab
GS
798 DEBUG_r( PerlIO_printf(Perl_debug_log,
799 "This position contradicts STCLASS...\n") );
653099ff
GS
800 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
801 goto fail;
6eb5f6b9
JH
802 /* Contradict one of substrings */
803 if (prog->anchored_substr) {
6eb5f6b9
JH
804 if (prog->anchored_substr == check) {
805 DEBUG_r( what = "anchored" );
806 hop_and_restart:
1aa99e6b 807 s = HOP3c(t, 1, strend);
66e933ab
GS
808 if (s + start_shift + end_shift > strend) {
809 /* XXXX Should be taken into account earlier? */
810 DEBUG_r( PerlIO_printf(Perl_debug_log,
811 "Could not match STCLASS...\n") );
812 goto fail;
813 }
5e39e1e5
HS
814 if (!check)
815 goto giveup;
6eb5f6b9 816 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 817 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
818 what, (long)(s + start_shift - i_strpos)) );
819 goto restart;
820 }
66e933ab 821 /* Have both, check_string is floating */
6eb5f6b9
JH
822 if (t + start_shift >= check_at) /* Contradicts floating=check */
823 goto retry_floating_check;
824 /* Recheck anchored substring, but not floating... */
9041c2e3 825 s = check_at;
5e39e1e5
HS
826 if (!check)
827 goto giveup;
6eb5f6b9 828 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 829 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
830 (long)(other_last - i_strpos)) );
831 goto do_other_anchored;
832 }
60e71179
GS
833 /* Another way we could have checked stclass at the
834 current position only: */
835 if (ml_anch) {
836 s = t = t + 1;
5e39e1e5
HS
837 if (!check)
838 goto giveup;
60e71179 839 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
840 "Looking for /%s^%s/m starting at offset %ld...\n",
841 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 842 goto try_at_offset;
66e933ab 843 }
60e71179
GS
844 if (!prog->float_substr) /* Could have been deleted */
845 goto fail;
6eb5f6b9
JH
846 /* Check is floating subtring. */
847 retry_floating_check:
848 t = check_at - start_shift;
849 DEBUG_r( what = "floating" );
850 goto hop_and_restart;
851 }
b7953727
JH
852 if (t != s) {
853 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 854 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
855 (long)(t - i_strpos), (long)(s - i_strpos))
856 );
857 }
858 else {
859 DEBUG_r(PerlIO_printf(Perl_debug_log,
860 "Does not contradict STCLASS...\n");
861 );
862 }
6eb5f6b9 863 }
5e39e1e5
HS
864 giveup:
865 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
866 PL_colors[4], (check ? "Guessed" : "Giving up"),
867 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 868 return s;
2c2d71f5
JH
869
870 fail_finish: /* Substring not found */
66e933ab
GS
871 if (prog->check_substr) /* could be removed already */
872 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 873 fail:
2c2d71f5 874 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
875 PL_colors[4],PL_colors[5]));
876 return Nullch;
877}
9661b544 878
6eb5f6b9 879/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
880STATIC char *
881S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 882{
6eb5f6b9
JH
883 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
884 char *m;
d8093b23
G
885 STRLEN ln;
886 unsigned int c1;
887 unsigned int c2;
6eb5f6b9
JH
888 char *e;
889 register I32 tmp = 1; /* Scratch variable? */
53c4c00c 890 register bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 891
6eb5f6b9
JH
892 /* We know what class it must start with. */
893 switch (OP(c)) {
6eb5f6b9 894 case ANYOF:
a0ed51b3 895 while (s < strend) {
ffc61ed2 896 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
897 if (tmp && (norun || regtry(prog, s)))
898 goto got_it;
899 else
900 tmp = doevery;
a0ed51b3 901 }
6eb5f6b9
JH
902 else
903 tmp = 1;
ffc61ed2 904 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 905 }
6eb5f6b9 906 break;
f33976b4
DB
907 case CANY:
908 while (s < strend) {
909 if (tmp && (norun || regtry(prog, s)))
910 goto got_it;
911 else
912 tmp = doevery;
913 s++;
914 }
915 break;
6eb5f6b9
JH
916 case EXACTF:
917 m = STRING(c);
918 ln = STR_LEN(c);
1aa99e6b
IH
919 if (UTF) {
920 c1 = to_utf8_lower((U8*)m);
921 c2 = to_utf8_upper((U8*)m);
922 }
923 else {
924 c1 = *(U8*)m;
925 c2 = PL_fold[c1];
926 }
6eb5f6b9
JH
927 goto do_exactf;
928 case EXACTFL:
929 m = STRING(c);
930 ln = STR_LEN(c);
d8093b23 931 c1 = *(U8*)m;
6eb5f6b9
JH
932 c2 = PL_fold_locale[c1];
933 do_exactf:
934 e = strend - ln;
b3c9acc1 935
6eb5f6b9
JH
936 if (norun && e < s)
937 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
938
939 if (do_utf8) {
940 STRLEN len;
941 if (c1 == c2)
942 while (s <= e) {
9041c2e3 943 if ( utf8_to_uvchr((U8*)s, &len) == c1
1aa99e6b
IH
944 && regtry(prog, s) )
945 goto got_it;
946 s += len;
947 }
948 else
949 while (s <= e) {
9041c2e3 950 UV c = utf8_to_uvchr((U8*)s, &len);
1aa99e6b
IH
951 if ( (c == c1 || c == c2) && regtry(prog, s) )
952 goto got_it;
953 s += len;
954 }
955 }
956 else {
957 if (c1 == c2)
958 while (s <= e) {
959 if ( *(U8*)s == c1
960 && (ln == 1 || !(OP(c) == EXACTF
961 ? ibcmp(s, m, ln)
962 : ibcmp_locale(s, m, ln)))
963 && (norun || regtry(prog, s)) )
964 goto got_it;
965 s++;
966 }
967 else
968 while (s <= e) {
969 if ( (*(U8*)s == c1 || *(U8*)s == c2)
970 && (ln == 1 || !(OP(c) == EXACTF
971 ? ibcmp(s, m, ln)
972 : ibcmp_locale(s, m, ln)))
973 && (norun || regtry(prog, s)) )
974 goto got_it;
975 s++;
976 }
b3c9acc1
IZ
977 }
978 break;
bbce6d69 979 case BOUNDL:
3280af22 980 PL_reg_flags |= RF_tainted;
bbce6d69 981 /* FALL THROUGH */
a0d0e21e 982 case BOUND:
ffc61ed2 983 if (do_utf8) {
12d33761 984 if (s == PL_bostr)
ffc61ed2
JH
985 tmp = '\n';
986 else {
1aa99e6b 987 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 988
0064a8a9
JH
989 if (s > (char*)r)
990 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
991 }
992 tmp = ((OP(c) == BOUND ?
9041c2e3 993 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 994 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
995 while (s < strend) {
996 if (tmp == !(OP(c) == BOUND ?
3568d838 997 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
998 isALNUM_LC_utf8((U8*)s)))
999 {
1000 tmp = !tmp;
1001 if ((norun || regtry(prog, s)))
1002 goto got_it;
1003 }
1004 s += UTF8SKIP(s);
a687059c 1005 }
a0d0e21e 1006 }
667bb95a 1007 else {
12d33761 1008 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1009 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1010 while (s < strend) {
1011 if (tmp ==
1012 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1013 tmp = !tmp;
1014 if ((norun || regtry(prog, s)))
1015 goto got_it;
1016 }
1017 s++;
a0ed51b3 1018 }
a0ed51b3 1019 }
6eb5f6b9 1020 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1021 goto got_it;
1022 break;
bbce6d69 1023 case NBOUNDL:
3280af22 1024 PL_reg_flags |= RF_tainted;
bbce6d69 1025 /* FALL THROUGH */
a0d0e21e 1026 case NBOUND:
ffc61ed2 1027 if (do_utf8) {
12d33761 1028 if (s == PL_bostr)
ffc61ed2
JH
1029 tmp = '\n';
1030 else {
1aa99e6b 1031 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1032
0064a8a9
JH
1033 if (s > (char*)r)
1034 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1035 }
1036 tmp = ((OP(c) == NBOUND ?
9041c2e3 1037 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1038 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1039 while (s < strend) {
1040 if (tmp == !(OP(c) == NBOUND ?
3568d838 1041 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1042 isALNUM_LC_utf8((U8*)s)))
1043 tmp = !tmp;
1044 else if ((norun || regtry(prog, s)))
1045 goto got_it;
1046 s += UTF8SKIP(s);
1047 }
a0d0e21e 1048 }
667bb95a 1049 else {
12d33761 1050 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1051 tmp = ((OP(c) == NBOUND ?
1052 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1053 while (s < strend) {
1054 if (tmp ==
1055 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1056 tmp = !tmp;
1057 else if ((norun || regtry(prog, s)))
1058 goto got_it;
1059 s++;
1060 }
a0ed51b3 1061 }
6eb5f6b9 1062 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1063 goto got_it;
1064 break;
a0d0e21e 1065 case ALNUM:
ffc61ed2 1066 if (do_utf8) {
8269fa76 1067 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1068 while (s < strend) {
3568d838 1069 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1070 if (tmp && (norun || regtry(prog, s)))
1071 goto got_it;
1072 else
1073 tmp = doevery;
1074 }
bbce6d69 1075 else
ffc61ed2
JH
1076 tmp = 1;
1077 s += UTF8SKIP(s);
bbce6d69 1078 }
bbce6d69 1079 }
ffc61ed2
JH
1080 else {
1081 while (s < strend) {
1082 if (isALNUM(*s)) {
1083 if (tmp && (norun || regtry(prog, s)))
1084 goto got_it;
1085 else
1086 tmp = doevery;
1087 }
a0ed51b3 1088 else
ffc61ed2
JH
1089 tmp = 1;
1090 s++;
a0ed51b3 1091 }
a0ed51b3
LW
1092 }
1093 break;
bbce6d69 1094 case ALNUML:
3280af22 1095 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1096 if (do_utf8) {
1097 while (s < strend) {
1098 if (isALNUM_LC_utf8((U8*)s)) {
1099 if (tmp && (norun || regtry(prog, s)))
1100 goto got_it;
1101 else
1102 tmp = doevery;
1103 }
a687059c 1104 else
ffc61ed2
JH
1105 tmp = 1;
1106 s += UTF8SKIP(s);
a0d0e21e 1107 }
a0d0e21e 1108 }
ffc61ed2
JH
1109 else {
1110 while (s < strend) {
1111 if (isALNUM_LC(*s)) {
1112 if (tmp && (norun || regtry(prog, s)))
1113 goto got_it;
1114 else
1115 tmp = doevery;
1116 }
a0ed51b3 1117 else
ffc61ed2
JH
1118 tmp = 1;
1119 s++;
a0ed51b3 1120 }
a0ed51b3
LW
1121 }
1122 break;
a0d0e21e 1123 case NALNUM:
ffc61ed2 1124 if (do_utf8) {
8269fa76 1125 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1126 while (s < strend) {
3568d838 1127 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1128 if (tmp && (norun || regtry(prog, s)))
1129 goto got_it;
1130 else
1131 tmp = doevery;
1132 }
bbce6d69 1133 else
ffc61ed2
JH
1134 tmp = 1;
1135 s += UTF8SKIP(s);
bbce6d69 1136 }
bbce6d69 1137 }
ffc61ed2
JH
1138 else {
1139 while (s < strend) {
1140 if (!isALNUM(*s)) {
1141 if (tmp && (norun || regtry(prog, s)))
1142 goto got_it;
1143 else
1144 tmp = doevery;
1145 }
a0ed51b3 1146 else
ffc61ed2
JH
1147 tmp = 1;
1148 s++;
a0ed51b3 1149 }
a0ed51b3
LW
1150 }
1151 break;
bbce6d69 1152 case NALNUML:
3280af22 1153 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1154 if (do_utf8) {
1155 while (s < strend) {
1156 if (!isALNUM_LC_utf8((U8*)s)) {
1157 if (tmp && (norun || regtry(prog, s)))
1158 goto got_it;
1159 else
1160 tmp = doevery;
1161 }
a687059c 1162 else
ffc61ed2
JH
1163 tmp = 1;
1164 s += UTF8SKIP(s);
a687059c 1165 }
a0d0e21e 1166 }
ffc61ed2
JH
1167 else {
1168 while (s < strend) {
1169 if (!isALNUM_LC(*s)) {
1170 if (tmp && (norun || regtry(prog, s)))
1171 goto got_it;
1172 else
1173 tmp = doevery;
1174 }
a0ed51b3 1175 else
ffc61ed2
JH
1176 tmp = 1;
1177 s++;
a0ed51b3 1178 }
a0ed51b3
LW
1179 }
1180 break;
a0d0e21e 1181 case SPACE:
ffc61ed2 1182 if (do_utf8) {
8269fa76 1183 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1184 while (s < strend) {
3568d838 1185 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1186 if (tmp && (norun || regtry(prog, s)))
1187 goto got_it;
1188 else
1189 tmp = doevery;
1190 }
a0d0e21e 1191 else
ffc61ed2
JH
1192 tmp = 1;
1193 s += UTF8SKIP(s);
2304df62 1194 }
a0d0e21e 1195 }
ffc61ed2
JH
1196 else {
1197 while (s < strend) {
1198 if (isSPACE(*s)) {
1199 if (tmp && (norun || regtry(prog, s)))
1200 goto got_it;
1201 else
1202 tmp = doevery;
1203 }
a0ed51b3 1204 else
ffc61ed2
JH
1205 tmp = 1;
1206 s++;
a0ed51b3 1207 }
a0ed51b3
LW
1208 }
1209 break;
bbce6d69 1210 case SPACEL:
3280af22 1211 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1212 if (do_utf8) {
1213 while (s < strend) {
1214 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1215 if (tmp && (norun || regtry(prog, s)))
1216 goto got_it;
1217 else
1218 tmp = doevery;
1219 }
bbce6d69 1220 else
ffc61ed2
JH
1221 tmp = 1;
1222 s += UTF8SKIP(s);
bbce6d69 1223 }
bbce6d69 1224 }
ffc61ed2
JH
1225 else {
1226 while (s < strend) {
1227 if (isSPACE_LC(*s)) {
1228 if (tmp && (norun || regtry(prog, s)))
1229 goto got_it;
1230 else
1231 tmp = doevery;
1232 }
a0ed51b3 1233 else
ffc61ed2
JH
1234 tmp = 1;
1235 s++;
a0ed51b3 1236 }
a0ed51b3
LW
1237 }
1238 break;
a0d0e21e 1239 case NSPACE:
ffc61ed2 1240 if (do_utf8) {
8269fa76 1241 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1242 while (s < strend) {
3568d838 1243 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1244 if (tmp && (norun || regtry(prog, s)))
1245 goto got_it;
1246 else
1247 tmp = doevery;
1248 }
a0d0e21e 1249 else
ffc61ed2
JH
1250 tmp = 1;
1251 s += UTF8SKIP(s);
a687059c 1252 }
a0d0e21e 1253 }
ffc61ed2
JH
1254 else {
1255 while (s < strend) {
1256 if (!isSPACE(*s)) {
1257 if (tmp && (norun || regtry(prog, s)))
1258 goto got_it;
1259 else
1260 tmp = doevery;
1261 }
a0ed51b3 1262 else
ffc61ed2
JH
1263 tmp = 1;
1264 s++;
a0ed51b3 1265 }
a0ed51b3
LW
1266 }
1267 break;
bbce6d69 1268 case NSPACEL:
3280af22 1269 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1270 if (do_utf8) {
1271 while (s < strend) {
1272 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1273 if (tmp && (norun || regtry(prog, s)))
1274 goto got_it;
1275 else
1276 tmp = doevery;
1277 }
bbce6d69 1278 else
ffc61ed2
JH
1279 tmp = 1;
1280 s += UTF8SKIP(s);
bbce6d69 1281 }
bbce6d69 1282 }
ffc61ed2
JH
1283 else {
1284 while (s < strend) {
1285 if (!isSPACE_LC(*s)) {
1286 if (tmp && (norun || regtry(prog, s)))
1287 goto got_it;
1288 else
1289 tmp = doevery;
1290 }
a0ed51b3 1291 else
ffc61ed2
JH
1292 tmp = 1;
1293 s++;
a0ed51b3 1294 }
a0ed51b3
LW
1295 }
1296 break;
a0d0e21e 1297 case DIGIT:
ffc61ed2 1298 if (do_utf8) {
8269fa76 1299 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1300 while (s < strend) {
3568d838 1301 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1302 if (tmp && (norun || regtry(prog, s)))
1303 goto got_it;
1304 else
1305 tmp = doevery;
1306 }
a0d0e21e 1307 else
ffc61ed2
JH
1308 tmp = 1;
1309 s += UTF8SKIP(s);
2b69d0c2 1310 }
a0d0e21e 1311 }
ffc61ed2
JH
1312 else {
1313 while (s < strend) {
1314 if (isDIGIT(*s)) {
1315 if (tmp && (norun || regtry(prog, s)))
1316 goto got_it;
1317 else
1318 tmp = doevery;
1319 }
a0ed51b3 1320 else
ffc61ed2
JH
1321 tmp = 1;
1322 s++;
a0ed51b3 1323 }
a0ed51b3
LW
1324 }
1325 break;
b8c5462f
JH
1326 case DIGITL:
1327 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1328 if (do_utf8) {
1329 while (s < strend) {
1330 if (isDIGIT_LC_utf8((U8*)s)) {
1331 if (tmp && (norun || regtry(prog, s)))
1332 goto got_it;
1333 else
1334 tmp = doevery;
1335 }
b8c5462f 1336 else
ffc61ed2
JH
1337 tmp = 1;
1338 s += UTF8SKIP(s);
b8c5462f 1339 }
b8c5462f 1340 }
ffc61ed2
JH
1341 else {
1342 while (s < strend) {
1343 if (isDIGIT_LC(*s)) {
1344 if (tmp && (norun || regtry(prog, s)))
1345 goto got_it;
1346 else
1347 tmp = doevery;
1348 }
b8c5462f 1349 else
ffc61ed2
JH
1350 tmp = 1;
1351 s++;
b8c5462f 1352 }
b8c5462f
JH
1353 }
1354 break;
a0d0e21e 1355 case NDIGIT:
ffc61ed2 1356 if (do_utf8) {
8269fa76 1357 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1358 while (s < strend) {
3568d838 1359 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1360 if (tmp && (norun || regtry(prog, s)))
1361 goto got_it;
1362 else
1363 tmp = doevery;
1364 }
a0d0e21e 1365 else
ffc61ed2
JH
1366 tmp = 1;
1367 s += UTF8SKIP(s);
a687059c 1368 }
a0d0e21e 1369 }
ffc61ed2
JH
1370 else {
1371 while (s < strend) {
1372 if (!isDIGIT(*s)) {
1373 if (tmp && (norun || regtry(prog, s)))
1374 goto got_it;
1375 else
1376 tmp = doevery;
1377 }
a0ed51b3 1378 else
ffc61ed2
JH
1379 tmp = 1;
1380 s++;
a0ed51b3 1381 }
a0ed51b3
LW
1382 }
1383 break;
b8c5462f
JH
1384 case NDIGITL:
1385 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1386 if (do_utf8) {
1387 while (s < strend) {
1388 if (!isDIGIT_LC_utf8((U8*)s)) {
1389 if (tmp && (norun || regtry(prog, s)))
1390 goto got_it;
1391 else
1392 tmp = doevery;
1393 }
b8c5462f 1394 else
ffc61ed2
JH
1395 tmp = 1;
1396 s += UTF8SKIP(s);
b8c5462f 1397 }
a0ed51b3 1398 }
ffc61ed2
JH
1399 else {
1400 while (s < strend) {
1401 if (!isDIGIT_LC(*s)) {
1402 if (tmp && (norun || regtry(prog, s)))
1403 goto got_it;
1404 else
1405 tmp = doevery;
1406 }
cf93c79d 1407 else
ffc61ed2
JH
1408 tmp = 1;
1409 s++;
b8c5462f 1410 }
b8c5462f
JH
1411 }
1412 break;
b3c9acc1 1413 default:
3c3eec57
GS
1414 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1415 break;
d6a28714 1416 }
6eb5f6b9
JH
1417 return 0;
1418 got_it:
1419 return s;
1420}
1421
1422/*
1423 - regexec_flags - match a regexp against a string
1424 */
1425I32
1426Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1427 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1428/* strend: pointer to null at end of string */
1429/* strbeg: real beginning of string */
1430/* minend: end of match must be >=minend after stringarg. */
1431/* data: May be used for some additional optimizations. */
1432/* nosave: For optimizations. */
1433{
6eb5f6b9
JH
1434 register char *s;
1435 register regnode *c;
1436 register char *startpos = stringarg;
6eb5f6b9
JH
1437 I32 minlen; /* must match at least this many chars */
1438 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1439 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1440 constant substr. */ /* CC */
1441 I32 end_shift = 0; /* Same for the end. */ /* CC */
1442 I32 scream_pos = -1; /* Internal iterator of scream. */
1443 char *scream_olds;
1444 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1445 bool do_utf8 = DO_UTF8(sv);
6eb5f6b9
JH
1446
1447 PL_regcc = 0;
1448
1449 cache_re(prog);
1450#ifdef DEBUGGING
aea4f609 1451 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1452#endif
1453
1454 /* Be paranoid... */
1455 if (prog == NULL || startpos == NULL) {
1456 Perl_croak(aTHX_ "NULL regexp parameter");
1457 return 0;
1458 }
1459
1460 minlen = prog->minlen;
f33976b4 1461 if (do_utf8 && !(prog->reganch & ROPT_CANY_SEEN)) {
3baa4c62 1462 if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
1aa99e6b
IH
1463 }
1464 else {
f33976b4 1465 if (strend - startpos < minlen) goto phooey;
1aa99e6b 1466 }
6eb5f6b9 1467
6eb5f6b9
JH
1468 /* Check validity of program. */
1469 if (UCHARAT(prog->program) != REG_MAGIC) {
1470 Perl_croak(aTHX_ "corrupted regexp program");
1471 }
1472
1473 PL_reg_flags = 0;
1474 PL_reg_eval_set = 0;
1475 PL_reg_maxiter = 0;
1476
1477 if (prog->reganch & ROPT_UTF8)
1478 PL_reg_flags |= RF_utf8;
1479
1480 /* Mark beginning of line for ^ and lookbehind. */
1481 PL_regbol = startpos;
1482 PL_bostr = strbeg;
1483 PL_reg_sv = sv;
1484
1485 /* Mark end of line for $ (and such) */
1486 PL_regeol = strend;
1487
1488 /* see how far we have to get to not match where we matched before */
1489 PL_regtill = startpos+minend;
1490
1491 /* We start without call_cc context. */
1492 PL_reg_call_cc = 0;
1493
1494 /* If there is a "must appear" string, look for it. */
1495 s = startpos;
1496
1497 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1498 MAGIC *mg;
1499
1500 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1501 PL_reg_ganch = startpos;
1502 else if (sv && SvTYPE(sv) >= SVt_PVMG
1503 && SvMAGIC(sv)
14befaf4
DM
1504 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1505 && mg->mg_len >= 0) {
6eb5f6b9
JH
1506 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1507 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1508 if (s > PL_reg_ganch)
6eb5f6b9
JH
1509 goto phooey;
1510 s = PL_reg_ganch;
1511 }
1512 }
1513 else /* pos() not defined */
1514 PL_reg_ganch = strbeg;
1515 }
1516
699c3c34
JH
1517 if (do_utf8 == (UTF!=0) &&
1518 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1519 re_scream_pos_data d;
1520
1521 d.scream_olds = &scream_olds;
1522 d.scream_pos = &scream_pos;
1523 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1524 if (!s)
1525 goto phooey; /* not present */
1526 }
1527
1528 DEBUG_r( if (!PL_colorset) reginitcolors() );
1529 DEBUG_r(PerlIO_printf(Perl_debug_log,
1530 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1531 PL_colors[4],PL_colors[5],PL_colors[0],
1532 prog->precomp,
1533 PL_colors[1],
1534 (strlen(prog->precomp) > 60 ? "..." : ""),
1535 PL_colors[0],
1536 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1537 startpos, PL_colors[1],
1538 (strend - startpos > 60 ? "..." : ""))
1539 );
1540
1541 /* Simplest case: anchored match need be tried only once. */
1542 /* [unless only anchor is BOL and multiline is set] */
1543 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1544 if (s == startpos && regtry(prog, startpos))
1545 goto got_it;
1546 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1547 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1548 {
1549 char *end;
1550
1551 if (minlen)
1552 dontbother = minlen - 1;
1aa99e6b 1553 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1554 /* for multiline we only have to try after newlines */
1555 if (prog->check_substr) {
1556 if (s == startpos)
1557 goto after_try;
1558 while (1) {
1559 if (regtry(prog, s))
1560 goto got_it;
1561 after_try:
1562 if (s >= end)
1563 goto phooey;
1564 if (prog->reganch & RE_USE_INTUIT) {
1565 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1566 if (!s)
1567 goto phooey;
1568 }
1569 else
1570 s++;
1571 }
1572 } else {
1573 if (s > startpos)
1574 s--;
1575 while (s < end) {
1576 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1577 if (regtry(prog, s))
1578 goto got_it;
1579 }
1580 }
1581 }
1582 }
1583 goto phooey;
1584 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1585 if (regtry(prog, PL_reg_ganch))
1586 goto got_it;
1587 goto phooey;
1588 }
1589
1590 /* Messy cases: unanchored match. */
9041c2e3 1591 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1592 /* we have /x+whatever/ */
1593 /* it must be a one character string (XXXX Except UTF?) */
1594 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1595#ifdef DEBUGGING
1596 int did_match = 0;
1597#endif
1598
1aa99e6b 1599 if (do_utf8) {
6eb5f6b9
JH
1600 while (s < strend) {
1601 if (*s == ch) {
bf93d4cc 1602 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1603 if (regtry(prog, s)) goto got_it;
1604 s += UTF8SKIP(s);
1605 while (s < strend && *s == ch)
1606 s += UTF8SKIP(s);
1607 }
1608 s += UTF8SKIP(s);
1609 }
1610 }
1611 else {
1612 while (s < strend) {
1613 if (*s == ch) {
bf93d4cc 1614 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1615 if (regtry(prog, s)) goto got_it;
1616 s++;
1617 while (s < strend && *s == ch)
1618 s++;
1619 }
1620 s++;
1621 }
1622 }
b7953727 1623 DEBUG_r(if (!did_match)
bf93d4cc 1624 PerlIO_printf(Perl_debug_log,
b7953727
JH
1625 "Did not find anchored character...\n")
1626 );
6eb5f6b9
JH
1627 }
1628 /*SUPPRESS 560*/
1aa99e6b
IH
1629 else if (do_utf8 == (UTF!=0) &&
1630 (prog->anchored_substr != Nullsv
9041c2e3 1631 || (prog->float_substr != Nullsv
1aa99e6b 1632 && prog->float_max_offset < strend - s))) {
9041c2e3 1633 SV *must = prog->anchored_substr
6eb5f6b9 1634 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1635 I32 back_max =
6eb5f6b9 1636 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1637 I32 back_min =
6eb5f6b9 1638 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1639 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1640 -(I32)(CHR_SVLEN(must)
1aa99e6b 1641 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1642 char *last1; /* Last position checked before */
bf93d4cc
GS
1643#ifdef DEBUGGING
1644 int did_match = 0;
1645#endif
6eb5f6b9
JH
1646
1647 if (s > PL_bostr)
1648 last1 = HOPc(s, -1);
1649 else
1650 last1 = s - 1; /* bogus */
1651
1652 /* XXXX check_substr already used to find `s', can optimize if
1653 check_substr==must. */
1654 scream_pos = -1;
1655 dontbother = end_shift;
1656 strend = HOPc(strend, -dontbother);
1657 while ( (s <= last) &&
9041c2e3 1658 ((flags & REXEC_SCREAM)
1aa99e6b 1659 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1660 end_shift, &scream_pos, 0))
1aa99e6b 1661 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1662 (unsigned char*)strend, must,
6eb5f6b9 1663 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1664 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1665 if (HOPc(s, -back_max) > last1) {
1666 last1 = HOPc(s, -back_min);
1667 s = HOPc(s, -back_max);
1668 }
1669 else {
1670 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1671
1672 last1 = HOPc(s, -back_min);
1673 s = t;
1674 }
1aa99e6b 1675 if (do_utf8) {
6eb5f6b9
JH
1676 while (s <= last1) {
1677 if (regtry(prog, s))
1678 goto got_it;
1679 s += UTF8SKIP(s);
1680 }
1681 }
1682 else {
1683 while (s <= last1) {
1684 if (regtry(prog, s))
1685 goto got_it;
1686 s++;
1687 }
1688 }
1689 }
b7953727
JH
1690 DEBUG_r(if (!did_match)
1691 PerlIO_printf(Perl_debug_log,
1692 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1693 ((must == prog->anchored_substr)
1694 ? "anchored" : "floating"),
1695 PL_colors[0],
1696 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1697 SvPVX(must),
b7953727
JH
1698 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1699 );
6eb5f6b9
JH
1700 goto phooey;
1701 }
155aba94 1702 else if ((c = prog->regstclass)) {
66e933ab
GS
1703 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1704 /* don't bother with what can't match */
6eb5f6b9 1705 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1706 DEBUG_r({
1707 SV *prop = sv_newmortal();
1708 regprop(prop, c);
1709 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s);
1710 });
6eb5f6b9
JH
1711 if (find_byclass(prog, c, s, strend, startpos, 0))
1712 goto got_it;
bf93d4cc 1713 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1714 }
1715 else {
1716 dontbother = 0;
1717 if (prog->float_substr != Nullsv) { /* Trim the end. */
1718 char *last;
d6a28714
JH
1719
1720 if (flags & REXEC_SCREAM) {
1721 last = screaminstr(sv, prog->float_substr, s - strbeg,
1722 end_shift, &scream_pos, 1); /* last one */
1723 if (!last)
ffc61ed2 1724 last = scream_olds; /* Only one occurrence. */
b8c5462f 1725 }
d6a28714
JH
1726 else {
1727 STRLEN len;
1728 char *little = SvPV(prog->float_substr, len);
1729
1730 if (SvTAIL(prog->float_substr)) {
1731 if (memEQ(strend - len + 1, little, len - 1))
1732 last = strend - len + 1;
1733 else if (!PL_multiline)
9041c2e3 1734 last = memEQ(strend - len, little, len)
d6a28714 1735 ? strend - len : Nullch;
b8c5462f 1736 else
d6a28714
JH
1737 goto find_last;
1738 } else {
1739 find_last:
9041c2e3 1740 if (len)
d6a28714 1741 last = rninstr(s, strend, little, little + len);
b8c5462f 1742 else
d6a28714 1743 last = strend; /* matching `$' */
b8c5462f 1744 }
b8c5462f 1745 }
bf93d4cc
GS
1746 if (last == NULL) {
1747 DEBUG_r(PerlIO_printf(Perl_debug_log,
1748 "%sCan't trim the tail, match fails (should not happen)%s\n",
1749 PL_colors[4],PL_colors[5]));
1750 goto phooey; /* Should not happen! */
1751 }
d6a28714
JH
1752 dontbother = strend - last + prog->float_min_offset;
1753 }
1754 if (minlen && (dontbother < minlen))
1755 dontbother = minlen - 1;
1756 strend -= dontbother; /* this one's always in bytes! */
1757 /* We don't know much -- general case. */
1aa99e6b 1758 if (do_utf8) {
d6a28714
JH
1759 for (;;) {
1760 if (regtry(prog, s))
1761 goto got_it;
1762 if (s >= strend)
1763 break;
b8c5462f 1764 s += UTF8SKIP(s);
d6a28714
JH
1765 };
1766 }
1767 else {
1768 do {
1769 if (regtry(prog, s))
1770 goto got_it;
1771 } while (s++ < strend);
1772 }
1773 }
1774
1775 /* Failure. */
1776 goto phooey;
1777
1778got_it:
1779 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1780
1781 if (PL_reg_eval_set) {
1782 /* Preserve the current value of $^R */
1783 if (oreplsv != GvSV(PL_replgv))
1784 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1785 restored, the value remains
1786 the same. */
acfe0abc 1787 restore_pos(aTHX_ 0);
d6a28714
JH
1788 }
1789
1790 /* make sure $`, $&, $', and $digit will work later */
1791 if ( !(flags & REXEC_NOT_FIRST) ) {
1792 if (RX_MATCH_COPIED(prog)) {
1793 Safefree(prog->subbeg);
1794 RX_MATCH_COPIED_off(prog);
1795 }
1796 if (flags & REXEC_COPY_STR) {
1797 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1798
1799 s = savepvn(strbeg, i);
1800 prog->subbeg = s;
1801 prog->sublen = i;
1802 RX_MATCH_COPIED_on(prog);
1803 }
1804 else {
1805 prog->subbeg = strbeg;
1806 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1807 }
1808 }
9041c2e3 1809
d6a28714
JH
1810 return 1;
1811
1812phooey:
bf93d4cc
GS
1813 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1814 PL_colors[4],PL_colors[5]));
d6a28714 1815 if (PL_reg_eval_set)
acfe0abc 1816 restore_pos(aTHX_ 0);
d6a28714
JH
1817 return 0;
1818}
1819
1820/*
1821 - regtry - try match at specific point
1822 */
1823STATIC I32 /* 0 failure, 1 success */
1824S_regtry(pTHX_ regexp *prog, char *startpos)
1825{
d6a28714
JH
1826 register I32 i;
1827 register I32 *sp;
1828 register I32 *ep;
1829 CHECKPOINT lastcp;
1830
02db2b7b
IZ
1831#ifdef DEBUGGING
1832 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1833#endif
d6a28714
JH
1834 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1835 MAGIC *mg;
1836
1837 PL_reg_eval_set = RS_init;
1838 DEBUG_r(DEBUG_s(
b900a521
JH
1839 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1840 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1841 ));
e8347627 1842 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1843 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1844 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1845 SAVETMPS;
1846 /* Apparently this is not needed, judging by wantarray. */
e8347627 1847 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1848 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1849
1850 if (PL_reg_sv) {
1851 /* Make $_ available to executed code. */
1852 if (PL_reg_sv != DEFSV) {
4d1ff10f 1853 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
d6a28714
JH
1854 SAVESPTR(DEFSV);
1855 DEFSV = PL_reg_sv;
b8c5462f 1856 }
d6a28714 1857
9041c2e3 1858 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1859 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1860 /* prepare for quick setting of pos */
14befaf4
DM
1861 sv_magic(PL_reg_sv, (SV*)0,
1862 PERL_MAGIC_regex_global, Nullch, 0);
1863 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1864 mg->mg_len = -1;
b8c5462f 1865 }
d6a28714
JH
1866 PL_reg_magic = mg;
1867 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1868 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 1869 }
09687e5a 1870 if (!PL_reg_curpm) {
0f79a09d 1871 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
1872#ifdef USE_ITHREADS
1873 {
1874 SV* repointer = newSViv(0);
577e12cc 1875 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 1876 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
1877 av_push(PL_regex_padav,repointer);
1878 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
1879 PL_regex_pad = AvARRAY(PL_regex_padav);
1880 }
1881#endif
1882 }
aaa362c4 1883 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
1884 PL_reg_oldcurpm = PL_curpm;
1885 PL_curpm = PL_reg_curpm;
1886 if (RX_MATCH_COPIED(prog)) {
1887 /* Here is a serious problem: we cannot rewrite subbeg,
1888 since it may be needed if this match fails. Thus
1889 $` inside (?{}) could fail... */
1890 PL_reg_oldsaved = prog->subbeg;
1891 PL_reg_oldsavedlen = prog->sublen;
1892 RX_MATCH_COPIED_off(prog);
1893 }
1894 else
1895 PL_reg_oldsaved = Nullch;
1896 prog->subbeg = PL_bostr;
1897 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1898 }
1899 prog->startp[0] = startpos - PL_bostr;
1900 PL_reginput = startpos;
1901 PL_regstartp = prog->startp;
1902 PL_regendp = prog->endp;
1903 PL_reglastparen = &prog->lastparen;
a01268b5 1904 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
1905 prog->lastparen = 0;
1906 PL_regsize = 0;
1907 DEBUG_r(PL_reg_starttry = startpos);
1908 if (PL_reg_start_tmpl <= prog->nparens) {
1909 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1910 if(PL_reg_start_tmp)
1911 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1912 else
1913 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1914 }
1915
1916 /* XXXX What this code is doing here?!!! There should be no need
1917 to do this again and again, PL_reglastparen should take care of
3dd2943c 1918 this! --ilya*/
dafc8851
JH
1919
1920 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1921 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1922 * PL_reglastparen), is not needed at all by the test suite
1923 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1924 * enough, for building DynaLoader, or otherwise this
1925 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1926 * will happen. Meanwhile, this code *is* needed for the
1927 * above-mentioned test suite tests to succeed. The common theme
1928 * on those tests seems to be returning null fields from matches.
1929 * --jhi */
dafc8851 1930#if 1
d6a28714
JH
1931 sp = prog->startp;
1932 ep = prog->endp;
1933 if (prog->nparens) {
09e8ae3b 1934 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1935 *++sp = -1;
1936 *++ep = -1;
1937 }
1938 }
dafc8851 1939#endif
02db2b7b 1940 REGCP_SET(lastcp);
d6a28714
JH
1941 if (regmatch(prog->program + 1)) {
1942 prog->endp[0] = PL_reginput - PL_bostr;
1943 return 1;
1944 }
02db2b7b 1945 REGCP_UNWIND(lastcp);
d6a28714
JH
1946 return 0;
1947}
1948
02db2b7b
IZ
1949#define RE_UNWIND_BRANCH 1
1950#define RE_UNWIND_BRANCHJ 2
1951
1952union re_unwind_t;
1953
1954typedef struct { /* XX: makes sense to enlarge it... */
1955 I32 type;
1956 I32 prev;
1957 CHECKPOINT lastcp;
1958} re_unwind_generic_t;
1959
1960typedef struct {
1961 I32 type;
1962 I32 prev;
1963 CHECKPOINT lastcp;
1964 I32 lastparen;
1965 regnode *next;
1966 char *locinput;
1967 I32 nextchr;
1968#ifdef DEBUGGING
1969 int regindent;
1970#endif
1971} re_unwind_branch_t;
1972
1973typedef union re_unwind_t {
1974 I32 type;
1975 re_unwind_generic_t generic;
1976 re_unwind_branch_t branch;
1977} re_unwind_t;
1978
8ba1375e
MJD
1979#define sayYES goto yes
1980#define sayNO goto no
1981#define sayYES_FINAL goto yes_final
1982#define sayYES_LOUD goto yes_loud
1983#define sayNO_FINAL goto no_final
1984#define sayNO_SILENT goto do_no
1985#define saySAME(x) if (x) goto yes; else goto no
1986
1987#define REPORT_CODE_OFF 24
1988
d6a28714
JH
1989/*
1990 - regmatch - main matching routine
1991 *
1992 * Conceptually the strategy is simple: check to see whether the current
1993 * node matches, call self recursively to see whether the rest matches,
1994 * and then act accordingly. In practice we make some effort to avoid
1995 * recursion, in particular by going through "ordinary" nodes (that don't
1996 * need to know whether the rest of the match failed) by a loop instead of
1997 * by recursion.
1998 */
1999/* [lwall] I've hoisted the register declarations to the outer block in order to
2000 * maybe save a little bit of pushing and popping on the stack. It also takes
2001 * advantage of machines that use a register save mask on subroutine entry.
2002 */
2003STATIC I32 /* 0 failure, 1 success */
2004S_regmatch(pTHX_ regnode *prog)
2005{
d6a28714
JH
2006 register regnode *scan; /* Current node. */
2007 regnode *next; /* Next node. */
2008 regnode *inner; /* Next node in internal branch. */
2009 register I32 nextchr; /* renamed nextchr - nextchar colides with
2010 function of same name */
2011 register I32 n; /* no or next */
b7953727
JH
2012 register I32 ln = 0; /* len or last */
2013 register char *s = Nullch; /* operand or save */
d6a28714 2014 register char *locinput = PL_reginput;
b7953727 2015 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2016 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2017 I32 unwind = 0;
b7953727 2018#if 0
02db2b7b 2019 I32 firstcp = PL_savestack_ix;
b7953727 2020#endif
53c4c00c 2021 register bool do_utf8 = PL_reg_match_utf8;
02db2b7b 2022
d6a28714
JH
2023#ifdef DEBUGGING
2024 PL_regindent++;
2025#endif
2026
2027 /* Note that nextchr is a byte even in UTF */
2028 nextchr = UCHARAT(locinput);
2029 scan = prog;
2030 while (scan != NULL) {
8ba1375e 2031
d6a28714
JH
2032 DEBUG_r( {
2033 SV *prop = sv_newmortal();
2034 int docolor = *PL_colors[0];
2035 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2036 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2037 /* The part of the string before starttry has one color
2038 (pref0_len chars), between starttry and current
2039 position another one (pref_len - pref0_len chars),
2040 after the current position the third one.
2041 We assume that pref0_len <= pref_len, otherwise we
2042 decrease pref0_len. */
9041c2e3 2043 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2044 ? (5 + taill) - l : locinput - PL_bostr;
2045 int pref0_len;
d6a28714 2046
1aa99e6b
IH
2047 while (UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2048 pref_len++;
2049 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2050 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2051 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2052 ? (5 + taill) - pref_len : PL_regeol - locinput);
1aa99e6b
IH
2053 while (UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2054 l--;
d6a28714
JH
2055 if (pref0_len < 0)
2056 pref0_len = 0;
2057 if (pref0_len > pref_len)
2058 pref0_len = pref_len;
2059 regprop(prop, scan);
9041c2e3 2060 PerlIO_printf(Perl_debug_log,
b900a521 2061 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
9041c2e3
NIS
2062 (IV)(locinput - PL_bostr),
2063 PL_colors[4], pref0_len,
d6a28714 2064 locinput - pref_len, PL_colors[5],
9041c2e3 2065 PL_colors[2], pref_len - pref0_len,
d6a28714
JH
2066 locinput - pref_len + pref0_len, PL_colors[3],
2067 (docolor ? "" : "> <"),
2068 PL_colors[0], l, locinput, PL_colors[1],
2069 15 - l - pref_len + 1,
2070 "",
b900a521 2071 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
2072 SvPVX(prop));
2073 } );
2074
2075 next = scan + NEXT_OFF(scan);
2076 if (next == scan)
2077 next = NULL;
2078
2079 switch (OP(scan)) {
2080 case BOL:
12d33761
HS
2081 if (locinput == PL_bostr || (PL_multiline &&
2082 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2083 {
2084 /* regtill = regbol; */
b8c5462f
JH
2085 break;
2086 }
d6a28714
JH
2087 sayNO;
2088 case MBOL:
12d33761
HS
2089 if (locinput == PL_bostr ||
2090 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2091 {
b8c5462f
JH
2092 break;
2093 }
d6a28714
JH
2094 sayNO;
2095 case SBOL:
c2a73568 2096 if (locinput == PL_bostr)
b8c5462f 2097 break;
d6a28714
JH
2098 sayNO;
2099 case GPOS:
2100 if (locinput == PL_reg_ganch)
2101 break;
2102 sayNO;
2103 case EOL:
2104 if (PL_multiline)
2105 goto meol;
2106 else
2107 goto seol;
2108 case MEOL:
2109 meol:
2110 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2111 sayNO;
b8c5462f 2112 break;
d6a28714
JH
2113 case SEOL:
2114 seol:
2115 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2116 sayNO;
d6a28714 2117 if (PL_regeol - locinput > 1)
b8c5462f 2118 sayNO;
b8c5462f 2119 break;
d6a28714
JH
2120 case EOS:
2121 if (PL_regeol != locinput)
b8c5462f 2122 sayNO;
d6a28714 2123 break;
ffc61ed2 2124 case SANY:
d6a28714 2125 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2126 sayNO;
f33976b4
DB
2127 if (do_utf8) {
2128 locinput += PL_utf8skip[nextchr];
2129 if (locinput > PL_regeol)
2130 sayNO;
2131 nextchr = UCHARAT(locinput);
2132 }
2133 else
2134 nextchr = UCHARAT(++locinput);
2135 break;
2136 case CANY:
2137 if (!nextchr && locinput >= PL_regeol)
2138 sayNO;
b8c5462f 2139 nextchr = UCHARAT(++locinput);
a0d0e21e 2140 break;
ffc61ed2 2141 case REG_ANY:
1aa99e6b
IH
2142 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2143 sayNO;
2144 if (do_utf8) {
b8c5462f 2145 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2146 if (locinput > PL_regeol)
2147 sayNO;
a0ed51b3 2148 nextchr = UCHARAT(locinput);
a0ed51b3 2149 }
1aa99e6b
IH
2150 else
2151 nextchr = UCHARAT(++locinput);
a0ed51b3 2152 break;
d6a28714 2153 case EXACT:
cd439c50
IZ
2154 s = STRING(scan);
2155 ln = STR_LEN(scan);
1aa99e6b
IH
2156 if (do_utf8 != (UTF!=0)) {
2157 char *l = locinput;
2158 char *e = s + ln;
2159 STRLEN len;
2160 if (do_utf8)
2161 while (s < e) {
2162 if (l >= PL_regeol)
2163 sayNO;
9041c2e3 2164 if (*((U8*)s) != utf8_to_uvchr((U8*)l, &len))
1aa99e6b
IH
2165 sayNO;
2166 s++;
2167 l += len;
2168 }
2169 else
2170 while (s < e) {
2171 if (l >= PL_regeol)
2172 sayNO;
9041c2e3 2173 if (*((U8*)l) != utf8_to_uvchr((U8*)s, &len))
1aa99e6b
IH
2174 sayNO;
2175 s += len;
2176 l++;
2177 }
2178 locinput = l;
2179 nextchr = UCHARAT(locinput);
2180 break;
2181 }
d6a28714
JH
2182 /* Inline the first character, for speed. */
2183 if (UCHARAT(s) != nextchr)
2184 sayNO;
2185 if (PL_regeol - locinput < ln)
2186 sayNO;
2187 if (ln > 1 && memNE(s, locinput, ln))
2188 sayNO;
2189 locinput += ln;
2190 nextchr = UCHARAT(locinput);
2191 break;
2192 case EXACTFL:
b8c5462f
JH
2193 PL_reg_flags |= RF_tainted;
2194 /* FALL THROUGH */
d6a28714 2195 case EXACTF:
cd439c50
IZ
2196 s = STRING(scan);
2197 ln = STR_LEN(scan);
d6a28714 2198
1aa99e6b 2199 if (do_utf8) {
d6a28714 2200 char *l = locinput;
1aa99e6b
IH
2201 char *e;
2202 e = s + ln;
d6a28714
JH
2203 c1 = OP(scan) == EXACTF;
2204 while (s < e) {
1aa99e6b 2205 if (l >= PL_regeol) {
d6a28714
JH
2206 sayNO;
2207 }
2b9d42f0 2208 if ((UTF ? utf8n_to_uvchr((U8*)s, e - s, 0, 0) : *((U8*)s)) !=
1aa99e6b
IH
2209 (c1 ? toLOWER_utf8((U8*)l) : toLOWER_LC_utf8((U8*)l)))
2210 sayNO;
2211 s += UTF ? UTF8SKIP(s) : 1;
d6a28714 2212 l += UTF8SKIP(l);
b8c5462f 2213 }
d6a28714 2214 locinput = l;
a0ed51b3
LW
2215 nextchr = UCHARAT(locinput);
2216 break;
2217 }
d6a28714
JH
2218
2219 /* Inline the first character, for speed. */
2220 if (UCHARAT(s) != nextchr &&
2221 UCHARAT(s) != ((OP(scan) == EXACTF)
2222 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2223 sayNO;
d6a28714 2224 if (PL_regeol - locinput < ln)
b8c5462f 2225 sayNO;
d6a28714
JH
2226 if (ln > 1 && (OP(scan) == EXACTF
2227 ? ibcmp(s, locinput, ln)
2228 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2229 sayNO;
d6a28714
JH
2230 locinput += ln;
2231 nextchr = UCHARAT(locinput);
a0d0e21e 2232 break;
d6a28714 2233 case ANYOF:
ffc61ed2
JH
2234 if (do_utf8) {
2235 if (!reginclass(scan, (U8*)locinput, do_utf8))
2236 sayNO;
2237 if (locinput >= PL_regeol)
2238 sayNO;
2239 locinput += PL_utf8skip[nextchr];
b8c5462f 2240 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2241 }
2242 else {
2243 if (nextchr < 0)
2244 nextchr = UCHARAT(locinput);
2245 if (!reginclass(scan, (U8*)locinput, do_utf8))
2246 sayNO;
2247 if (!nextchr && locinput >= PL_regeol)
2248 sayNO;
2249 nextchr = UCHARAT(++locinput);
2250 }
b8c5462f 2251 break;
d6a28714 2252 case ALNUML:
b8c5462f
JH
2253 PL_reg_flags |= RF_tainted;
2254 /* FALL THROUGH */
d6a28714 2255 case ALNUM:
b8c5462f 2256 if (!nextchr)
4633a7c4 2257 sayNO;
ffc61ed2 2258 if (do_utf8) {
ad24be35 2259 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2260 if (!(OP(scan) == ALNUM
3568d838 2261 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2262 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2263 {
2264 sayNO;
a0ed51b3 2265 }
b8c5462f 2266 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2267 nextchr = UCHARAT(locinput);
2268 break;
2269 }
ffc61ed2 2270 if (!(OP(scan) == ALNUM
d6a28714 2271 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2272 sayNO;
b8c5462f 2273 nextchr = UCHARAT(++locinput);
a0d0e21e 2274 break;
d6a28714 2275 case NALNUML:
b8c5462f
JH
2276 PL_reg_flags |= RF_tainted;
2277 /* FALL THROUGH */
d6a28714
JH
2278 case NALNUM:
2279 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2280 sayNO;
ffc61ed2 2281 if (do_utf8) {
8269fa76 2282 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2283 if (OP(scan) == NALNUM
3568d838 2284 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2285 : isALNUM_LC_utf8((U8*)locinput))
2286 {
b8c5462f 2287 sayNO;
d6a28714 2288 }
b8c5462f
JH
2289 locinput += PL_utf8skip[nextchr];
2290 nextchr = UCHARAT(locinput);
2291 break;
2292 }
ffc61ed2 2293 if (OP(scan) == NALNUM
d6a28714 2294 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2295 sayNO;
76e3520e 2296 nextchr = UCHARAT(++locinput);
a0d0e21e 2297 break;
d6a28714
JH
2298 case BOUNDL:
2299 case NBOUNDL:
3280af22 2300 PL_reg_flags |= RF_tainted;
bbce6d69 2301 /* FALL THROUGH */
d6a28714
JH
2302 case BOUND:
2303 case NBOUND:
2304 /* was last char in word? */
ffc61ed2 2305 if (do_utf8) {
12d33761
HS
2306 if (locinput == PL_bostr)
2307 ln = '\n';
ffc61ed2
JH
2308 else {
2309 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2310
2b9d42f0 2311 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2312 }
2313 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2314 ln = isALNUM_uni(ln);
8269fa76 2315 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2316 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2317 }
2318 else {
9041c2e3 2319 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2320 n = isALNUM_LC_utf8((U8*)locinput);
2321 }
a0ed51b3 2322 }
d6a28714 2323 else {
12d33761
HS
2324 ln = (locinput != PL_bostr) ?
2325 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2326 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2327 ln = isALNUM(ln);
2328 n = isALNUM(nextchr);
2329 }
2330 else {
2331 ln = isALNUM_LC(ln);
2332 n = isALNUM_LC(nextchr);
2333 }
d6a28714 2334 }
ffc61ed2
JH
2335 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2336 OP(scan) == BOUNDL))
2337 sayNO;
a0ed51b3 2338 break;
d6a28714 2339 case SPACEL:
3280af22 2340 PL_reg_flags |= RF_tainted;
bbce6d69 2341 /* FALL THROUGH */
d6a28714 2342 case SPACE:
9442cb0e 2343 if (!nextchr)
4633a7c4 2344 sayNO;
1aa99e6b 2345 if (do_utf8) {
fd400ab9 2346 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2347 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2348 if (!(OP(scan) == SPACE
3568d838 2349 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2350 : isSPACE_LC_utf8((U8*)locinput)))
2351 {
2352 sayNO;
2353 }
2354 locinput += PL_utf8skip[nextchr];
2355 nextchr = UCHARAT(locinput);
2356 break;
d6a28714 2357 }
ffc61ed2
JH
2358 if (!(OP(scan) == SPACE
2359 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2360 sayNO;
2361 nextchr = UCHARAT(++locinput);
2362 }
2363 else {
2364 if (!(OP(scan) == SPACE
2365 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2366 sayNO;
2367 nextchr = UCHARAT(++locinput);
a0ed51b3 2368 }
a0ed51b3 2369 break;
d6a28714 2370 case NSPACEL:
3280af22 2371 PL_reg_flags |= RF_tainted;
bbce6d69 2372 /* FALL THROUGH */
d6a28714 2373 case NSPACE:
9442cb0e 2374 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2375 sayNO;
1aa99e6b 2376 if (do_utf8) {
8269fa76 2377 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2378 if (OP(scan) == NSPACE
3568d838 2379 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2380 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2381 {
2382 sayNO;
2383 }
2384 locinput += PL_utf8skip[nextchr];
2385 nextchr = UCHARAT(locinput);
2386 break;
a0ed51b3 2387 }
ffc61ed2 2388 if (OP(scan) == NSPACE
d6a28714 2389 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2390 sayNO;
76e3520e 2391 nextchr = UCHARAT(++locinput);
a0d0e21e 2392 break;
d6a28714 2393 case DIGITL:
a0ed51b3
LW
2394 PL_reg_flags |= RF_tainted;
2395 /* FALL THROUGH */
d6a28714 2396 case DIGIT:
9442cb0e 2397 if (!nextchr)
a0ed51b3 2398 sayNO;
1aa99e6b 2399 if (do_utf8) {
8269fa76 2400 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2401 if (!(OP(scan) == DIGIT
3568d838 2402 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2403 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2404 {
a0ed51b3 2405 sayNO;
dfe13c55 2406 }
6f06b55f 2407 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2408 nextchr = UCHARAT(locinput);
2409 break;
2410 }
ffc61ed2 2411 if (!(OP(scan) == DIGIT
9442cb0e 2412 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2413 sayNO;
2414 nextchr = UCHARAT(++locinput);
2415 break;
d6a28714 2416 case NDIGITL:
b8c5462f
JH
2417 PL_reg_flags |= RF_tainted;
2418 /* FALL THROUGH */
d6a28714 2419 case NDIGIT:
9442cb0e 2420 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2421 sayNO;
1aa99e6b 2422 if (do_utf8) {
8269fa76 2423 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2424 if (OP(scan) == NDIGIT
3568d838 2425 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2426 : isDIGIT_LC_utf8((U8*)locinput))
2427 {
a0ed51b3 2428 sayNO;
9442cb0e 2429 }
6f06b55f 2430 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2431 nextchr = UCHARAT(locinput);
2432 break;
2433 }
ffc61ed2 2434 if (OP(scan) == NDIGIT
9442cb0e 2435 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2436 sayNO;
2437 nextchr = UCHARAT(++locinput);
2438 break;
2439 case CLUMP:
8269fa76 2440 LOAD_UTF8_CHARCLASS(mark,"~");
3568d838
JH
2441 if (locinput >= PL_regeol ||
2442 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3 2443 sayNO;
6f06b55f 2444 locinput += PL_utf8skip[nextchr];
3568d838
JH
2445 while (locinput < PL_regeol &&
2446 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3
LW
2447 locinput += UTF8SKIP(locinput);
2448 if (locinput > PL_regeol)
2449 sayNO;
2450 nextchr = UCHARAT(locinput);
2451 break;
c8756f30 2452 case REFFL:
3280af22 2453 PL_reg_flags |= RF_tainted;
c8756f30 2454 /* FALL THROUGH */
c277df42 2455 case REF:
c8756f30 2456 case REFF:
c277df42 2457 n = ARG(scan); /* which paren pair */
cf93c79d 2458 ln = PL_regstartp[n];
2c2d71f5 2459 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2460 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2461 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2462 if (ln == PL_regendp[n])
a0d0e21e 2463 break;
a0ed51b3 2464
cf93c79d 2465 s = PL_bostr + ln;
1aa99e6b 2466 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2467 char *l = locinput;
cf93c79d 2468 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2469 /*
2470 * Note that we can't do the "other character" lookup trick as
2471 * in the 8-bit case (no pun intended) because in Unicode we
2472 * have to map both upper and title case to lower case.
2473 */
2474 if (OP(scan) == REFF) {
2475 while (s < e) {
2476 if (l >= PL_regeol)
2477 sayNO;
dfe13c55 2478 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2479 sayNO;
2480 s += UTF8SKIP(s);
2481 l += UTF8SKIP(l);
2482 }
2483 }
2484 else {
2485 while (s < e) {
2486 if (l >= PL_regeol)
2487 sayNO;
dfe13c55 2488 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2489 sayNO;
2490 s += UTF8SKIP(s);
2491 l += UTF8SKIP(l);
2492 }
2493 }
2494 locinput = l;
2495 nextchr = UCHARAT(locinput);
2496 break;
2497 }
2498
a0d0e21e 2499 /* Inline the first character, for speed. */
76e3520e 2500 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2501 (OP(scan) == REF ||
2502 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2503 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2504 sayNO;
cf93c79d 2505 ln = PL_regendp[n] - ln;
3280af22 2506 if (locinput + ln > PL_regeol)
4633a7c4 2507 sayNO;
c8756f30
AK
2508 if (ln > 1 && (OP(scan) == REF
2509 ? memNE(s, locinput, ln)
2510 : (OP(scan) == REFF
2511 ? ibcmp(s, locinput, ln)
2512 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2513 sayNO;
a0d0e21e 2514 locinput += ln;
76e3520e 2515 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2516 break;
2517
2518 case NOTHING:
c277df42 2519 case TAIL:
a0d0e21e
LW
2520 break;
2521 case BACK:
2522 break;
c277df42
IZ
2523 case EVAL:
2524 {
2525 dSP;
533c011a 2526 OP_4tree *oop = PL_op;
3280af22
NIS
2527 COP *ocurcop = PL_curcop;
2528 SV **ocurpad = PL_curpad;
c277df42 2529 SV *ret;
9041c2e3 2530
c277df42 2531 n = ARG(scan);
533c011a 2532 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2533 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2534 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2535 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2536
cea2e8a9 2537 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2538 SPAGAIN;
2539 ret = POPs;
2540 PUTBACK;
9041c2e3 2541
0f5d15d6
IZ
2542 PL_op = oop;
2543 PL_curpad = ocurpad;
2544 PL_curcop = ocurcop;
c277df42 2545 if (logical) {
0f5d15d6
IZ
2546 if (logical == 2) { /* Postponed subexpression. */
2547 regexp *re;
22c35a8c 2548 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2549 re_cc_state state;
0f5d15d6
IZ
2550 CHECKPOINT cp, lastcp;
2551
2552 if(SvROK(ret) || SvRMAGICAL(ret)) {
2553 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2554
2555 if(SvMAGICAL(sv))
14befaf4 2556 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2557 }
2558 if (mg) {
2559 re = (regexp *)mg->mg_obj;
df0003d4 2560 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2561 }
2562 else {
2563 STRLEN len;
2564 char *t = SvPV(ret, len);
2565 PMOP pm;
2566 char *oprecomp = PL_regprecomp;
2567 I32 osize = PL_regsize;
2568 I32 onpar = PL_regnpar;
2569
5fcd1c1b 2570 Zero(&pm, 1, PMOP);
cea2e8a9 2571 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2572 if (!(SvFLAGS(ret)
0f5d15d6 2573 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2574 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2575 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2576 PL_regprecomp = oprecomp;
2577 PL_regsize = osize;
2578 PL_regnpar = onpar;
2579 }
2580 DEBUG_r(
9041c2e3 2581 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2582 "Entering embedded `%s%.60s%s%s'\n",
2583 PL_colors[0],
2584 re->precomp,
2585 PL_colors[1],
2586 (strlen(re->precomp) > 60 ? "..." : ""))
2587 );
2588 state.node = next;
2589 state.prev = PL_reg_call_cc;
2590 state.cc = PL_regcc;
2591 state.re = PL_reg_re;
2592
2ab05381 2593 PL_regcc = 0;
9041c2e3 2594
0f5d15d6 2595 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2596 REGCP_SET(lastcp);
0f5d15d6
IZ
2597 cache_re(re);
2598 state.ss = PL_savestack_ix;
2599 *PL_reglastparen = 0;
a01268b5 2600 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2601 PL_reg_call_cc = &state;
2602 PL_reginput = locinput;
2c2d71f5
JH
2603
2604 /* XXXX This is too dramatic a measure... */
2605 PL_reg_maxiter = 0;
2606
0f5d15d6 2607 if (regmatch(re->program + 1)) {
2c914db6
IZ
2608 /* Even though we succeeded, we need to restore
2609 global variables, since we may be wrapped inside
2610 SUSPEND, thus the match may be not finished yet. */
2611
2612 /* XXXX Do this only if SUSPENDed? */
2613 PL_reg_call_cc = state.prev;
2614 PL_regcc = state.cc;
2615 PL_reg_re = state.re;
2616 cache_re(PL_reg_re);
2617
2618 /* XXXX This is too dramatic a measure... */
2619 PL_reg_maxiter = 0;
2620
2621 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2622 ReREFCNT_dec(re);
2623 regcpblow(cp);
2624 sayYES;
2625 }
0f5d15d6 2626 ReREFCNT_dec(re);
02db2b7b 2627 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2628 regcppop();
2629 PL_reg_call_cc = state.prev;
2630 PL_regcc = state.cc;
2631 PL_reg_re = state.re;
d3790889 2632 cache_re(PL_reg_re);
2c2d71f5
JH
2633
2634 /* XXXX This is too dramatic a measure... */
2635 PL_reg_maxiter = 0;
2636
8e514ae6 2637 logical = 0;
0f5d15d6
IZ
2638 sayNO;
2639 }
c277df42 2640 sw = SvTRUE(ret);
0f5d15d6 2641 logical = 0;
a0ed51b3
LW
2642 }
2643 else
3280af22 2644 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2645 break;
2646 }
a0d0e21e 2647 case OPEN:
c277df42 2648 n = ARG(scan); /* which paren pair */
3280af22
NIS
2649 PL_reg_start_tmp[n] = locinput;
2650 if (n > PL_regsize)
2651 PL_regsize = n;
a0d0e21e
LW
2652 break;
2653 case CLOSE:
c277df42 2654 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2655 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2656 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2657 if (n > *PL_reglastparen)
2658 *PL_reglastparen = n;
a01268b5 2659 *PL_reglastcloseparen = n;
a0d0e21e 2660 break;
c277df42
IZ
2661 case GROUPP:
2662 n = ARG(scan); /* which paren pair */
cf93c79d 2663 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2664 break;
2665 case IFTHEN:
2c2d71f5 2666 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2667 if (sw)
2668 next = NEXTOPER(NEXTOPER(scan));
2669 else {
2670 next = scan + ARG(scan);
2671 if (OP(next) == IFTHEN) /* Fake one. */
2672 next = NEXTOPER(NEXTOPER(next));
2673 }
2674 break;
2675 case LOGICAL:
0f5d15d6 2676 logical = scan->flags;
c277df42 2677 break;
2ab05381
IZ
2678/*******************************************************************
2679 PL_regcc contains infoblock about the innermost (...)* loop, and
2680 a pointer to the next outer infoblock.
2681
2682 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2683
2684 1) After matching X, regnode for CURLYX is processed;
2685
9041c2e3 2686 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2687 regmatch() recursively with the starting point at WHILEM node;
2688
2689 3) Each hit of WHILEM node tries to match A and Z (in the order
2690 depending on the current iteration, min/max of {min,max} and
2691 greediness). The information about where are nodes for "A"
2692 and "Z" is read from the infoblock, as is info on how many times "A"
2693 was already matched, and greediness.
2694
2695 4) After A matches, the same WHILEM node is hit again.
2696
2697 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2698 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2699 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2700 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2701 of the external loop.
2702
2703 Currently present infoblocks form a tree with a stem formed by PL_curcc
2704 and whatever it mentions via ->next, and additional attached trees
2705 corresponding to temporarily unset infoblocks as in "5" above.
2706
9041c2e3 2707 In the following picture infoblocks for outer loop of
2ab05381
IZ
2708 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2709 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2710 infoblocks are drawn below the "reset" infoblock.
2711
2712 In fact in the picture below we do not show failed matches for Z and T
2713 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2714 more obvious *why* one needs to *temporary* unset infoblocks.]
2715
2716 Matched REx position InfoBlocks Comment
2717 (Y(A)*?Z)*?T x
2718 Y(A)*?Z)*?T x <- O
2719 Y (A)*?Z)*?T x <- O
2720 Y A)*?Z)*?T x <- O <- I
2721 YA )*?Z)*?T x <- O <- I
2722 YA A)*?Z)*?T x <- O <- I
2723 YAA )*?Z)*?T x <- O <- I
2724 YAA Z)*?T x <- O # Temporary unset I
2725 I
2726
2727 YAAZ Y(A)*?Z)*?T x <- O
2728 I
2729
2730 YAAZY (A)*?Z)*?T x <- O
2731 I
2732
2733 YAAZY A)*?Z)*?T x <- O <- I
2734 I
2735
2736 YAAZYA )*?Z)*?T x <- O <- I
2737 I
2738
2739 YAAZYA Z)*?T x <- O # Temporary unset I
2740 I,I
2741
2742 YAAZYAZ )*?T x <- O
2743 I,I
2744
2745 YAAZYAZ T x # Temporary unset O
2746 O
2747 I,I
2748
2749 YAAZYAZT x
2750 O
2751 I,I
2752 *******************************************************************/
a0d0e21e
LW
2753 case CURLYX: {
2754 CURCUR cc;
3280af22 2755 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2756 /* No need to save/restore up to this paren */
2757 I32 parenfloor = scan->flags;
c277df42
IZ
2758
2759 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2760 next += ARG(next);
3280af22
NIS
2761 cc.oldcc = PL_regcc;
2762 PL_regcc = &cc;
cb434fcc
IZ
2763 /* XXXX Probably it is better to teach regpush to support
2764 parenfloor > PL_regsize... */
2765 if (parenfloor > *PL_reglastparen)
2766 parenfloor = *PL_reglastparen; /* Pessimization... */
2767 cc.parenfloor = parenfloor;
a0d0e21e
LW
2768 cc.cur = -1;
2769 cc.min = ARG1(scan);
2770 cc.max = ARG2(scan);
c277df42 2771 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2772 cc.next = next;
2773 cc.minmod = minmod;
2774 cc.lastloc = 0;
3280af22 2775 PL_reginput = locinput;
a0d0e21e
LW
2776 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2777 regcpblow(cp);
3280af22 2778 PL_regcc = cc.oldcc;
4633a7c4 2779 saySAME(n);
a0d0e21e
LW
2780 }
2781 /* NOT REACHED */
2782 case WHILEM: {
2783 /*
2784 * This is really hard to understand, because after we match
2785 * what we're trying to match, we must make sure the rest of
2c2d71f5 2786 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2787 * to go back UP the parse tree by recursing ever deeper. And
2788 * if it fails, we have to reset our parent's current state
2789 * that we can try again after backing off.
2790 */
2791
c277df42 2792 CHECKPOINT cp, lastcp;
3280af22 2793 CURCUR* cc = PL_regcc;
c277df42
IZ
2794 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2795
4633a7c4 2796 n = cc->cur + 1; /* how many we know we matched */
3280af22 2797 PL_reginput = locinput;
a0d0e21e 2798
c277df42 2799 DEBUG_r(
9041c2e3
NIS
2800 PerlIO_printf(Perl_debug_log,
2801 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2802 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2803 (long)n, (long)cc->min,
c277df42
IZ
2804 (long)cc->max, (long)cc)
2805 );
4633a7c4 2806
a0d0e21e
LW
2807 /* If degenerate scan matches "", assume scan done. */
2808
579cf2c3 2809 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2810 PL_regcc = cc->oldcc;
2ab05381
IZ
2811 if (PL_regcc)
2812 ln = PL_regcc->cur;
c277df42 2813 DEBUG_r(
c3464db5
DD
2814 PerlIO_printf(Perl_debug_log,
2815 "%*s empty match detected, try continuation...\n",
3280af22 2816 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2817 );
a0d0e21e 2818 if (regmatch(cc->next))
4633a7c4 2819 sayYES;
2ab05381
IZ
2820 if (PL_regcc)
2821 PL_regcc->cur = ln;
3280af22 2822 PL_regcc = cc;
4633a7c4 2823 sayNO;
a0d0e21e
LW
2824 }
2825
2826 /* First just match a string of min scans. */
2827
2828 if (n < cc->min) {
2829 cc->cur = n;
2830 cc->lastloc = locinput;
4633a7c4
LW
2831 if (regmatch(cc->scan))
2832 sayYES;
2833 cc->cur = n - 1;
c277df42 2834 cc->lastloc = lastloc;
4633a7c4 2835 sayNO;
a0d0e21e
LW
2836 }
2837
2c2d71f5
JH
2838 if (scan->flags) {
2839 /* Check whether we already were at this position.
2840 Postpone detection until we know the match is not
2841 *that* much linear. */
2842 if (!PL_reg_maxiter) {
2843 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2844 PL_reg_leftiter = PL_reg_maxiter;
2845 }
2846 if (PL_reg_leftiter-- == 0) {
2847 I32 size = (PL_reg_maxiter + 7)/8;
2848 if (PL_reg_poscache) {
2849 if (PL_reg_poscache_size < size) {
2850 Renew(PL_reg_poscache, size, char);
2851 PL_reg_poscache_size = size;
2852 }
2853 Zero(PL_reg_poscache, size, char);
2854 }
2855 else {
2856 PL_reg_poscache_size = size;
2857 Newz(29, PL_reg_poscache, size, char);
2858 }
2859 DEBUG_r(
2860 PerlIO_printf(Perl_debug_log,
2861 "%sDetected a super-linear match, switching on caching%s...\n",
2862 PL_colors[4], PL_colors[5])
2863 );
2864 }
2865 if (PL_reg_leftiter < 0) {
2866 I32 o = locinput - PL_bostr, b;
2867
2868 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2869 b = o % 8;
2870 o /= 8;
2871 if (PL_reg_poscache[o] & (1<<b)) {
2872 DEBUG_r(
2873 PerlIO_printf(Perl_debug_log,
2874 "%*s already tried at this position...\n",
2875 REPORT_CODE_OFF+PL_regindent*2, "")
2876 );
7821416a 2877 sayNO_SILENT;
2c2d71f5
JH
2878 }
2879 PL_reg_poscache[o] |= (1<<b);
2880 }
2881 }
2882
a0d0e21e
LW
2883 /* Prefer next over scan for minimal matching. */
2884
2885 if (cc->minmod) {
3280af22 2886 PL_regcc = cc->oldcc;
2ab05381
IZ
2887 if (PL_regcc)
2888 ln = PL_regcc->cur;
5f05dabc 2889 cp = regcppush(cc->parenfloor);
02db2b7b 2890 REGCP_SET(lastcp);
5f05dabc 2891 if (regmatch(cc->next)) {
c277df42 2892 regcpblow(cp);
4633a7c4 2893 sayYES; /* All done. */
5f05dabc 2894 }
02db2b7b 2895 REGCP_UNWIND(lastcp);
5f05dabc 2896 regcppop();
2ab05381
IZ
2897 if (PL_regcc)
2898 PL_regcc->cur = ln;
3280af22 2899 PL_regcc = cc;
a0d0e21e 2900
c277df42 2901 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 2902 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2903 && !(PL_reg_flags & RF_warned)) {
2904 PL_reg_flags |= RF_warned;
e476b1b5 2905 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2906 "Complex regular subexpression recursion",
2907 REG_INFTY - 1);
c277df42 2908 }
4633a7c4 2909 sayNO;
c277df42 2910 }
a687059c 2911
c277df42 2912 DEBUG_r(
c3464db5
DD
2913 PerlIO_printf(Perl_debug_log,
2914 "%*s trying longer...\n",
3280af22 2915 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2916 );
a0d0e21e 2917 /* Try scanning more and see if it helps. */
3280af22 2918 PL_reginput = locinput;
a0d0e21e
LW
2919 cc->cur = n;
2920 cc->lastloc = locinput;
5f05dabc 2921 cp = regcppush(cc->parenfloor);
02db2b7b 2922 REGCP_SET(lastcp);
5f05dabc 2923 if (regmatch(cc->scan)) {
c277df42 2924 regcpblow(cp);
4633a7c4 2925 sayYES;
5f05dabc 2926 }
02db2b7b 2927 REGCP_UNWIND(lastcp);
5f05dabc 2928 regcppop();
4633a7c4 2929 cc->cur = n - 1;
c277df42 2930 cc->lastloc = lastloc;
4633a7c4 2931 sayNO;
a0d0e21e
LW
2932 }
2933
2934 /* Prefer scan over next for maximal matching. */
2935
2936 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2937 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2938 cc->cur = n;
2939 cc->lastloc = locinput;
02db2b7b 2940 REGCP_SET(lastcp);
5f05dabc 2941 if (regmatch(cc->scan)) {
c277df42 2942 regcpblow(cp);
4633a7c4 2943 sayYES;
5f05dabc 2944 }
02db2b7b 2945 REGCP_UNWIND(lastcp);
a0d0e21e 2946 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2947 PL_reginput = locinput;
c277df42 2948 DEBUG_r(
c3464db5
DD
2949 PerlIO_printf(Perl_debug_log,
2950 "%*s failed, try continuation...\n",
3280af22 2951 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2952 );
2953 }
9041c2e3 2954 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2955 && !(PL_reg_flags & RF_warned)) {
3280af22 2956 PL_reg_flags |= RF_warned;
e476b1b5 2957 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2958 "Complex regular subexpression recursion",
2959 REG_INFTY - 1);
a0d0e21e
LW
2960 }
2961
2962 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2963 PL_regcc = cc->oldcc;
2ab05381
IZ
2964 if (PL_regcc)
2965 ln = PL_regcc->cur;
a0d0e21e 2966 if (regmatch(cc->next))
4633a7c4 2967 sayYES;
2ab05381
IZ
2968 if (PL_regcc)
2969 PL_regcc->cur = ln;
3280af22 2970 PL_regcc = cc;
4633a7c4 2971 cc->cur = n - 1;
c277df42 2972 cc->lastloc = lastloc;
4633a7c4 2973 sayNO;
a0d0e21e
LW
2974 }
2975 /* NOT REACHED */
9041c2e3 2976 case BRANCHJ:
c277df42
IZ
2977 next = scan + ARG(scan);
2978 if (next == scan)
2979 next = NULL;
2980 inner = NEXTOPER(NEXTOPER(scan));
2981 goto do_branch;
9041c2e3 2982 case BRANCH:
c277df42
IZ
2983 inner = NEXTOPER(scan);
2984 do_branch:
2985 {
c277df42
IZ
2986 c1 = OP(scan);
2987 if (OP(next) != c1) /* No choice. */
2988 next = inner; /* Avoid recursion. */
a0d0e21e 2989 else {
02db2b7b
IZ
2990 I32 lastparen = *PL_reglastparen;
2991 I32 unwind1;
2992 re_unwind_branch_t *uw;
2993
2994 /* Put unwinding data on stack */
2995 unwind1 = SSNEWt(1,re_unwind_branch_t);
2996 uw = SSPTRt(unwind1,re_unwind_branch_t);
2997 uw->prev = unwind;
2998 unwind = unwind1;
2999 uw->type = ((c1 == BRANCH)
3000 ? RE_UNWIND_BRANCH
3001 : RE_UNWIND_BRANCHJ);
3002 uw->lastparen = lastparen;
3003 uw->next = next;
3004 uw->locinput = locinput;
3005 uw->nextchr = nextchr;
3006#ifdef DEBUGGING
3007 uw->regindent = ++PL_regindent;
3008#endif
c277df42 3009
02db2b7b
IZ
3010 REGCP_SET(uw->lastcp);
3011
3012 /* Now go into the first branch */
3013 next = inner;
a687059c 3014 }
a0d0e21e
LW
3015 }
3016 break;
3017 case MINMOD:
3018 minmod = 1;
3019 break;
c277df42
IZ
3020 case CURLYM:
3021 {
00db4c45 3022 I32 l = 0;
c277df42 3023 CHECKPOINT lastcp;
9041c2e3 3024
c277df42
IZ
3025 /* We suppose that the next guy does not need
3026 backtracking: in particular, it is of constant length,
3027 and has no parenths to influence future backrefs. */
3028 ln = ARG1(scan); /* min to match */
3029 n = ARG2(scan); /* max to match */
c277df42
IZ
3030 paren = scan->flags;
3031 if (paren) {
3280af22
NIS
3032 if (paren > PL_regsize)
3033 PL_regsize = paren;
3034 if (paren > *PL_reglastparen)
3035 *PL_reglastparen = paren;
c277df42 3036 }
dc45a647 3037 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3038 if (paren)
3039 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3040 PL_reginput = locinput;
c277df42
IZ
3041 if (minmod) {
3042 minmod = 0;
3043 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3044 sayNO;
f31a99c8
HS
3045 /* if we matched something zero-length we don't need to
3046 backtrack - capturing parens are already defined, so
3047 the caveat in the maximal case doesn't apply
3048
3049 XXXX if ln == 0, we can redo this check first time
3050 through the following loop
3051 */
3052 if (ln && l == 0)
3053 n = ln; /* don't backtrack */
3280af22 3054 locinput = PL_reginput;
e2d8ce26 3055 if (NEAR_EXACT(next)) {
5f80c4cf
JP
3056 regnode *text_node = next;
3057
3058 if (PL_regkind[(U8)OP(next)] != EXACT)
74750237 3059 NEXT_IMPT(text_node);
5f80c4cf
JP
3060
3061 if (PL_regkind[(U8)OP(text_node)] != EXACT) {
3062 c1 = c2 = -1000;
3063 }
3064 else {
3065 c1 = (U8)*STRING(text_node);
3066 if (OP(next) == EXACTF)
3067 c2 = PL_fold[c1];
3068 else if (OP(text_node) == EXACTFL)
3069 c2 = PL_fold_locale[c1];
3070 else
3071 c2 = c1;
3072 }
a0ed51b3
LW
3073 }
3074 else
c277df42 3075 c1 = c2 = -1000;
02db2b7b 3076 REGCP_SET(lastcp);
5f4b28b2 3077 /* This may be improved if l == 0. */
c277df42
IZ
3078 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3079 /* If it could work, try it. */
3080 if (c1 == -1000 ||
3280af22
NIS
3081 UCHARAT(PL_reginput) == c1 ||
3082 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3083 {
3084 if (paren) {
f31a99c8 3085 if (ln) {
cf93c79d
IZ
3086 PL_regstartp[paren] =
3087 HOPc(PL_reginput, -l) - PL_bostr;
3088 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3089 }
3090 else
cf93c79d 3091 PL_regendp[paren] = -1;
c277df42
IZ
3092 }
3093 if (regmatch(next))
3094 sayYES;
02db2b7b 3095 REGCP_UNWIND(lastcp);
c277df42
IZ
3096 }
3097 /* Couldn't or didn't -- move forward. */
3280af22 3098 PL_reginput = locinput;
c277df42
IZ
3099 if (regrepeat_hard(scan, 1, &l)) {
3100 ln++;
3280af22 3101 locinput = PL_reginput;
c277df42
IZ
3102 }
3103 else
3104 sayNO;
3105 }
a0ed51b3
LW
3106 }
3107 else {
c277df42 3108 n = regrepeat_hard(scan, n, &l);
f31a99c8
HS
3109 /* if we matched something zero-length we don't need to
3110 backtrack, unless the minimum count is zero and we
3111 are capturing the result - in that case the capture
3112 being defined or not may affect later execution
3113 */
3114 if (n != 0 && l == 0 && !(paren && ln == 0))
3115 ln = n; /* don't backtrack */
3280af22 3116 locinput = PL_reginput;
c277df42 3117 DEBUG_r(
5c0ca799 3118 PerlIO_printf(Perl_debug_log,
faccc32b 3119 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3120 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3121 (IV) n, (IV)l)
c277df42
IZ
3122 );
3123 if (n >= ln) {
e2d8ce26 3124 if (NEAR_EXACT(next)) {
5f80c4cf
JP
3125 regnode *text_node = next;
3126
3127 if (PL_regkind[(U8)OP(next)] != EXACT)
74750237 3128 NEXT_IMPT(text_node);
5f80c4cf
JP
3129
3130 if (PL_regkind[(U8)OP(text_node)] != EXACT) {
3131 c1 = c2 = -1000;
3132 }
3133 else {
3134 c1 = (U8)*STRING(text_node);
3135 if (OP(text_node) == EXACTF)
3136 c2 = PL_fold[c1];
3137 else if (OP(text_node) == EXACTFL)
3138 c2 = PL_fold_locale[c1];
3139 else
3140 c2 = c1;
3141 }
a0ed51b3
LW
3142 }
3143 else
c277df42
IZ
3144 c1 = c2 = -1000;
3145 }
02db2b7b 3146 REGCP_SET(lastcp);
c277df42
IZ
3147 while (n >= ln) {
3148 /* If it could work, try it. */
3149 if (c1 == -1000 ||
3280af22
NIS
3150 UCHARAT(PL_reginput) == c1 ||
3151 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3152 {
3153 DEBUG_r(
c3464db5 3154 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3155 "%*s trying tail with n=%"IVdf"...\n",
3156 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3157 );
3158 if (paren) {
3159 if (n) {
cf93c79d
IZ
3160 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3161 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3162 }
a0ed51b3 3163 else
cf93c79d 3164 PL_regendp[paren] = -1;
c277df42 3165 }
a0ed51b3
LW
3166 if (regmatch(next))
3167 sayYES;
02db2b7b 3168 REGCP_UNWIND(lastcp);
a0ed51b3 3169 }
c277df42
IZ
3170 /* Couldn't or didn't -- back up. */
3171 n--;
dfe13c55 3172 locinput = HOPc(locinput, -l);
3280af22 3173 PL_reginput = locinput;
c277df42
IZ
3174 }
3175 }
3176 sayNO;
3177 break;
3178 }
3179 case CURLYN:
3180 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3181 if (paren > PL_regsize)
3182 PL_regsize = paren;
3183 if (paren > *PL_reglastparen)
3184 *PL_reglastparen = paren;
c277df42
IZ
3185 ln = ARG1(scan); /* min to match */
3186 n = ARG2(scan); /* max to match */
dc45a647 3187 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3188 goto repeat;
a0d0e21e 3189 case CURLY:
c277df42 3190 paren = 0;
a0d0e21e
LW
3191 ln = ARG1(scan); /* min to match */
3192 n = ARG2(scan); /* max to match */
dc45a647 3193 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3194 goto repeat;
3195 case STAR:
3196 ln = 0;
c277df42 3197 n = REG_INFTY;
a0d0e21e 3198 scan = NEXTOPER(scan);
c277df42 3199 paren = 0;
a0d0e21e
LW
3200 goto repeat;
3201 case PLUS:
c277df42
IZ
3202 ln = 1;
3203 n = REG_INFTY;
3204 scan = NEXTOPER(scan);
3205 paren = 0;
3206 repeat:
a0d0e21e
LW
3207 /*
3208 * Lookahead to avoid useless match attempts
3209 * when we know what character comes next.
3210 */
5f80c4cf
JP
3211
3212 /*
3213 * Used to only do .*x and .*?x, but now it allows
3214 * for )'s, ('s and (?{ ... })'s to be in the way
3215 * of the quantifier and the EXACT-like node. -- japhy
3216 */
3217
e2d8ce26 3218 if (NEAR_EXACT(next)) {
5f80c4cf
JP
3219 U8 *s;
3220 regnode *text_node = next;
3221
3222 if (PL_regkind[(U8)OP(next)] != EXACT)
74750237 3223 NEXT_IMPT(text_node);
5f80c4cf
JP
3224
3225 if (PL_regkind[(U8)OP(text_node)] != EXACT) {
3226 c1 = c2 = -1000;
3227 }
3228 else {
3229 s = (U8*)STRING(text_node);
3230
3231 if (!UTF) {
3232 c2 = c1 = *s;
3233 if (OP(text_node) == EXACTF)
3234 c2 = PL_fold[c1];
3235 else if (OP(text_node) == EXACTFL)
3236 c2 = PL_fold_locale[c1];
1aa99e6b 3237 }
5f80c4cf
JP
3238 else { /* UTF */
3239 if (OP(text_node) == EXACTF) {
3240 c1 = to_utf8_lower(s);
3241 c2 = to_utf8_upper(s);
3242 }
3243 else {
3244 c2 = c1 = utf8_to_uvchr(s, NULL);
3245 }
1aa99e6b
IH
3246 }
3247 }
bbce6d69 3248 }
a0d0e21e 3249 else
bbce6d69 3250 c1 = c2 = -1000;
3280af22 3251 PL_reginput = locinput;
a0d0e21e 3252 if (minmod) {
c277df42 3253 CHECKPOINT lastcp;
a0d0e21e
LW
3254 minmod = 0;
3255 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3256 sayNO;
a0ed51b3 3257 locinput = PL_reginput;
02db2b7b 3258 REGCP_SET(lastcp);
0fe9bf95 3259 if (c1 != -1000) {
1aa99e6b 3260 char *e; /* Should not check after this */
0fe9bf95
IZ
3261 char *old = locinput;
3262
1aa99e6b 3263 if (n == REG_INFTY) {
0fe9bf95 3264 e = PL_regeol - 1;
1aa99e6b
IH
3265 if (do_utf8)
3266 while (UTF8_IS_CONTINUATION(*(U8*)e))
3267 e--;
3268 }
3269 else if (do_utf8) {
3270 int m = n - ln;
3271 for (e = locinput;
3272 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3273 e += UTF8SKIP(e);
3274 }
3275 else {
3276 e = locinput + n - ln;
3277 if (e >= PL_regeol)
3278 e = PL_regeol - 1;
3279 }
0fe9bf95 3280 while (1) {
1aa99e6b 3281 int count;
0fe9bf95 3282 /* Find place 'next' could work */
1aa99e6b
IH
3283 if (!do_utf8) {
3284 if (c1 == c2) {
a8e8ab15
JH
3285 while (locinput <= e &&
3286 UCHARAT(locinput) != c1)
1aa99e6b
IH
3287 locinput++;
3288 } else {
9041c2e3 3289 while (locinput <= e
a8e8ab15
JH
3290 && UCHARAT(locinput) != c1
3291 && UCHARAT(locinput) != c2)
1aa99e6b
IH
3292 locinput++;
3293 }
3294 count = locinput - old;
3295 }
3296 else {
3297 STRLEN len;
3298 if (c1 == c2) {
3299 for (count = 0;
3300 locinput <= e &&
9041c2e3 3301 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3302 count++)
3303 locinput += len;
3304
3305 } else {
3306 for (count = 0; locinput <= e; count++) {
9041c2e3 3307 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3308 if (c == c1 || c == c2)
3309 break;
9041c2e3 3310 locinput += len;
1aa99e6b
IH
3311 }
3312 }
0fe9bf95 3313 }
9041c2e3 3314 if (locinput > e)
0fe9bf95
IZ
3315 sayNO;
3316 /* PL_reginput == old now */
3317 if (locinput != old) {
3318 ln = 1; /* Did some */
1aa99e6b 3319 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3320 sayNO;
3321 }
3322 /* PL_reginput == locinput now */
29d1e993 3323 TRYPAREN(paren, ln, locinput);
0fe9bf95 3324 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3325 REGCP_UNWIND(lastcp);
0fe9bf95 3326 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3327 old = locinput;
3328 if (do_utf8)
3329 locinput += UTF8SKIP(locinput);
3330 else
3331 locinput++;
0fe9bf95
IZ
3332 }
3333 }
3334 else
c277df42 3335 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3336 UV c;
3337 if (c1 != -1000) {
3338 if (do_utf8)
9041c2e3 3339 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3340 else
9041c2e3 3341 c = UCHARAT(PL_reginput);
2390ecbc
PP
3342 /* If it could work, try it. */
3343 if (c == c1 || c == c2)
3344 {
3345 TRYPAREN(paren, n, PL_reginput);
3346 REGCP_UNWIND(lastcp);
3347 }
1aa99e6b 3348 }
a0d0e21e 3349 /* If it could work, try it. */
2390ecbc 3350 else if (c1 == -1000)
bbce6d69 3351 {
29d1e993 3352 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3353 REGCP_UNWIND(lastcp);
bbce6d69 3354 }
c277df42 3355 /* Couldn't or didn't -- move forward. */
a0ed51b3 3356 PL_reginput = locinput;
a0d0e21e
LW
3357 if (regrepeat(scan, 1)) {
3358 ln++;
a0ed51b3
LW
3359 locinput = PL_reginput;
3360 }
3361 else
4633a7c4 3362 sayNO;
a0d0e21e
LW
3363 }
3364 }
3365 else {
c277df42 3366 CHECKPOINT lastcp;
a0d0e21e 3367 n = regrepeat(scan, n);
a0ed51b3 3368 locinput = PL_reginput;
22c35a8c 3369 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3370 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3371 ln = n; /* why back off? */
1aeab75a
GS
3372 /* ...because $ and \Z can match before *and* after
3373 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3374 We should back off by one in this case. */
3375 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3376 ln--;
3377 }
02db2b7b 3378 REGCP_SET(lastcp);
c277df42 3379 if (paren) {
8fa7f367 3380 UV c = 0;
c277df42 3381 while (n >= ln) {
1aa99e6b
IH
3382 if (c1 != -1000) {
3383 if (do_utf8)
9041c2e3 3384 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3385 else
9041c2e3 3386 c = UCHARAT(PL_reginput);
1aa99e6b 3387 }
c277df42 3388 /* If it could work, try it. */
1aa99e6b 3389 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3390 {
29d1e993 3391 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3392 REGCP_UNWIND(lastcp);
c277df42
IZ
3393 }
3394 /* Couldn't or didn't -- back up. */
3395 n--;
dfe13c55 3396 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3397 }
a0ed51b3
LW
3398 }
3399 else {
8fa7f367 3400 UV c = 0;
c277df42 3401 while (n >= ln) {
1aa99e6b
IH
3402 if (c1 != -1000) {
3403 if (do_utf8)
9041c2e3 3404 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3405 else
9041c2e3 3406 c = UCHARAT(PL_reginput);
1aa99e6b 3407 }
c277df42 3408 /* If it could work, try it. */
1aa99e6b 3409 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3410 {
29d1e993 3411 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3412 REGCP_UNWIND(lastcp);
c277df42
IZ
3413 }
3414 /* Couldn't or didn't -- back up. */
3415 n--;
dfe13c55 3416 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3417 }
a0d0e21e
LW
3418 }
3419 }
4633a7c4 3420 sayNO;
c277df42 3421 break;
a0d0e21e 3422 case END:
0f5d15d6
IZ
3423 if (PL_reg_call_cc) {
3424 re_cc_state *cur_call_cc = PL_reg_call_cc;
3425 CURCUR *cctmp = PL_regcc;
3426 regexp *re = PL_reg_re;
3427 CHECKPOINT cp, lastcp;
3428
3429 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3430 REGCP_SET(lastcp);
0f5d15d6
IZ
3431 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3432 the caller. */
3433 PL_reginput = locinput; /* Make position available to
3434 the callcc. */
3435 cache_re(PL_reg_call_cc->re);
3436 PL_regcc = PL_reg_call_cc->cc;
3437 PL_reg_call_cc = PL_reg_call_cc->prev;
3438 if (regmatch(cur_call_cc->node)) {
3439 PL_reg_call_cc = cur_call_cc;
3440 regcpblow(cp);
3441 sayYES;
3442 }
02db2b7b 3443 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3444 regcppop();
3445 PL_reg_call_cc = cur_call_cc;
3446 PL_regcc = cctmp;
3447 PL_reg_re = re;
3448 cache_re(re);
3449
3450 DEBUG_r(
3451 PerlIO_printf(Perl_debug_log,
3452 "%*s continuation failed...\n",
3453 REPORT_CODE_OFF+PL_regindent*2, "")
3454 );
7821416a 3455 sayNO_SILENT;
0f5d15d6 3456 }
7821416a
IZ
3457 if (locinput < PL_regtill) {
3458 DEBUG_r(PerlIO_printf(Perl_debug_log,
3459 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3460 PL_colors[4],
3461 (long)(locinput - PL_reg_starttry),
3462 (long)(PL_regtill - PL_reg_starttry),
3463 PL_colors[5]));
3464 sayNO_FINAL; /* Cannot match: too short. */
3465 }
3466 PL_reginput = locinput; /* put where regtry can find it */
3467 sayYES_FINAL; /* Success! */
7e5428c5 3468 case SUCCEED:
3280af22 3469 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3470 sayYES_LOUD; /* Success! */
c277df42
IZ
3471 case SUSPEND:
3472 n = 1;
9fe1d20c 3473 PL_reginput = locinput;
9041c2e3 3474 goto do_ifmatch;
a0d0e21e 3475 case UNLESSM:
c277df42 3476 n = 0;
a0ed51b3 3477 if (scan->flags) {
efb30f32
HS
3478 s = HOPBACKc(locinput, scan->flags);
3479 if (!s)
3480 goto say_yes;
3481 PL_reginput = s;
a0ed51b3
LW
3482 }
3483 else
3484 PL_reginput = locinput;
c277df42
IZ
3485 goto do_ifmatch;
3486 case IFMATCH:
3487 n = 1;
a0ed51b3 3488 if (scan->flags) {
efb30f32
HS
3489 s = HOPBACKc(locinput, scan->flags);
3490 if (!s)
3491 goto say_no;
3492 PL_reginput = s;
a0ed51b3
LW
3493 }
3494 else
3495 PL_reginput = locinput;
3496
c277df42 3497 do_ifmatch:
c277df42
IZ
3498 inner = NEXTOPER(NEXTOPER(scan));
3499 if (regmatch(inner) != n) {
3500 say_no:
3501 if (logical) {
3502 logical = 0;
3503 sw = 0;
3504 goto do_longjump;
a0ed51b3
LW
3505 }
3506 else
c277df42
IZ
3507 sayNO;
3508 }
3509 say_yes:
3510 if (logical) {
3511 logical = 0;
3512 sw = 1;
3513 }
fe44a5e8 3514 if (OP(scan) == SUSPEND) {
3280af22 3515 locinput = PL_reginput;
565764a8 3516 nextchr = UCHARAT(locinput);
fe44a5e8 3517 }
c277df42
IZ
3518 /* FALL THROUGH. */
3519 case LONGJMP:
3520 do_longjump:
3521 next = scan + ARG(scan);
3522 if (next == scan)
3523 next = NULL;
a0d0e21e
LW
3524 break;
3525 default:
b900a521 3526 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3527 PTR2UV(scan), OP(scan));
cea2e8a9 3528 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3529 }
02db2b7b 3530 reenter:
a0d0e21e
LW
3531 scan = next;
3532 }
a687059c 3533
a0d0e21e
LW
3534 /*
3535 * We get here only if there's trouble -- normally "case END" is
3536 * the terminating point.
3537 */
cea2e8a9 3538 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3539 /*NOTREACHED*/
4633a7c4
LW
3540 sayNO;
3541
7821416a
IZ
3542yes_loud:
3543 DEBUG_r(
3544 PerlIO_printf(Perl_debug_log,
3545 "%*s %scould match...%s\n",
3546 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3547 );
3548 goto yes;
3549yes_final:
3550 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3551 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3552yes:
3553#ifdef DEBUGGING
3280af22 3554 PL_regindent--;
4633a7c4 3555#endif
02db2b7b
IZ
3556
3557#if 0 /* Breaks $^R */
3558 if (unwind)
3559 regcpblow(firstcp);
3560#endif
4633a7c4
LW
3561 return 1;
3562
3563no:
7821416a
IZ
3564 DEBUG_r(
3565 PerlIO_printf(Perl_debug_log,
3566 "%*s %sfailed...%s\n",
3567 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3568 );
3569 goto do_no;
3570no_final:
3571do_no:
02db2b7b
IZ
3572 if (unwind) {
3573 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3574
3575 switch (uw->type) {
3576 case RE_UNWIND_BRANCH:
3577 case RE_UNWIND_BRANCHJ:
3578 {
3579 re_unwind_branch_t *uwb = &(uw->branch);
3580 I32 lastparen = uwb->lastparen;
9041c2e3 3581
02db2b7b
IZ
3582 REGCP_UNWIND(uwb->lastcp);
3583 for (n = *PL_reglastparen; n > lastparen; n--)
3584 PL_regendp[n] = -1;
3585 *PL_reglastparen = n;
3586 scan = next = uwb->next;
9041c2e3
NIS
3587 if ( !scan ||
3588 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
3589 ? BRANCH : BRANCHJ) ) { /* Failure */
3590 unwind = uwb->prev;
3591#ifdef DEBUGGING
3592 PL_regindent--;
3593#endif
3594 goto do_no;
3595 }
3596 /* Have more choice yet. Reuse the same uwb. */
3597 /*SUPPRESS 560*/
3598 if ((n = (uwb->type == RE_UNWIND_BRANCH
3599 ? NEXT_OFF(next) : ARG(next))))
3600 next += n;
3601 else
3602 next = NULL; /* XXXX Needn't unwinding in this case... */
3603 uwb->next = next;
3604 next = NEXTOPER(scan);
3605 if (uwb->type == RE_UNWIND_BRANCHJ)
3606 next = NEXTOPER(next);
3607 locinput = uwb->locinput;
3608 nextchr = uwb->nextchr;
3609#ifdef DEBUGGING
3610 PL_regindent = uwb->regindent;
3611#endif
3612
3613 goto reenter;
3614 }
3615 /* NOT REACHED */
3616 default:
3617 Perl_croak(aTHX_ "regexp unwind memory corruption");
3618 }
3619 /* NOT REACHED */
3620 }
4633a7c4 3621#ifdef DEBUGGING
3280af22 3622 PL_regindent--;
4633a7c4 3623#endif
a0d0e21e 3624 return 0;
a687059c
LW
3625}
3626
3627/*
3628 - regrepeat - repeatedly match something simple, report how many
3629 */
3630/*
3631 * [This routine now assumes that it will only match on things of length 1.
3632 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3633 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3634 */
76e3520e 3635STATIC I32
cea2e8a9 3636S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3637{
a0d0e21e 3638 register char *scan;
a0d0e21e 3639 register I32 c;
3280af22 3640 register char *loceol = PL_regeol;
a0ed51b3 3641 register I32 hardcount = 0;
53c4c00c 3642 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 3643
3280af22 3644 scan = PL_reginput;
c277df42 3645 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3646 loceol = scan + max;
a0d0e21e 3647 switch (OP(p)) {
22c35a8c 3648 case REG_ANY:
1aa99e6b 3649 if (do_utf8) {
ffc61ed2 3650 loceol = PL_regeol;
1aa99e6b 3651 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3652 scan += UTF8SKIP(scan);
3653 hardcount++;
3654 }
3655 } else {
3656 while (scan < loceol && *scan != '\n')
3657 scan++;
a0ed51b3
LW
3658 }
3659 break;
ffc61ed2 3660 case SANY:
3baa4c62 3661 scan = loceol;
a0ed51b3 3662 break;
f33976b4
DB
3663 case CANY:
3664 scan = loceol;
3665 break;
bbce6d69 3666 case EXACT: /* length of string is 1 */
cd439c50 3667 c = (U8)*STRING(p);
bbce6d69 3668 while (scan < loceol && UCHARAT(scan) == c)
3669 scan++;
3670 break;
3671 case EXACTF: /* length of string is 1 */
cd439c50 3672 c = (U8)*STRING(p);
bbce6d69 3673 while (scan < loceol &&
22c35a8c 3674 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3675 scan++;
3676 break;
3677 case EXACTFL: /* length of string is 1 */
3280af22 3678 PL_reg_flags |= RF_tainted;
cd439c50 3679 c = (U8)*STRING(p);
bbce6d69 3680 while (scan < loceol &&
22c35a8c 3681 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3682 scan++;
3683 break;
3684 case ANYOF:
ffc61ed2
JH
3685 if (do_utf8) {
3686 loceol = PL_regeol;
1aa99e6b
IH
3687 while (hardcount < max && scan < loceol &&
3688 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3689 scan += UTF8SKIP(scan);
3690 hardcount++;
3691 }
3692 } else {
3693 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3694 scan++;
3695 }
a0d0e21e
LW
3696 break;
3697 case ALNUM:
1aa99e6b 3698 if (do_utf8) {
ffc61ed2 3699 loceol = PL_regeol;
8269fa76 3700 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3701 while (hardcount < max && scan < loceol &&
3568d838 3702 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3703 scan += UTF8SKIP(scan);
3704 hardcount++;
3705 }
3706 } else {
3707 while (scan < loceol && isALNUM(*scan))
3708 scan++;
a0ed51b3
LW
3709 }
3710 break;
bbce6d69 3711 case ALNUML:
3280af22 3712 PL_reg_flags |= RF_tainted;
1aa99e6b 3713 if (do_utf8) {
ffc61ed2 3714 loceol = PL_regeol;
1aa99e6b
IH
3715 while (hardcount < max && scan < loceol &&
3716 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3717 scan += UTF8SKIP(scan);
3718 hardcount++;
3719 }
3720 } else {
3721 while (scan < loceol && isALNUM_LC(*scan))
3722 scan++;
a0ed51b3
LW
3723 }
3724 break;
a0d0e21e 3725 case NALNUM:
1aa99e6b 3726 if (do_utf8) {
ffc61ed2 3727 loceol = PL_regeol;
8269fa76 3728 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3729 while (hardcount < max && scan < loceol &&
3568d838 3730 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3731 scan += UTF8SKIP(scan);
3732 hardcount++;
3733 }
3734 } else {
3735 while (scan < loceol && !isALNUM(*scan))
3736 scan++;
a0ed51b3
LW
3737 }
3738 break;
bbce6d69 3739 case NALNUML:
3280af22 3740 PL_reg_flags |= RF_tainted;
1aa99e6b 3741 if (do_utf8) {
ffc61ed2 3742 loceol = PL_regeol;
1aa99e6b
IH
3743 while (hardcount < max && scan < loceol &&
3744 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3745 scan += UTF8SKIP(scan);
3746 hardcount++;
3747 }
3748 } else {
3749 while (scan < loceol && !isALNUM_LC(*scan))
3750 scan++;
a0ed51b3
LW
3751 }
3752 break;
a0d0e21e 3753 case SPACE:
1aa99e6b 3754 if (do_utf8) {
ffc61ed2 3755 loceol = PL_regeol;
8269fa76 3756 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3757 while (hardcount < max && scan < loceol &&
3568d838
JH
3758 (*scan == ' ' ||
3759 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3760 scan += UTF8SKIP(scan);
3761 hardcount++;
3762 }
3763 } else {
3764 while (scan < loceol && isSPACE(*scan))
3765 scan++;
a0ed51b3
LW
3766 }
3767 break;
bbce6d69 3768 case SPACEL:
3280af22 3769 PL_reg_flags |= RF_tainted;
1aa99e6b 3770 if (do_utf8) {
ffc61ed2 3771 loceol = PL_regeol;
1aa99e6b 3772 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3773 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3774 scan += UTF8SKIP(scan);
3775 hardcount++;
3776 }
3777 } else {
3778 while (scan < loceol && isSPACE_LC(*scan))
3779 scan++;
a0ed51b3
LW
3780 }
3781 break;
a0d0e21e 3782 case NSPACE:
1aa99e6b 3783 if (do_utf8) {
ffc61ed2 3784 loceol = PL_regeol;
8269fa76 3785 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3786 while (hardcount < max && scan < loceol &&
3568d838
JH
3787 !(*scan == ' ' ||
3788 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3789 scan += UTF8SKIP(scan);
3790 hardcount++;
3791 }
3792 } else {
3793 while (scan < loceol && !isSPACE(*scan))
3794 scan++;
3795 break;
a0ed51b3 3796 }
bbce6d69 3797 case NSPACEL:
3280af22 3798 PL_reg_flags |= RF_tainted;
1aa99e6b 3799 if (do_utf8) {
ffc61ed2 3800 loceol = PL_regeol;
1aa99e6b 3801 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3802 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3803 scan += UTF8SKIP(scan);
3804 hardcount++;
3805 }
3806 } else {
3807 while (scan < loceol && !isSPACE_LC(*scan))
3808 scan++;
a0ed51b3
LW
3809 }
3810 break;
a0d0e21e 3811 case DIGIT:
1aa99e6b 3812 if (do_utf8) {
ffc61ed2 3813 loceol = PL_regeol;
8269fa76 3814 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3815 while (hardcount < max && scan < loceol &&
3568d838 3816 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3817 scan += UTF8SKIP(scan);
3818 hardcount++;
3819 }
3820 } else {
3821 while (scan < loceol && isDIGIT(*scan))
3822 scan++;
a0ed51b3
LW
3823 }
3824 break;
a0d0e21e 3825 case NDIGIT:
1aa99e6b 3826 if (do_utf8) {
ffc61ed2 3827 loceol = PL_regeol;
8269fa76 3828 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3829 while (hardcount < max && scan < loceol &&
3568d838 3830 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3831 scan += UTF8SKIP(scan);
3832 hardcount++;
3833 }
3834 } else {
3835 while (scan < loceol && !isDIGIT(*scan))
3836 scan++;
a0ed51b3
LW
3837 }
3838 break;
a0d0e21e
LW
3839 default: /* Called on something of 0 width. */
3840 break; /* So match right here or not at all. */
3841 }
a687059c 3842
a0ed51b3
LW
3843 if (hardcount)
3844 c = hardcount;
3845 else
3846 c = scan - PL_reginput;
3280af22 3847 PL_reginput = scan;
a687059c 3848
9041c2e3 3849 DEBUG_r(
c277df42
IZ
3850 {
3851 SV *prop = sv_newmortal();
3852
3853 regprop(prop, p);
9041c2e3
NIS
3854 PerlIO_printf(Perl_debug_log,
3855 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
7b0972df 3856 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42 3857 });
9041c2e3 3858
a0d0e21e 3859 return(c);
a687059c
LW
3860}
3861
3862/*
c277df42 3863 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 3864 *
c277df42
IZ
3865 * The repeater is supposed to have constant length.
3866 */
3867
76e3520e 3868STATIC I32
cea2e8a9 3869S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3870{
b7953727 3871 register char *scan = Nullch;
c277df42 3872 register char *start;
3280af22 3873 register char *loceol = PL_regeol;
a0ed51b3 3874 I32 l = 0;
708e3b05 3875 I32 count = 0, res = 1;
a0ed51b3
LW
3876
3877 if (!max)
3878 return 0;
c277df42 3879
3280af22 3880 start = PL_reginput;
53c4c00c 3881 if (PL_reg_match_utf8) {
708e3b05 3882 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3883 if (!count++) {
3884 l = 0;
3885 while (start < PL_reginput) {
3886 l++;
3887 start += UTF8SKIP(start);
3888 }
3889 *lp = l;
3890 if (l == 0)
3891 return max;
3892 }
3893 if (count == max)
3894 return count;
3895 }
3896 }
3897 else {
708e3b05 3898 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3899 if (!count++) {
3900 *lp = l = PL_reginput - start;
3901 if (max != REG_INFTY && l*max < loceol - scan)
3902 loceol = scan + l*max;
3903 if (l == 0)
3904 return max;
c277df42
IZ
3905 }
3906 }
3907 }
708e3b05 3908 if (!res)
3280af22 3909 PL_reginput = scan;
9041c2e3 3910
a0ed51b3 3911 return count;
c277df42
IZ
3912}
3913
3914/*
ffc61ed2
JH
3915- regclass_swash - prepare the utf8 swash
3916*/
3917
3918SV *
3919Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
3920{
3921 SV *sw = NULL;
3922 SV *si = NULL;
3923
3924 if (PL_regdata && PL_regdata->count) {
3925 U32 n = ARG(node);
3926
3927 if (PL_regdata->what[n] == 's') {
3928 SV *rv = (SV*)PL_regdata->data[n];
3929 AV *av = (AV*)SvRV((SV*)rv);
3930 SV **a;
9041c2e3 3931
ffc61ed2
JH
3932 si = *av_fetch(av, 0, FALSE);
3933 a = av_fetch(av, 1, FALSE);
9041c2e3 3934
ffc61ed2
JH
3935 if (a)
3936 sw = *a;
3937 else if (si && doinit) {
3938 sw = swash_init("utf8", "", si, 1, 0);
3939 (void)av_store(av, 1, sw);
3940 }
3941 }
3942 }
3943
3944 if (initsvp)
3945 *initsvp = si;
3946
3947 return sw;
3948}
3949
3950/*
cb8d8820 3951 - reginclass - determine if a character falls into a character class
bbce6d69 3952 */
3953
76e3520e 3954STATIC bool
ffc61ed2 3955S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
bbce6d69 3956{
ffc61ed2 3957 char flags = ANYOF_FLAGS(n);
bbce6d69 3958 bool match = FALSE;
1aa99e6b 3959 UV c;
3568d838 3960 STRLEN len = 0;
1aa99e6b 3961
3568d838 3962 c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
bbce6d69 3963
ffc61ed2
JH
3964 if (do_utf8 || (flags & ANYOF_UNICODE)) {
3965 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
3966 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
3967 match = TRUE;
bbce6d69 3968 }
3568d838 3969 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 3970 match = TRUE;
ffc61ed2
JH
3971 if (!match) {
3972 SV *sw = regclass_swash(n, TRUE, 0);
3973
3974 if (sw) {
3568d838 3975 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
3976 match = TRUE;
3977 else if (flags & ANYOF_FOLD) {
3978 U8 tmpbuf[UTF8_MAXLEN+1];
9041c2e3 3979
ffc61ed2
JH
3980 if (flags & ANYOF_LOCALE) {
3981 PL_reg_flags |= RF_tainted;
9041c2e3 3982 uvchr_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
ffc61ed2 3983 }
1aa99e6b 3984 else
9041c2e3 3985 uvchr_to_utf8(tmpbuf, toLOWER_utf8(p));
3568d838 3986 if (swash_fetch(sw, tmpbuf, do_utf8))
ffc61ed2
JH
3987 match = TRUE;
3988 }
3989 }
bbce6d69 3990 }
3991 }
1aa99e6b 3992 if (!match && c < 256) {
ffc61ed2
JH
3993 if (ANYOF_BITMAP_TEST(n, c))
3994 match = TRUE;
3995 else if (flags & ANYOF_FOLD) {
3568d838 3996 I32 f;
a0ed51b3 3997
ffc61ed2
JH
3998 if (flags & ANYOF_LOCALE) {
3999 PL_reg_flags |= RF_tainted;
4000 f = PL_fold_locale[c];
4001 }
4002 else
4003 f = PL_fold[c];
4004 if (f != c && ANYOF_BITMAP_TEST(n, f))
4005 match = TRUE;
4006 }
4007
4008 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 4009 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
4010 if (
4011 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4012 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4013 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
4014 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4015 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4016 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4017 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4018 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4019 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4020 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4021 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
4022 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
4023 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4024 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4025 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4026 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4027 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
4028 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4029 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
4030 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4031 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4032 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4033 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
4034 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4035 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4036 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
4037 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
4038 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
4039 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
4040 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
4041 ) /* How's that for a conditional? */
4042 {
4043 match = TRUE;
4044 }
a0ed51b3 4045 }
a0ed51b3
LW
4046 }
4047
a0ed51b3
LW
4048 return (flags & ANYOF_INVERT) ? !match : match;
4049}
161b471a 4050
dfe13c55 4051STATIC U8 *
cea2e8a9 4052S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 4053{
1aa99e6b
IH
4054 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4055}
4056
4057STATIC U8 *
4058S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 4059{
a0ed51b3 4060 if (off >= 0) {
1aa99e6b 4061 while (off-- && s < lim) {
ffc61ed2 4062 /* XXX could check well-formedness here */
a0ed51b3 4063 s += UTF8SKIP(s);
ffc61ed2 4064 }
a0ed51b3
LW
4065 }
4066 else {
4067 while (off++) {
1aa99e6b 4068 if (s > lim) {
a0ed51b3 4069 s--;
ffc61ed2 4070 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4071 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4072 s--;
ffc61ed2
JH
4073 }
4074 /* XXX could check well-formedness here */
a0ed51b3
LW
4075 }
4076 }
4077 }
4078 return s;
4079}
161b471a 4080
dfe13c55 4081STATIC U8 *
1aa99e6b 4082S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 4083{
1aa99e6b
IH
4084 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4085}
4086
4087STATIC U8 *
4088S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
4089{
4090 if (off >= 0) {
1aa99e6b 4091 while (off-- && s < lim) {
ffc61ed2 4092 /* XXX could check well-formedness here */
a0ed51b3 4093 s += UTF8SKIP(s);
ffc61ed2 4094 }
a0ed51b3
LW
4095 if (off >= 0)
4096 return 0;
4097 }
4098 else {
4099 while (off++) {
1aa99e6b 4100 if (s > lim) {
a0ed51b3 4101 s--;
ffc61ed2 4102 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4103 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4104 s--;
ffc61ed2
JH
4105 }
4106 /* XXX could check well-formedness here */
a0ed51b3
LW
4107 }
4108 else
4109 break;
4110 }
4111 if (off <= 0)
4112 return 0;
4113 }
4114 return s;
4115}
51371543 4116
51371543 4117static void
acfe0abc 4118restore_pos(pTHX_ void *arg)
51371543 4119{
51371543
GS
4120 if (PL_reg_eval_set) {
4121 if (PL_reg_oldsaved) {
4122 PL_reg_re->subbeg = PL_reg_oldsaved;
4123 PL_reg_re->sublen = PL_reg_oldsavedlen;
4124 RX_MATCH_COPIED_on(PL_reg_re);
4125 }
4126 PL_reg_magic->mg_len = PL_reg_oldpos;
4127 PL_reg_eval_set = 0;
4128 PL_curpm = PL_reg_oldcurpm;
4129 }
4130}