This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
may be uninitialized warning forperl@12003
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
a687059c
LW
83#include "regcomp.h"
84
c277df42
IZ
85#define RF_tainted 1 /* tainted information used? */
86#define RF_warned 2 /* warned about big count? */
ce862d02 87#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
88#define RF_utf8 8 /* String contains multibyte chars? */
89
90#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
91
92#define RS_init 1 /* eval environment created */
93#define RS_set 2 /* replsv value is set */
c277df42 94
a687059c
LW
95#ifndef STATIC
96#define STATIC static
97#endif
98
c277df42
IZ
99/*
100 * Forwards.
101 */
102
a0ed51b3 103#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
53c4c00c 104#define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
a0ed51b3 105
dfe13c55
GS
106#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
107#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
53c4c00c
JH
108#define HOP(pos,off) (PL_reg_match_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off))
109#define HOPMAYBE(pos,off) (PL_reg_match_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
110#define HOPc(pos,off) ((char*)HOP(pos,off))
111#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 112
efb30f32 113#define HOPBACK(pos, off) ( \
53c4c00c 114 (UTF && PL_reg_match_utf8) \
efb30f32
HS
115 ? reghopmaybe((U8*)pos, -off) \
116 : (pos - off >= PL_bostr) \
117 ? (U8*)(pos - off) \
118 : (U8*)NULL \
119)
120#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
121
1aa99e6b
IH
122#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
123#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
53c4c00c
JH
124#define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
125#define HOPMAYBE3(pos,off,lim) (PL_reg_match_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
1aa99e6b
IH
126#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
127#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
128
8269fa76 129#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 130
5f80c4cf 131/* for use after a quantifier and before an EXACT-like node -- japhy */
74750237
JP
132#define NEXT_IMPT(to_rn) STMT_START { \
133 while (OP(to_rn) == OPEN || OP(to_rn) == CLOSE || OP(to_rn) == EVAL) \
5f80c4cf
JP
134 to_rn += NEXT_OFF(to_rn); \
135} STMT_END
74750237 136
acfe0abc 137static void restore_pos(pTHX_ void *arg);
51371543 138
76e3520e 139STATIC CHECKPOINT
cea2e8a9 140S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 141{
3280af22 142 int retval = PL_savestack_ix;
b1ce53c5
JH
143#define REGCP_PAREN_ELEMS 4
144 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
145 int p;
146
e49a9654
IH
147 if (paren_elems_to_push < 0)
148 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
149
a01268b5 150#define REGCP_OTHER_ELEMS 6
b1ce53c5 151 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 152 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 153/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
154 SSPUSHINT(PL_regendp[p]);
155 SSPUSHINT(PL_regstartp[p]);
3280af22 156 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
157 SSPUSHINT(p);
158 }
b1ce53c5 159/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
160 SSPUSHINT(PL_regsize);
161 SSPUSHINT(*PL_reglastparen);
a01268b5 162 SSPUSHINT(*PL_reglastcloseparen);
3280af22 163 SSPUSHPTR(PL_reginput);
41123dfd
JH
164#define REGCP_FRAME_ELEMS 2
165/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
166 * are needed for the regexp context stack bookkeeping. */
167 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 168 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 169
a0d0e21e
LW
170 return retval;
171}
172
c277df42 173/* These are needed since we do not localize EVAL nodes: */
02db2b7b 174# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 175 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 176 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 177
02db2b7b 178# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 179 PerlIO_printf(Perl_debug_log, \
faccc32b 180 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 181 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 182
76e3520e 183STATIC char *
cea2e8a9 184S_regcppop(pTHX)
a0d0e21e 185{
b1ce53c5 186 I32 i;
a0d0e21e
LW
187 U32 paren = 0;
188 char *input;
cf93c79d 189 I32 tmps;
b1ce53c5
JH
190
191 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 192 i = SSPOPINT;
b1ce53c5
JH
193 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
194 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 195 input = (char *) SSPOPPTR;
a01268b5 196 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
197 *PL_reglastparen = SSPOPINT;
198 PL_regsize = SSPOPINT;
b1ce53c5
JH
199
200 /* Now restore the parentheses context. */
41123dfd
JH
201 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
202 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 203 paren = (U32)SSPOPINT;
3280af22 204 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
205 PL_regstartp[paren] = SSPOPINT;
206 tmps = SSPOPINT;
3280af22
NIS
207 if (paren <= *PL_reglastparen)
208 PL_regendp[paren] = tmps;
c277df42 209 DEBUG_r(
c3464db5 210 PerlIO_printf(Perl_debug_log,
b900a521 211 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 212 (UV)paren, (IV)PL_regstartp[paren],
b900a521 213 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 214 (IV)PL_regendp[paren],
3280af22 215 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 216 );
a0d0e21e 217 }
c277df42 218 DEBUG_r(
3280af22 219 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 220 PerlIO_printf(Perl_debug_log,
faccc32b
JH
221 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
222 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
223 }
224 );
daf18116 225#if 1
dafc8851
JH
226 /* It would seem that the similar code in regtry()
227 * already takes care of this, and in fact it is in
228 * a better location to since this code can #if 0-ed out
229 * but the code in regtry() is needed or otherwise tests
230 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
231 * (as of patchlevel 7877) will fail. Then again,
232 * this code seems to be necessary or otherwise
233 * building DynaLoader will fail:
234 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
235 * --jhi */
3280af22
NIS
236 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
237 if (paren > PL_regsize)
cf93c79d
IZ
238 PL_regstartp[paren] = -1;
239 PL_regendp[paren] = -1;
a0d0e21e 240 }
dafc8851 241#endif
a0d0e21e
LW
242 return input;
243}
244
0f5d15d6 245STATIC char *
cea2e8a9 246S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
247{
248 I32 tmp = PL_savestack_ix;
249
250 PL_savestack_ix = ss;
251 regcppop();
252 PL_savestack_ix = tmp;
942e002e 253 return Nullch;
0f5d15d6
IZ
254}
255
256typedef struct re_cc_state
257{
258 I32 ss;
259 regnode *node;
260 struct re_cc_state *prev;
261 CURCUR *cc;
262 regexp *re;
263} re_cc_state;
264
02db2b7b 265#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 266
29d1e993
HS
267#define TRYPAREN(paren, n, input) { \
268 if (paren) { \
269 if (n) { \
270 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
271 PL_regendp[paren] = input - PL_bostr; \
272 } \
273 else \
274 PL_regendp[paren] = -1; \
275 } \
276 if (regmatch(next)) \
277 sayYES; \
278 if (paren && n) \
279 PL_regendp[paren] = -1; \
280}
281
282
a687059c 283/*
e50aee73 284 * pregexec and friends
a687059c
LW
285 */
286
287/*
c277df42 288 - pregexec - match a regexp against a string
a687059c 289 */
c277df42 290I32
864dbfa3 291Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 292 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
293/* strend: pointer to null at end of string */
294/* strbeg: real beginning of string */
295/* minend: end of match must be >=minend after stringarg. */
296/* nosave: For optimizations. */
297{
298 return
9041c2e3 299 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
300 nosave ? 0 : REXEC_COPY_STR);
301}
0f5d15d6
IZ
302
303STATIC void
cea2e8a9 304S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
305{
306 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
307#ifdef DEBUGGING
308 PL_regprogram = prog->program;
309#endif
310 PL_regnpar = prog->nparens;
9041c2e3
NIS
311 PL_regdata = prog->data;
312 PL_reg_re = prog;
0f5d15d6 313}
22e551b9 314
9041c2e3 315/*
cad2e5aa
JH
316 * Need to implement the following flags for reg_anch:
317 *
318 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
319 * USE_INTUIT_ML
320 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
321 * INTUIT_AUTORITATIVE_ML
322 * INTUIT_ONCE_NOML - Intuit can match in one location only.
323 * INTUIT_ONCE_ML
324 *
325 * Another flag for this function: SECOND_TIME (so that float substrs
326 * with giant delta may be not rechecked).
327 */
328
329/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
330
2c2d71f5 331/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
332 Otherwise, only SvCUR(sv) is used to get strbeg. */
333
334/* XXXX We assume that strpos is strbeg unless sv. */
335
6eb5f6b9
JH
336/* XXXX Some places assume that there is a fixed substring.
337 An update may be needed if optimizer marks as "INTUITable"
338 RExen without fixed substrings. Similarly, it is assumed that
339 lengths of all the strings are no more than minlen, thus they
340 cannot come from lookahead.
341 (Or minlen should take into account lookahead.) */
342
2c2d71f5
JH
343/* A failure to find a constant substring means that there is no need to make
344 an expensive call to REx engine, thus we celebrate a failure. Similarly,
345 finding a substring too deep into the string means that less calls to
30944b6d
IZ
346 regtry() should be needed.
347
348 REx compiler's optimizer found 4 possible hints:
349 a) Anchored substring;
350 b) Fixed substring;
351 c) Whether we are anchored (beginning-of-line or \G);
352 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 353 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
354 string which does not contradict any of them.
355 */
2c2d71f5 356
6eb5f6b9
JH
357/* Most of decisions we do here should have been done at compile time.
358 The nodes of the REx which we used for the search should have been
359 deleted from the finite automaton. */
360
cad2e5aa
JH
361char *
362Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
363 char *strend, U32 flags, re_scream_pos_data *data)
364{
b7953727 365 register I32 start_shift = 0;
cad2e5aa 366 /* Should be nonnegative! */
b7953727 367 register I32 end_shift = 0;
2c2d71f5
JH
368 register char *s;
369 register SV *check;
a1933d95 370 char *strbeg;
cad2e5aa
JH
371 char *t;
372 I32 ml_anch;
6eb5f6b9 373 register char *other_last = Nullch; /* other substr checked before this */
b7953727 374 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
375#ifdef DEBUGGING
376 char *i_strpos = strpos;
377#endif
cad2e5aa
JH
378
379 DEBUG_r( if (!PL_colorset) reginitcolors() );
380 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 381 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
382 PL_colors[4],PL_colors[5],PL_colors[0],
383 prog->precomp,
384 PL_colors[1],
385 (strlen(prog->precomp) > 60 ? "..." : ""),
386 PL_colors[0],
b900a521 387 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
388 strpos, PL_colors[1],
389 (strend - strpos > 60 ? "..." : ""))
390 );
391
1aa99e6b
IH
392 if (prog->reganch & ROPT_UTF8)
393 PL_reg_flags |= RF_utf8;
394
395 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
2c2d71f5 396 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 397 goto fail;
2c2d71f5 398 }
a1933d95 399 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 400 PL_regeol = strend;
653099ff 401 check = prog->check_substr;
2c2d71f5 402 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
403 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
404 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 405 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 406
7e25d62c
JH
407 if (!ml_anch) {
408 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
409 | ROPT_IMPLICIT)) /* not a real BOL */
410 /* SvCUR is not set on references: SvRV and SvPVX overlap */
411 && sv && !SvROK(sv)
412 && (strpos != strbeg)) {
413 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
414 goto fail;
415 }
416 if (prog->check_offset_min == prog->check_offset_max &&
cce850e4 417 !(prog->reganch & ROPT_CANY_SEEN)) {
2c2d71f5 418 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
419 I32 slen;
420
1aa99e6b 421 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
422 if (SvTAIL(check)) {
423 slen = SvCUR(check); /* >= 1 */
cad2e5aa 424
9041c2e3 425 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
426 || (strend - s == slen && strend[-1] != '\n')) {
427 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
428 goto fail_finish;
cad2e5aa
JH
429 }
430 /* Now should match s[0..slen-2] */
431 slen--;
653099ff 432 if (slen && (*SvPVX(check) != *s
cad2e5aa 433 || (slen > 1
653099ff 434 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
435 report_neq:
436 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
437 goto fail_finish;
438 }
cad2e5aa 439 }
653099ff
GS
440 else if (*SvPVX(check) != *s
441 || ((slen = SvCUR(check)) > 1
442 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
443 goto report_neq;
444 goto success_at_start;
7e25d62c 445 }
cad2e5aa 446 }
2c2d71f5 447 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 448 s = strpos;
2c2d71f5 449 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 450 end_shift = prog->minlen - start_shift -
653099ff 451 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 452 if (!ml_anch) {
653099ff
GS
453 I32 end = prog->check_offset_max + CHR_SVLEN(check)
454 - (SvTAIL(check) != 0);
1aa99e6b 455 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
456
457 if (end_shift < eshift)
458 end_shift = eshift;
459 }
cad2e5aa 460 }
2c2d71f5 461 else { /* Can match at random position */
cad2e5aa
JH
462 ml_anch = 0;
463 s = strpos;
2c2d71f5
JH
464 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
465 /* Should be nonnegative! */
466 end_shift = prog->minlen - start_shift -
653099ff 467 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
468 }
469
2c2d71f5 470#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 471 if (end_shift < 0)
6bbae5e6 472 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
473#endif
474
2c2d71f5
JH
475 restart:
476 /* Find a possible match in the region s..strend by looking for
477 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 478 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
479 I32 p = -1; /* Internal iterator of scream. */
480 I32 *pp = data ? data->scream_pos : &p;
481
2c2d71f5
JH
482 if (PL_screamfirst[BmRARE(check)] >= 0
483 || ( BmRARE(check) == '\n'
484 && (BmPREVIOUS(check) == SvCUR(check) - 1)
485 && SvTAIL(check) ))
9041c2e3 486 s = screaminstr(sv, check,
2c2d71f5 487 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 488 else
2c2d71f5 489 goto fail_finish;
cad2e5aa
JH
490 if (data)
491 *data->scream_olds = s;
492 }
f33976b4 493 else if (prog->reganch & ROPT_CANY_SEEN)
3baa4c62
JH
494 s = fbm_instr((U8*)(s + start_shift),
495 (U8*)(strend - end_shift),
496 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 497 else
1aa99e6b
IH
498 s = fbm_instr(HOP3(s, start_shift, strend),
499 HOP3(strend, -end_shift, strbeg),
2c2d71f5 500 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
501
502 /* Update the count-of-usability, remove useless subpatterns,
503 unshift s. */
2c2d71f5
JH
504
505 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
506 (s ? "Found" : "Did not find"),
507 ((check == prog->anchored_substr) ? "anchored" : "floating"),
508 PL_colors[0],
7b0972df
JH
509 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
510 SvPVX(check),
2c2d71f5
JH
511 PL_colors[1], (SvTAIL(check) ? "$" : ""),
512 (s ? " at offset " : "...\n") ) );
513
514 if (!s)
515 goto fail_finish;
516
6eb5f6b9
JH
517 check_at = s;
518
2c2d71f5 519 /* Finish the diagnostic message */
30944b6d 520 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
521
522 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
523 Start with the other substr.
524 XXXX no SCREAM optimization yet - and a very coarse implementation
525 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
526 *always* match. Probably should be marked during compile...
527 Probably it is right to do no SCREAM here...
528 */
529
530 if (prog->float_substr && prog->anchored_substr) {
30944b6d 531 /* Take into account the "other" substring. */
2c2d71f5
JH
532 /* XXXX May be hopelessly wrong for UTF... */
533 if (!other_last)
6eb5f6b9 534 other_last = strpos;
2c2d71f5 535 if (check == prog->float_substr) {
30944b6d
IZ
536 do_other_anchored:
537 {
1aa99e6b 538 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
539 char *s1 = s;
540
2c2d71f5
JH
541 t = s - prog->check_offset_max;
542 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
543 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 544 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 545 && t > strpos)))
30944b6d 546 /* EMPTY */;
2c2d71f5
JH
547 else
548 t = strpos;
1aa99e6b 549 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
550 if (t < other_last) /* These positions already checked */
551 t = other_last;
1aa99e6b 552 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
553 if (last < last1)
554 last1 = last;
555 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
556 /* On end-of-str: see comment below. */
557 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
558 HOP3(HOP3(last1, prog->anchored_offset, strend)
559 + SvCUR(prog->anchored_substr),
560 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
561 prog->anchored_substr,
562 PL_multiline ? FBMrf_MULTILINE : 0);
563 DEBUG_r(PerlIO_printf(Perl_debug_log,
564 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
565 (s ? "Found" : "Contradicts"),
566 PL_colors[0],
7b0972df
JH
567 (int)(SvCUR(prog->anchored_substr)
568 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
569 SvPVX(prog->anchored_substr),
570 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
571 if (!s) {
572 if (last1 >= last2) {
573 DEBUG_r(PerlIO_printf(Perl_debug_log,
574 ", giving up...\n"));
575 goto fail_finish;
576 }
577 DEBUG_r(PerlIO_printf(Perl_debug_log,
578 ", trying floating at offset %ld...\n",
1aa99e6b
IH
579 (long)(HOP3c(s1, 1, strend) - i_strpos)));
580 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
581 s = HOP3c(last, 1, strend);
2c2d71f5
JH
582 goto restart;
583 }
584 else {
585 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 586 (long)(s - i_strpos)));
1aa99e6b
IH
587 t = HOP3c(s, -prog->anchored_offset, strbeg);
588 other_last = HOP3c(s, 1, strend);
30944b6d 589 s = s1;
2c2d71f5
JH
590 if (t == strpos)
591 goto try_at_start;
2c2d71f5
JH
592 goto try_at_offset;
593 }
30944b6d 594 }
2c2d71f5
JH
595 }
596 else { /* Take into account the floating substring. */
597 char *last, *last1;
598 char *s1 = s;
599
1aa99e6b
IH
600 t = HOP3c(s, -start_shift, strbeg);
601 last1 = last =
602 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
603 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
604 last = HOP3c(t, prog->float_max_offset, strend);
605 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
606 if (s < other_last)
607 s = other_last;
2c2d71f5
JH
608 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
609 /* fbm_instr() takes into account exact value of end-of-str
610 if the check is SvTAIL(ed). Since false positives are OK,
611 and end-of-str is not later than strend we are OK. */
612 s = fbm_instr((unsigned char*)s,
613 (unsigned char*)last + SvCUR(prog->float_substr)
614 - (SvTAIL(prog->float_substr)!=0),
615 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
616 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
617 (s ? "Found" : "Contradicts"),
618 PL_colors[0],
7b0972df
JH
619 (int)(SvCUR(prog->float_substr)
620 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
621 SvPVX(prog->float_substr),
622 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
623 if (!s) {
624 if (last1 == last) {
625 DEBUG_r(PerlIO_printf(Perl_debug_log,
626 ", giving up...\n"));
627 goto fail_finish;
628 }
629 DEBUG_r(PerlIO_printf(Perl_debug_log,
630 ", trying anchored starting at offset %ld...\n",
30944b6d 631 (long)(s1 + 1 - i_strpos)));
803ff556 632 other_last = last;
1aa99e6b 633 s = HOP3c(t, 1, strend);
2c2d71f5
JH
634 goto restart;
635 }
636 else {
637 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 638 (long)(s - i_strpos)));
803ff556 639 other_last = s; /* Fix this later. --Hugo */
30944b6d 640 s = s1;
2c2d71f5
JH
641 if (t == strpos)
642 goto try_at_start;
2c2d71f5
JH
643 goto try_at_offset;
644 }
645 }
cad2e5aa 646 }
2c2d71f5
JH
647
648 t = s - prog->check_offset_max;
2c2d71f5
JH
649 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
650 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
651 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
652 && t > strpos))) {
2c2d71f5
JH
653 /* Fixed substring is found far enough so that the match
654 cannot start at strpos. */
655 try_at_offset:
cad2e5aa 656 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
657 /* Eventually fbm_*() should handle this, but often
658 anchored_offset is not 0, so this check will not be wasted. */
659 /* XXXX In the code below we prefer to look for "^" even in
660 presence of anchored substrings. And we search even
661 beyond the found float position. These pessimizations
662 are historical artefacts only. */
663 find_anchor:
2c2d71f5 664 while (t < strend - prog->minlen) {
cad2e5aa 665 if (*t == '\n') {
4ee3650e 666 if (t < check_at - prog->check_offset_min) {
30944b6d 667 if (prog->anchored_substr) {
4ee3650e
GS
668 /* Since we moved from the found position,
669 we definitely contradict the found anchored
30944b6d
IZ
670 substr. Due to the above check we do not
671 contradict "check" substr.
672 Thus we can arrive here only if check substr
673 is float. Redo checking for "other"=="fixed".
674 */
9041c2e3 675 strpos = t + 1;
30944b6d
IZ
676 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
677 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
678 goto do_other_anchored;
679 }
4ee3650e
GS
680 /* We don't contradict the found floating substring. */
681 /* XXXX Why not check for STCLASS? */
cad2e5aa 682 s = t + 1;
2c2d71f5 683 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 684 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
685 goto set_useful;
686 }
4ee3650e
GS
687 /* Position contradicts check-string */
688 /* XXXX probably better to look for check-string
689 than for "\n", so one should lower the limit for t? */
690 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 691 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 692 other_last = strpos = s = t + 1;
cad2e5aa
JH
693 goto restart;
694 }
695 t++;
696 }
2c2d71f5
JH
697 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
698 PL_colors[0],PL_colors[1]));
699 goto fail_finish;
cad2e5aa 700 }
f5952150
GS
701 else {
702 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
703 PL_colors[0],PL_colors[1]));
704 }
cad2e5aa
JH
705 s = t;
706 set_useful:
2c2d71f5 707 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
708 }
709 else {
f5952150 710 /* The found string does not prohibit matching at strpos,
2c2d71f5 711 - no optimization of calling REx engine can be performed,
f5952150
GS
712 unless it was an MBOL and we are not after MBOL,
713 or a future STCLASS check will fail this. */
2c2d71f5
JH
714 try_at_start:
715 /* Even in this situation we may use MBOL flag if strpos is offset
716 wrt the start of the string. */
05b4157f 717 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 718 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
719 /* May be due to an implicit anchor of m{.*foo} */
720 && !(prog->reganch & ROPT_IMPLICIT))
721 {
cad2e5aa
JH
722 t = strpos;
723 goto find_anchor;
724 }
30944b6d 725 DEBUG_r( if (ml_anch)
f5952150
GS
726 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
727 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 728 );
2c2d71f5 729 success_at_start:
30944b6d 730 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 731 && prog->check_substr /* Could be deleted already */
cad2e5aa 732 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
733 && prog->check_substr == prog->float_substr)
734 {
cad2e5aa 735 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 736 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
737 SvREFCNT_dec(prog->check_substr);
738 prog->check_substr = Nullsv; /* disable */
739 prog->float_substr = Nullsv; /* clear */
5e39e1e5 740 check = Nullsv; /* abort */
cad2e5aa 741 s = strpos;
3cf5c195
IZ
742 /* XXXX This is a remnant of the old implementation. It
743 looks wasteful, since now INTUIT can use many
6eb5f6b9 744 other heuristics. */
cad2e5aa
JH
745 prog->reganch &= ~RE_USE_INTUIT;
746 }
747 else
748 s = strpos;
749 }
750
6eb5f6b9
JH
751 /* Last resort... */
752 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
753 if (prog->regstclass) {
754 /* minlen == 0 is possible if regstclass is \b or \B,
755 and the fixed substr is ''$.
756 Since minlen is already taken into account, s+1 is before strend;
757 accidentally, minlen >= 1 guaranties no false positives at s + 1
758 even for \b or \B. But (minlen? 1 : 0) below assumes that
759 regstclass does not come from lookahead... */
760 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
761 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 762 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 763 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 764 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 765 : 1);
6eb5f6b9 766 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
767 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
768 : (prog->float_substr
769 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
770 cl_l, strend)
771 : strend);
a1933d95 772 char *startpos = strbeg;
6eb5f6b9
JH
773
774 t = s;
76384e4a 775 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 776 PL_regdata = prog->data;
76384e4a
GS
777 PL_bostr = startpos;
778 }
f33976b4 779 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
6eb5f6b9
JH
780 if (!s) {
781#ifdef DEBUGGING
b7953727 782 char *what = 0;
6eb5f6b9
JH
783#endif
784 if (endpos == strend) {
785 DEBUG_r( PerlIO_printf(Perl_debug_log,
786 "Could not match STCLASS...\n") );
787 goto fail;
788 }
66e933ab
GS
789 DEBUG_r( PerlIO_printf(Perl_debug_log,
790 "This position contradicts STCLASS...\n") );
653099ff
GS
791 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
792 goto fail;
6eb5f6b9
JH
793 /* Contradict one of substrings */
794 if (prog->anchored_substr) {
6eb5f6b9
JH
795 if (prog->anchored_substr == check) {
796 DEBUG_r( what = "anchored" );
797 hop_and_restart:
1aa99e6b 798 s = HOP3c(t, 1, strend);
66e933ab
GS
799 if (s + start_shift + end_shift > strend) {
800 /* XXXX Should be taken into account earlier? */
801 DEBUG_r( PerlIO_printf(Perl_debug_log,
802 "Could not match STCLASS...\n") );
803 goto fail;
804 }
5e39e1e5
HS
805 if (!check)
806 goto giveup;
6eb5f6b9 807 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 808 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
809 what, (long)(s + start_shift - i_strpos)) );
810 goto restart;
811 }
66e933ab 812 /* Have both, check_string is floating */
6eb5f6b9
JH
813 if (t + start_shift >= check_at) /* Contradicts floating=check */
814 goto retry_floating_check;
815 /* Recheck anchored substring, but not floating... */
9041c2e3 816 s = check_at;
5e39e1e5
HS
817 if (!check)
818 goto giveup;
6eb5f6b9 819 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 820 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
821 (long)(other_last - i_strpos)) );
822 goto do_other_anchored;
823 }
60e71179
GS
824 /* Another way we could have checked stclass at the
825 current position only: */
826 if (ml_anch) {
827 s = t = t + 1;
5e39e1e5
HS
828 if (!check)
829 goto giveup;
60e71179 830 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
831 "Looking for /%s^%s/m starting at offset %ld...\n",
832 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 833 goto try_at_offset;
66e933ab 834 }
60e71179
GS
835 if (!prog->float_substr) /* Could have been deleted */
836 goto fail;
6eb5f6b9
JH
837 /* Check is floating subtring. */
838 retry_floating_check:
839 t = check_at - start_shift;
840 DEBUG_r( what = "floating" );
841 goto hop_and_restart;
842 }
b7953727
JH
843 if (t != s) {
844 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 845 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
846 (long)(t - i_strpos), (long)(s - i_strpos))
847 );
848 }
849 else {
850 DEBUG_r(PerlIO_printf(Perl_debug_log,
851 "Does not contradict STCLASS...\n");
852 );
853 }
6eb5f6b9 854 }
5e39e1e5
HS
855 giveup:
856 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
857 PL_colors[4], (check ? "Guessed" : "Giving up"),
858 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 859 return s;
2c2d71f5
JH
860
861 fail_finish: /* Substring not found */
66e933ab
GS
862 if (prog->check_substr) /* could be removed already */
863 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 864 fail:
2c2d71f5 865 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
866 PL_colors[4],PL_colors[5]));
867 return Nullch;
868}
9661b544 869
6eb5f6b9 870/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
871STATIC char *
872S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 873{
6eb5f6b9
JH
874 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
875 char *m;
d8093b23
G
876 STRLEN ln;
877 unsigned int c1;
878 unsigned int c2;
6eb5f6b9
JH
879 char *e;
880 register I32 tmp = 1; /* Scratch variable? */
53c4c00c 881 register bool do_utf8 = PL_reg_match_utf8;
cad2e5aa 882
6eb5f6b9
JH
883 /* We know what class it must start with. */
884 switch (OP(c)) {
6eb5f6b9 885 case ANYOF:
a0ed51b3 886 while (s < strend) {
ffc61ed2 887 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
888 if (tmp && (norun || regtry(prog, s)))
889 goto got_it;
890 else
891 tmp = doevery;
a0ed51b3 892 }
6eb5f6b9
JH
893 else
894 tmp = 1;
ffc61ed2 895 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 896 }
6eb5f6b9 897 break;
f33976b4
DB
898 case CANY:
899 while (s < strend) {
900 if (tmp && (norun || regtry(prog, s)))
901 goto got_it;
902 else
903 tmp = doevery;
904 s++;
905 }
906 break;
6eb5f6b9
JH
907 case EXACTF:
908 m = STRING(c);
909 ln = STR_LEN(c);
1aa99e6b
IH
910 if (UTF) {
911 c1 = to_utf8_lower((U8*)m);
912 c2 = to_utf8_upper((U8*)m);
913 }
914 else {
915 c1 = *(U8*)m;
916 c2 = PL_fold[c1];
917 }
6eb5f6b9
JH
918 goto do_exactf;
919 case EXACTFL:
920 m = STRING(c);
921 ln = STR_LEN(c);
d8093b23 922 c1 = *(U8*)m;
6eb5f6b9
JH
923 c2 = PL_fold_locale[c1];
924 do_exactf:
925 e = strend - ln;
b3c9acc1 926
6eb5f6b9
JH
927 if (norun && e < s)
928 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
929
930 if (do_utf8) {
931 STRLEN len;
932 if (c1 == c2)
933 while (s <= e) {
9041c2e3 934 if ( utf8_to_uvchr((U8*)s, &len) == c1
1aa99e6b
IH
935 && regtry(prog, s) )
936 goto got_it;
937 s += len;
938 }
939 else
940 while (s <= e) {
9041c2e3 941 UV c = utf8_to_uvchr((U8*)s, &len);
1aa99e6b
IH
942 if ( (c == c1 || c == c2) && regtry(prog, s) )
943 goto got_it;
944 s += len;
945 }
946 }
947 else {
948 if (c1 == c2)
949 while (s <= e) {
950 if ( *(U8*)s == c1
951 && (ln == 1 || !(OP(c) == EXACTF
952 ? ibcmp(s, m, ln)
953 : ibcmp_locale(s, m, ln)))
954 && (norun || regtry(prog, s)) )
955 goto got_it;
956 s++;
957 }
958 else
959 while (s <= e) {
960 if ( (*(U8*)s == c1 || *(U8*)s == c2)
961 && (ln == 1 || !(OP(c) == EXACTF
962 ? ibcmp(s, m, ln)
963 : ibcmp_locale(s, m, ln)))
964 && (norun || regtry(prog, s)) )
965 goto got_it;
966 s++;
967 }
b3c9acc1
IZ
968 }
969 break;
bbce6d69 970 case BOUNDL:
3280af22 971 PL_reg_flags |= RF_tainted;
bbce6d69 972 /* FALL THROUGH */
a0d0e21e 973 case BOUND:
ffc61ed2 974 if (do_utf8) {
12d33761 975 if (s == PL_bostr)
ffc61ed2
JH
976 tmp = '\n';
977 else {
1aa99e6b 978 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 979
0064a8a9
JH
980 if (s > (char*)r)
981 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
982 }
983 tmp = ((OP(c) == BOUND ?
9041c2e3 984 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 985 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
986 while (s < strend) {
987 if (tmp == !(OP(c) == BOUND ?
3568d838 988 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
989 isALNUM_LC_utf8((U8*)s)))
990 {
991 tmp = !tmp;
992 if ((norun || regtry(prog, s)))
993 goto got_it;
994 }
995 s += UTF8SKIP(s);
a687059c 996 }
a0d0e21e 997 }
667bb95a 998 else {
12d33761 999 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1000 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1001 while (s < strend) {
1002 if (tmp ==
1003 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1004 tmp = !tmp;
1005 if ((norun || regtry(prog, s)))
1006 goto got_it;
1007 }
1008 s++;
a0ed51b3 1009 }
a0ed51b3 1010 }
6eb5f6b9 1011 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1012 goto got_it;
1013 break;
bbce6d69 1014 case NBOUNDL:
3280af22 1015 PL_reg_flags |= RF_tainted;
bbce6d69 1016 /* FALL THROUGH */
a0d0e21e 1017 case NBOUND:
ffc61ed2 1018 if (do_utf8) {
12d33761 1019 if (s == PL_bostr)
ffc61ed2
JH
1020 tmp = '\n';
1021 else {
1aa99e6b 1022 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1023
0064a8a9
JH
1024 if (s > (char*)r)
1025 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1026 }
1027 tmp = ((OP(c) == NBOUND ?
9041c2e3 1028 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1029 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1030 while (s < strend) {
1031 if (tmp == !(OP(c) == NBOUND ?
3568d838 1032 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1033 isALNUM_LC_utf8((U8*)s)))
1034 tmp = !tmp;
1035 else if ((norun || regtry(prog, s)))
1036 goto got_it;
1037 s += UTF8SKIP(s);
1038 }
a0d0e21e 1039 }
667bb95a 1040 else {
12d33761 1041 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1042 tmp = ((OP(c) == NBOUND ?
1043 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1044 while (s < strend) {
1045 if (tmp ==
1046 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1047 tmp = !tmp;
1048 else if ((norun || regtry(prog, s)))
1049 goto got_it;
1050 s++;
1051 }
a0ed51b3 1052 }
6eb5f6b9 1053 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1054 goto got_it;
1055 break;
a0d0e21e 1056 case ALNUM:
ffc61ed2 1057 if (do_utf8) {
8269fa76 1058 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1059 while (s < strend) {
3568d838 1060 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1061 if (tmp && (norun || regtry(prog, s)))
1062 goto got_it;
1063 else
1064 tmp = doevery;
1065 }
bbce6d69 1066 else
ffc61ed2
JH
1067 tmp = 1;
1068 s += UTF8SKIP(s);
bbce6d69 1069 }
bbce6d69 1070 }
ffc61ed2
JH
1071 else {
1072 while (s < strend) {
1073 if (isALNUM(*s)) {
1074 if (tmp && (norun || regtry(prog, s)))
1075 goto got_it;
1076 else
1077 tmp = doevery;
1078 }
a0ed51b3 1079 else
ffc61ed2
JH
1080 tmp = 1;
1081 s++;
a0ed51b3 1082 }
a0ed51b3
LW
1083 }
1084 break;
bbce6d69 1085 case ALNUML:
3280af22 1086 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1087 if (do_utf8) {
1088 while (s < strend) {
1089 if (isALNUM_LC_utf8((U8*)s)) {
1090 if (tmp && (norun || regtry(prog, s)))
1091 goto got_it;
1092 else
1093 tmp = doevery;
1094 }
a687059c 1095 else
ffc61ed2
JH
1096 tmp = 1;
1097 s += UTF8SKIP(s);
a0d0e21e 1098 }
a0d0e21e 1099 }
ffc61ed2
JH
1100 else {
1101 while (s < strend) {
1102 if (isALNUM_LC(*s)) {
1103 if (tmp && (norun || regtry(prog, s)))
1104 goto got_it;
1105 else
1106 tmp = doevery;
1107 }
a0ed51b3 1108 else
ffc61ed2
JH
1109 tmp = 1;
1110 s++;
a0ed51b3 1111 }
a0ed51b3
LW
1112 }
1113 break;
a0d0e21e 1114 case NALNUM:
ffc61ed2 1115 if (do_utf8) {
8269fa76 1116 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1117 while (s < strend) {
3568d838 1118 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1119 if (tmp && (norun || regtry(prog, s)))
1120 goto got_it;
1121 else
1122 tmp = doevery;
1123 }
bbce6d69 1124 else
ffc61ed2
JH
1125 tmp = 1;
1126 s += UTF8SKIP(s);
bbce6d69 1127 }
bbce6d69 1128 }
ffc61ed2
JH
1129 else {
1130 while (s < strend) {
1131 if (!isALNUM(*s)) {
1132 if (tmp && (norun || regtry(prog, s)))
1133 goto got_it;
1134 else
1135 tmp = doevery;
1136 }
a0ed51b3 1137 else
ffc61ed2
JH
1138 tmp = 1;
1139 s++;
a0ed51b3 1140 }
a0ed51b3
LW
1141 }
1142 break;
bbce6d69 1143 case NALNUML:
3280af22 1144 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1145 if (do_utf8) {
1146 while (s < strend) {
1147 if (!isALNUM_LC_utf8((U8*)s)) {
1148 if (tmp && (norun || regtry(prog, s)))
1149 goto got_it;
1150 else
1151 tmp = doevery;
1152 }
a687059c 1153 else
ffc61ed2
JH
1154 tmp = 1;
1155 s += UTF8SKIP(s);
a687059c 1156 }
a0d0e21e 1157 }
ffc61ed2
JH
1158 else {
1159 while (s < strend) {
1160 if (!isALNUM_LC(*s)) {
1161 if (tmp && (norun || regtry(prog, s)))
1162 goto got_it;
1163 else
1164 tmp = doevery;
1165 }
a0ed51b3 1166 else
ffc61ed2
JH
1167 tmp = 1;
1168 s++;
a0ed51b3 1169 }
a0ed51b3
LW
1170 }
1171 break;
a0d0e21e 1172 case SPACE:
ffc61ed2 1173 if (do_utf8) {
8269fa76 1174 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1175 while (s < strend) {
3568d838 1176 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1177 if (tmp && (norun || regtry(prog, s)))
1178 goto got_it;
1179 else
1180 tmp = doevery;
1181 }
a0d0e21e 1182 else
ffc61ed2
JH
1183 tmp = 1;
1184 s += UTF8SKIP(s);
2304df62 1185 }
a0d0e21e 1186 }
ffc61ed2
JH
1187 else {
1188 while (s < strend) {
1189 if (isSPACE(*s)) {
1190 if (tmp && (norun || regtry(prog, s)))
1191 goto got_it;
1192 else
1193 tmp = doevery;
1194 }
a0ed51b3 1195 else
ffc61ed2
JH
1196 tmp = 1;
1197 s++;
a0ed51b3 1198 }
a0ed51b3
LW
1199 }
1200 break;
bbce6d69 1201 case SPACEL:
3280af22 1202 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1203 if (do_utf8) {
1204 while (s < strend) {
1205 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1206 if (tmp && (norun || regtry(prog, s)))
1207 goto got_it;
1208 else
1209 tmp = doevery;
1210 }
bbce6d69 1211 else
ffc61ed2
JH
1212 tmp = 1;
1213 s += UTF8SKIP(s);
bbce6d69 1214 }
bbce6d69 1215 }
ffc61ed2
JH
1216 else {
1217 while (s < strend) {
1218 if (isSPACE_LC(*s)) {
1219 if (tmp && (norun || regtry(prog, s)))
1220 goto got_it;
1221 else
1222 tmp = doevery;
1223 }
a0ed51b3 1224 else
ffc61ed2
JH
1225 tmp = 1;
1226 s++;
a0ed51b3 1227 }
a0ed51b3
LW
1228 }
1229 break;
a0d0e21e 1230 case NSPACE:
ffc61ed2 1231 if (do_utf8) {
8269fa76 1232 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1233 while (s < strend) {
3568d838 1234 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1235 if (tmp && (norun || regtry(prog, s)))
1236 goto got_it;
1237 else
1238 tmp = doevery;
1239 }
a0d0e21e 1240 else
ffc61ed2
JH
1241 tmp = 1;
1242 s += UTF8SKIP(s);
a687059c 1243 }
a0d0e21e 1244 }
ffc61ed2
JH
1245 else {
1246 while (s < strend) {
1247 if (!isSPACE(*s)) {
1248 if (tmp && (norun || regtry(prog, s)))
1249 goto got_it;
1250 else
1251 tmp = doevery;
1252 }
a0ed51b3 1253 else
ffc61ed2
JH
1254 tmp = 1;
1255 s++;
a0ed51b3 1256 }
a0ed51b3
LW
1257 }
1258 break;
bbce6d69 1259 case NSPACEL:
3280af22 1260 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1261 if (do_utf8) {
1262 while (s < strend) {
1263 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1264 if (tmp && (norun || regtry(prog, s)))
1265 goto got_it;
1266 else
1267 tmp = doevery;
1268 }
bbce6d69 1269 else
ffc61ed2
JH
1270 tmp = 1;
1271 s += UTF8SKIP(s);
bbce6d69 1272 }
bbce6d69 1273 }
ffc61ed2
JH
1274 else {
1275 while (s < strend) {
1276 if (!isSPACE_LC(*s)) {
1277 if (tmp && (norun || regtry(prog, s)))
1278 goto got_it;
1279 else
1280 tmp = doevery;
1281 }
a0ed51b3 1282 else
ffc61ed2
JH
1283 tmp = 1;
1284 s++;
a0ed51b3 1285 }
a0ed51b3
LW
1286 }
1287 break;
a0d0e21e 1288 case DIGIT:
ffc61ed2 1289 if (do_utf8) {
8269fa76 1290 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1291 while (s < strend) {
3568d838 1292 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1293 if (tmp && (norun || regtry(prog, s)))
1294 goto got_it;
1295 else
1296 tmp = doevery;
1297 }
a0d0e21e 1298 else
ffc61ed2
JH
1299 tmp = 1;
1300 s += UTF8SKIP(s);
2b69d0c2 1301 }
a0d0e21e 1302 }
ffc61ed2
JH
1303 else {
1304 while (s < strend) {
1305 if (isDIGIT(*s)) {
1306 if (tmp && (norun || regtry(prog, s)))
1307 goto got_it;
1308 else
1309 tmp = doevery;
1310 }
a0ed51b3 1311 else
ffc61ed2
JH
1312 tmp = 1;
1313 s++;
a0ed51b3 1314 }
a0ed51b3
LW
1315 }
1316 break;
b8c5462f
JH
1317 case DIGITL:
1318 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1319 if (do_utf8) {
1320 while (s < strend) {
1321 if (isDIGIT_LC_utf8((U8*)s)) {
1322 if (tmp && (norun || regtry(prog, s)))
1323 goto got_it;
1324 else
1325 tmp = doevery;
1326 }
b8c5462f 1327 else
ffc61ed2
JH
1328 tmp = 1;
1329 s += UTF8SKIP(s);
b8c5462f 1330 }
b8c5462f 1331 }
ffc61ed2
JH
1332 else {
1333 while (s < strend) {
1334 if (isDIGIT_LC(*s)) {
1335 if (tmp && (norun || regtry(prog, s)))
1336 goto got_it;
1337 else
1338 tmp = doevery;
1339 }
b8c5462f 1340 else
ffc61ed2
JH
1341 tmp = 1;
1342 s++;
b8c5462f 1343 }
b8c5462f
JH
1344 }
1345 break;
a0d0e21e 1346 case NDIGIT:
ffc61ed2 1347 if (do_utf8) {
8269fa76 1348 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1349 while (s < strend) {
3568d838 1350 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1351 if (tmp && (norun || regtry(prog, s)))
1352 goto got_it;
1353 else
1354 tmp = doevery;
1355 }
a0d0e21e 1356 else
ffc61ed2
JH
1357 tmp = 1;
1358 s += UTF8SKIP(s);
a687059c 1359 }
a0d0e21e 1360 }
ffc61ed2
JH
1361 else {
1362 while (s < strend) {
1363 if (!isDIGIT(*s)) {
1364 if (tmp && (norun || regtry(prog, s)))
1365 goto got_it;
1366 else
1367 tmp = doevery;
1368 }
a0ed51b3 1369 else
ffc61ed2
JH
1370 tmp = 1;
1371 s++;
a0ed51b3 1372 }
a0ed51b3
LW
1373 }
1374 break;
b8c5462f
JH
1375 case NDIGITL:
1376 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1377 if (do_utf8) {
1378 while (s < strend) {
1379 if (!isDIGIT_LC_utf8((U8*)s)) {
1380 if (tmp && (norun || regtry(prog, s)))
1381 goto got_it;
1382 else
1383 tmp = doevery;
1384 }
b8c5462f 1385 else
ffc61ed2
JH
1386 tmp = 1;
1387 s += UTF8SKIP(s);
b8c5462f 1388 }
a0ed51b3 1389 }
ffc61ed2
JH
1390 else {
1391 while (s < strend) {
1392 if (!isDIGIT_LC(*s)) {
1393 if (tmp && (norun || regtry(prog, s)))
1394 goto got_it;
1395 else
1396 tmp = doevery;
1397 }
cf93c79d 1398 else
ffc61ed2
JH
1399 tmp = 1;
1400 s++;
b8c5462f 1401 }
b8c5462f
JH
1402 }
1403 break;
b3c9acc1 1404 default:
3c3eec57
GS
1405 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1406 break;
d6a28714 1407 }
6eb5f6b9
JH
1408 return 0;
1409 got_it:
1410 return s;
1411}
1412
1413/*
1414 - regexec_flags - match a regexp against a string
1415 */
1416I32
1417Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1418 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1419/* strend: pointer to null at end of string */
1420/* strbeg: real beginning of string */
1421/* minend: end of match must be >=minend after stringarg. */
1422/* data: May be used for some additional optimizations. */
1423/* nosave: For optimizations. */
1424{
6eb5f6b9
JH
1425 register char *s;
1426 register regnode *c;
1427 register char *startpos = stringarg;
6eb5f6b9
JH
1428 I32 minlen; /* must match at least this many chars */
1429 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1430 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1431 constant substr. */ /* CC */
1432 I32 end_shift = 0; /* Same for the end. */ /* CC */
1433 I32 scream_pos = -1; /* Internal iterator of scream. */
1434 char *scream_olds;
1435 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1436 bool do_utf8 = DO_UTF8(sv);
6eb5f6b9
JH
1437
1438 PL_regcc = 0;
1439
1440 cache_re(prog);
1441#ifdef DEBUGGING
aea4f609 1442 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1443#endif
1444
1445 /* Be paranoid... */
1446 if (prog == NULL || startpos == NULL) {
1447 Perl_croak(aTHX_ "NULL regexp parameter");
1448 return 0;
1449 }
1450
1451 minlen = prog->minlen;
f33976b4 1452 if (do_utf8 && !(prog->reganch & ROPT_CANY_SEEN)) {
3baa4c62 1453 if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
1aa99e6b
IH
1454 }
1455 else {
f33976b4 1456 if (strend - startpos < minlen) goto phooey;
1aa99e6b 1457 }
6eb5f6b9 1458
6eb5f6b9
JH
1459 /* Check validity of program. */
1460 if (UCHARAT(prog->program) != REG_MAGIC) {
1461 Perl_croak(aTHX_ "corrupted regexp program");
1462 }
1463
1464 PL_reg_flags = 0;
1465 PL_reg_eval_set = 0;
1466 PL_reg_maxiter = 0;
1467
1468 if (prog->reganch & ROPT_UTF8)
1469 PL_reg_flags |= RF_utf8;
1470
1471 /* Mark beginning of line for ^ and lookbehind. */
1472 PL_regbol = startpos;
1473 PL_bostr = strbeg;
1474 PL_reg_sv = sv;
1475
1476 /* Mark end of line for $ (and such) */
1477 PL_regeol = strend;
1478
1479 /* see how far we have to get to not match where we matched before */
1480 PL_regtill = startpos+minend;
1481
1482 /* We start without call_cc context. */
1483 PL_reg_call_cc = 0;
1484
1485 /* If there is a "must appear" string, look for it. */
1486 s = startpos;
1487
1488 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1489 MAGIC *mg;
1490
1491 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1492 PL_reg_ganch = startpos;
1493 else if (sv && SvTYPE(sv) >= SVt_PVMG
1494 && SvMAGIC(sv)
14befaf4
DM
1495 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1496 && mg->mg_len >= 0) {
6eb5f6b9
JH
1497 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1498 if (prog->reganch & ROPT_ANCH_GPOS) {
f33976b4 1499 if (s > PL_reg_ganch)
6eb5f6b9
JH
1500 goto phooey;
1501 s = PL_reg_ganch;
1502 }
1503 }
1504 else /* pos() not defined */
1505 PL_reg_ganch = strbeg;
1506 }
1507
699c3c34
JH
1508 if (do_utf8 == (UTF!=0) &&
1509 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1510 re_scream_pos_data d;
1511
1512 d.scream_olds = &scream_olds;
1513 d.scream_pos = &scream_pos;
1514 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1515 if (!s)
1516 goto phooey; /* not present */
1517 }
1518
1519 DEBUG_r( if (!PL_colorset) reginitcolors() );
1520 DEBUG_r(PerlIO_printf(Perl_debug_log,
1521 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1522 PL_colors[4],PL_colors[5],PL_colors[0],
1523 prog->precomp,
1524 PL_colors[1],
1525 (strlen(prog->precomp) > 60 ? "..." : ""),
1526 PL_colors[0],
1527 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1528 startpos, PL_colors[1],
1529 (strend - startpos > 60 ? "..." : ""))
1530 );
1531
1532 /* Simplest case: anchored match need be tried only once. */
1533 /* [unless only anchor is BOL and multiline is set] */
1534 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1535 if (s == startpos && regtry(prog, startpos))
1536 goto got_it;
1537 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1538 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1539 {
1540 char *end;
1541
1542 if (minlen)
1543 dontbother = minlen - 1;
1aa99e6b 1544 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1545 /* for multiline we only have to try after newlines */
1546 if (prog->check_substr) {
1547 if (s == startpos)
1548 goto after_try;
1549 while (1) {
1550 if (regtry(prog, s))
1551 goto got_it;
1552 after_try:
1553 if (s >= end)
1554 goto phooey;
1555 if (prog->reganch & RE_USE_INTUIT) {
1556 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1557 if (!s)
1558 goto phooey;
1559 }
1560 else
1561 s++;
1562 }
1563 } else {
1564 if (s > startpos)
1565 s--;
1566 while (s < end) {
1567 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1568 if (regtry(prog, s))
1569 goto got_it;
1570 }
1571 }
1572 }
1573 }
1574 goto phooey;
1575 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1576 if (regtry(prog, PL_reg_ganch))
1577 goto got_it;
1578 goto phooey;
1579 }
1580
1581 /* Messy cases: unanchored match. */
9041c2e3 1582 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1583 /* we have /x+whatever/ */
1584 /* it must be a one character string (XXXX Except UTF?) */
1585 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1586#ifdef DEBUGGING
1587 int did_match = 0;
1588#endif
1589
1aa99e6b 1590 if (do_utf8) {
6eb5f6b9
JH
1591 while (s < strend) {
1592 if (*s == ch) {
bf93d4cc 1593 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1594 if (regtry(prog, s)) goto got_it;
1595 s += UTF8SKIP(s);
1596 while (s < strend && *s == ch)
1597 s += UTF8SKIP(s);
1598 }
1599 s += UTF8SKIP(s);
1600 }
1601 }
1602 else {
1603 while (s < strend) {
1604 if (*s == ch) {
bf93d4cc 1605 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1606 if (regtry(prog, s)) goto got_it;
1607 s++;
1608 while (s < strend && *s == ch)
1609 s++;
1610 }
1611 s++;
1612 }
1613 }
b7953727 1614 DEBUG_r(if (!did_match)
bf93d4cc 1615 PerlIO_printf(Perl_debug_log,
b7953727
JH
1616 "Did not find anchored character...\n")
1617 );
6eb5f6b9
JH
1618 }
1619 /*SUPPRESS 560*/
1aa99e6b
IH
1620 else if (do_utf8 == (UTF!=0) &&
1621 (prog->anchored_substr != Nullsv
9041c2e3 1622 || (prog->float_substr != Nullsv
1aa99e6b 1623 && prog->float_max_offset < strend - s))) {
9041c2e3 1624 SV *must = prog->anchored_substr
6eb5f6b9 1625 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1626 I32 back_max =
6eb5f6b9 1627 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1628 I32 back_min =
6eb5f6b9 1629 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1630 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1631 -(I32)(CHR_SVLEN(must)
1aa99e6b 1632 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1633 char *last1; /* Last position checked before */
bf93d4cc
GS
1634#ifdef DEBUGGING
1635 int did_match = 0;
1636#endif
6eb5f6b9
JH
1637
1638 if (s > PL_bostr)
1639 last1 = HOPc(s, -1);
1640 else
1641 last1 = s - 1; /* bogus */
1642
1643 /* XXXX check_substr already used to find `s', can optimize if
1644 check_substr==must. */
1645 scream_pos = -1;
1646 dontbother = end_shift;
1647 strend = HOPc(strend, -dontbother);
1648 while ( (s <= last) &&
9041c2e3 1649 ((flags & REXEC_SCREAM)
1aa99e6b 1650 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1651 end_shift, &scream_pos, 0))
1aa99e6b 1652 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1653 (unsigned char*)strend, must,
6eb5f6b9 1654 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1655 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1656 if (HOPc(s, -back_max) > last1) {
1657 last1 = HOPc(s, -back_min);
1658 s = HOPc(s, -back_max);
1659 }
1660 else {
1661 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1662
1663 last1 = HOPc(s, -back_min);
1664 s = t;
1665 }
1aa99e6b 1666 if (do_utf8) {
6eb5f6b9
JH
1667 while (s <= last1) {
1668 if (regtry(prog, s))
1669 goto got_it;
1670 s += UTF8SKIP(s);
1671 }
1672 }
1673 else {
1674 while (s <= last1) {
1675 if (regtry(prog, s))
1676 goto got_it;
1677 s++;
1678 }
1679 }
1680 }
b7953727
JH
1681 DEBUG_r(if (!did_match)
1682 PerlIO_printf(Perl_debug_log,
1683 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1684 ((must == prog->anchored_substr)
1685 ? "anchored" : "floating"),
1686 PL_colors[0],
1687 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1688 SvPVX(must),
b7953727
JH
1689 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1690 );
6eb5f6b9
JH
1691 goto phooey;
1692 }
155aba94 1693 else if ((c = prog->regstclass)) {
66e933ab
GS
1694 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1695 /* don't bother with what can't match */
6eb5f6b9 1696 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1697 DEBUG_r({
1698 SV *prop = sv_newmortal();
1699 regprop(prop, c);
1700 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s);
1701 });
6eb5f6b9
JH
1702 if (find_byclass(prog, c, s, strend, startpos, 0))
1703 goto got_it;
bf93d4cc 1704 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1705 }
1706 else {
1707 dontbother = 0;
1708 if (prog->float_substr != Nullsv) { /* Trim the end. */
1709 char *last;
d6a28714
JH
1710
1711 if (flags & REXEC_SCREAM) {
1712 last = screaminstr(sv, prog->float_substr, s - strbeg,
1713 end_shift, &scream_pos, 1); /* last one */
1714 if (!last)
ffc61ed2 1715 last = scream_olds; /* Only one occurrence. */
b8c5462f 1716 }
d6a28714
JH
1717 else {
1718 STRLEN len;
1719 char *little = SvPV(prog->float_substr, len);
1720
1721 if (SvTAIL(prog->float_substr)) {
1722 if (memEQ(strend - len + 1, little, len - 1))
1723 last = strend - len + 1;
1724 else if (!PL_multiline)
9041c2e3 1725 last = memEQ(strend - len, little, len)
d6a28714 1726 ? strend - len : Nullch;
b8c5462f 1727 else
d6a28714
JH
1728 goto find_last;
1729 } else {
1730 find_last:
9041c2e3 1731 if (len)
d6a28714 1732 last = rninstr(s, strend, little, little + len);
b8c5462f 1733 else
d6a28714 1734 last = strend; /* matching `$' */
b8c5462f 1735 }
b8c5462f 1736 }
bf93d4cc
GS
1737 if (last == NULL) {
1738 DEBUG_r(PerlIO_printf(Perl_debug_log,
1739 "%sCan't trim the tail, match fails (should not happen)%s\n",
1740 PL_colors[4],PL_colors[5]));
1741 goto phooey; /* Should not happen! */
1742 }
d6a28714
JH
1743 dontbother = strend - last + prog->float_min_offset;
1744 }
1745 if (minlen && (dontbother < minlen))
1746 dontbother = minlen - 1;
1747 strend -= dontbother; /* this one's always in bytes! */
1748 /* We don't know much -- general case. */
1aa99e6b 1749 if (do_utf8) {
d6a28714
JH
1750 for (;;) {
1751 if (regtry(prog, s))
1752 goto got_it;
1753 if (s >= strend)
1754 break;
b8c5462f 1755 s += UTF8SKIP(s);
d6a28714
JH
1756 };
1757 }
1758 else {
1759 do {
1760 if (regtry(prog, s))
1761 goto got_it;
1762 } while (s++ < strend);
1763 }
1764 }
1765
1766 /* Failure. */
1767 goto phooey;
1768
1769got_it:
1770 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1771
1772 if (PL_reg_eval_set) {
1773 /* Preserve the current value of $^R */
1774 if (oreplsv != GvSV(PL_replgv))
1775 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1776 restored, the value remains
1777 the same. */
acfe0abc 1778 restore_pos(aTHX_ 0);
d6a28714
JH
1779 }
1780
1781 /* make sure $`, $&, $', and $digit will work later */
1782 if ( !(flags & REXEC_NOT_FIRST) ) {
1783 if (RX_MATCH_COPIED(prog)) {
1784 Safefree(prog->subbeg);
1785 RX_MATCH_COPIED_off(prog);
1786 }
1787 if (flags & REXEC_COPY_STR) {
1788 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1789
1790 s = savepvn(strbeg, i);
1791 prog->subbeg = s;
1792 prog->sublen = i;
1793 RX_MATCH_COPIED_on(prog);
1794 }
1795 else {
1796 prog->subbeg = strbeg;
1797 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1798 }
1799 }
9041c2e3 1800
d6a28714
JH
1801 return 1;
1802
1803phooey:
bf93d4cc
GS
1804 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1805 PL_colors[4],PL_colors[5]));
d6a28714 1806 if (PL_reg_eval_set)
acfe0abc 1807 restore_pos(aTHX_ 0);
d6a28714
JH
1808 return 0;
1809}
1810
1811/*
1812 - regtry - try match at specific point
1813 */
1814STATIC I32 /* 0 failure, 1 success */
1815S_regtry(pTHX_ regexp *prog, char *startpos)
1816{
d6a28714
JH
1817 register I32 i;
1818 register I32 *sp;
1819 register I32 *ep;
1820 CHECKPOINT lastcp;
1821
02db2b7b
IZ
1822#ifdef DEBUGGING
1823 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1824#endif
d6a28714
JH
1825 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1826 MAGIC *mg;
1827
1828 PL_reg_eval_set = RS_init;
1829 DEBUG_r(DEBUG_s(
b900a521
JH
1830 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1831 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1832 ));
e8347627 1833 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1834 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1835 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1836 SAVETMPS;
1837 /* Apparently this is not needed, judging by wantarray. */
e8347627 1838 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1839 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1840
1841 if (PL_reg_sv) {
1842 /* Make $_ available to executed code. */
1843 if (PL_reg_sv != DEFSV) {
4d1ff10f 1844 /* SAVE_DEFSV does *not* suffice here for USE_5005THREADS */
d6a28714
JH
1845 SAVESPTR(DEFSV);
1846 DEFSV = PL_reg_sv;
b8c5462f 1847 }
d6a28714 1848
9041c2e3 1849 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1850 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1851 /* prepare for quick setting of pos */
14befaf4
DM
1852 sv_magic(PL_reg_sv, (SV*)0,
1853 PERL_MAGIC_regex_global, Nullch, 0);
1854 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1855 mg->mg_len = -1;
b8c5462f 1856 }
d6a28714
JH
1857 PL_reg_magic = mg;
1858 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1859 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 1860 }
09687e5a 1861 if (!PL_reg_curpm) {
0f79a09d 1862 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
1863#ifdef USE_ITHREADS
1864 {
1865 SV* repointer = newSViv(0);
577e12cc 1866 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 1867 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
1868 av_push(PL_regex_padav,repointer);
1869 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
1870 PL_regex_pad = AvARRAY(PL_regex_padav);
1871 }
1872#endif
1873 }
aaa362c4 1874 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
1875 PL_reg_oldcurpm = PL_curpm;
1876 PL_curpm = PL_reg_curpm;
1877 if (RX_MATCH_COPIED(prog)) {
1878 /* Here is a serious problem: we cannot rewrite subbeg,
1879 since it may be needed if this match fails. Thus
1880 $` inside (?{}) could fail... */
1881 PL_reg_oldsaved = prog->subbeg;
1882 PL_reg_oldsavedlen = prog->sublen;
1883 RX_MATCH_COPIED_off(prog);
1884 }
1885 else
1886 PL_reg_oldsaved = Nullch;
1887 prog->subbeg = PL_bostr;
1888 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1889 }
1890 prog->startp[0] = startpos - PL_bostr;
1891 PL_reginput = startpos;
1892 PL_regstartp = prog->startp;
1893 PL_regendp = prog->endp;
1894 PL_reglastparen = &prog->lastparen;
a01268b5 1895 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
1896 prog->lastparen = 0;
1897 PL_regsize = 0;
1898 DEBUG_r(PL_reg_starttry = startpos);
1899 if (PL_reg_start_tmpl <= prog->nparens) {
1900 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1901 if(PL_reg_start_tmp)
1902 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1903 else
1904 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1905 }
1906
1907 /* XXXX What this code is doing here?!!! There should be no need
1908 to do this again and again, PL_reglastparen should take care of
3dd2943c 1909 this! --ilya*/
dafc8851
JH
1910
1911 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1912 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1913 * PL_reglastparen), is not needed at all by the test suite
1914 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1915 * enough, for building DynaLoader, or otherwise this
1916 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1917 * will happen. Meanwhile, this code *is* needed for the
1918 * above-mentioned test suite tests to succeed. The common theme
1919 * on those tests seems to be returning null fields from matches.
1920 * --jhi */
dafc8851 1921#if 1
d6a28714
JH
1922 sp = prog->startp;
1923 ep = prog->endp;
1924 if (prog->nparens) {
09e8ae3b 1925 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1926 *++sp = -1;
1927 *++ep = -1;
1928 }
1929 }
dafc8851 1930#endif
02db2b7b 1931 REGCP_SET(lastcp);
d6a28714
JH
1932 if (regmatch(prog->program + 1)) {
1933 prog->endp[0] = PL_reginput - PL_bostr;
1934 return 1;
1935 }
02db2b7b 1936 REGCP_UNWIND(lastcp);
d6a28714
JH
1937 return 0;
1938}
1939
02db2b7b
IZ
1940#define RE_UNWIND_BRANCH 1
1941#define RE_UNWIND_BRANCHJ 2
1942
1943union re_unwind_t;
1944
1945typedef struct { /* XX: makes sense to enlarge it... */
1946 I32 type;
1947 I32 prev;
1948 CHECKPOINT lastcp;
1949} re_unwind_generic_t;
1950
1951typedef struct {
1952 I32 type;
1953 I32 prev;
1954 CHECKPOINT lastcp;
1955 I32 lastparen;
1956 regnode *next;
1957 char *locinput;
1958 I32 nextchr;
1959#ifdef DEBUGGING
1960 int regindent;
1961#endif
1962} re_unwind_branch_t;
1963
1964typedef union re_unwind_t {
1965 I32 type;
1966 re_unwind_generic_t generic;
1967 re_unwind_branch_t branch;
1968} re_unwind_t;
1969
8ba1375e
MJD
1970#define sayYES goto yes
1971#define sayNO goto no
1972#define sayYES_FINAL goto yes_final
1973#define sayYES_LOUD goto yes_loud
1974#define sayNO_FINAL goto no_final
1975#define sayNO_SILENT goto do_no
1976#define saySAME(x) if (x) goto yes; else goto no
1977
1978#define REPORT_CODE_OFF 24
1979
d6a28714
JH
1980/*
1981 - regmatch - main matching routine
1982 *
1983 * Conceptually the strategy is simple: check to see whether the current
1984 * node matches, call self recursively to see whether the rest matches,
1985 * and then act accordingly. In practice we make some effort to avoid
1986 * recursion, in particular by going through "ordinary" nodes (that don't
1987 * need to know whether the rest of the match failed) by a loop instead of
1988 * by recursion.
1989 */
1990/* [lwall] I've hoisted the register declarations to the outer block in order to
1991 * maybe save a little bit of pushing and popping on the stack. It also takes
1992 * advantage of machines that use a register save mask on subroutine entry.
1993 */
1994STATIC I32 /* 0 failure, 1 success */
1995S_regmatch(pTHX_ regnode *prog)
1996{
d6a28714
JH
1997 register regnode *scan; /* Current node. */
1998 regnode *next; /* Next node. */
1999 regnode *inner; /* Next node in internal branch. */
2000 register I32 nextchr; /* renamed nextchr - nextchar colides with
2001 function of same name */
2002 register I32 n; /* no or next */
b7953727
JH
2003 register I32 ln = 0; /* len or last */
2004 register char *s = Nullch; /* operand or save */
d6a28714 2005 register char *locinput = PL_reginput;
b7953727 2006 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 2007 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2008 I32 unwind = 0;
b7953727 2009#if 0
02db2b7b 2010 I32 firstcp = PL_savestack_ix;
b7953727 2011#endif
53c4c00c 2012 register bool do_utf8 = PL_reg_match_utf8;
02db2b7b 2013
d6a28714
JH
2014#ifdef DEBUGGING
2015 PL_regindent++;
2016#endif
2017
2018 /* Note that nextchr is a byte even in UTF */
2019 nextchr = UCHARAT(locinput);
2020 scan = prog;
2021 while (scan != NULL) {
8ba1375e 2022
d6a28714
JH
2023 DEBUG_r( {
2024 SV *prop = sv_newmortal();
2025 int docolor = *PL_colors[0];
2026 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2027 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2028 /* The part of the string before starttry has one color
2029 (pref0_len chars), between starttry and current
2030 position another one (pref_len - pref0_len chars),
2031 after the current position the third one.
2032 We assume that pref0_len <= pref_len, otherwise we
2033 decrease pref0_len. */
9041c2e3 2034 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2035 ? (5 + taill) - l : locinput - PL_bostr;
2036 int pref0_len;
d6a28714 2037
1aa99e6b
IH
2038 while (UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2039 pref_len++;
2040 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2041 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2042 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2043 ? (5 + taill) - pref_len : PL_regeol - locinput);
1aa99e6b
IH
2044 while (UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2045 l--;
d6a28714
JH
2046 if (pref0_len < 0)
2047 pref0_len = 0;
2048 if (pref0_len > pref_len)
2049 pref0_len = pref_len;
2050 regprop(prop, scan);
9041c2e3 2051 PerlIO_printf(Perl_debug_log,
b900a521 2052 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
9041c2e3
NIS
2053 (IV)(locinput - PL_bostr),
2054 PL_colors[4], pref0_len,
d6a28714 2055 locinput - pref_len, PL_colors[5],
9041c2e3 2056 PL_colors[2], pref_len - pref0_len,
d6a28714
JH
2057 locinput - pref_len + pref0_len, PL_colors[3],
2058 (docolor ? "" : "> <"),
2059 PL_colors[0], l, locinput, PL_colors[1],
2060 15 - l - pref_len + 1,
2061 "",
b900a521 2062 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
2063 SvPVX(prop));
2064 } );
2065
2066 next = scan + NEXT_OFF(scan);
2067 if (next == scan)
2068 next = NULL;
2069
2070 switch (OP(scan)) {
2071 case BOL:
12d33761
HS
2072 if (locinput == PL_bostr || (PL_multiline &&
2073 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2074 {
2075 /* regtill = regbol; */
b8c5462f
JH
2076 break;
2077 }
d6a28714
JH
2078 sayNO;
2079 case MBOL:
12d33761
HS
2080 if (locinput == PL_bostr ||
2081 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2082 {
b8c5462f
JH
2083 break;
2084 }
d6a28714
JH
2085 sayNO;
2086 case SBOL:
c2a73568 2087 if (locinput == PL_bostr)
b8c5462f 2088 break;
d6a28714
JH
2089 sayNO;
2090 case GPOS:
2091 if (locinput == PL_reg_ganch)
2092 break;
2093 sayNO;
2094 case EOL:
2095 if (PL_multiline)
2096 goto meol;
2097 else
2098 goto seol;
2099 case MEOL:
2100 meol:
2101 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2102 sayNO;
b8c5462f 2103 break;
d6a28714
JH
2104 case SEOL:
2105 seol:
2106 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2107 sayNO;
d6a28714 2108 if (PL_regeol - locinput > 1)
b8c5462f 2109 sayNO;
b8c5462f 2110 break;
d6a28714
JH
2111 case EOS:
2112 if (PL_regeol != locinput)
b8c5462f 2113 sayNO;
d6a28714 2114 break;
ffc61ed2 2115 case SANY:
d6a28714 2116 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2117 sayNO;
f33976b4
DB
2118 if (do_utf8) {
2119 locinput += PL_utf8skip[nextchr];
2120 if (locinput > PL_regeol)
2121 sayNO;
2122 nextchr = UCHARAT(locinput);
2123 }
2124 else
2125 nextchr = UCHARAT(++locinput);
2126 break;
2127 case CANY:
2128 if (!nextchr && locinput >= PL_regeol)
2129 sayNO;
b8c5462f 2130 nextchr = UCHARAT(++locinput);
a0d0e21e 2131 break;
ffc61ed2 2132 case REG_ANY:
1aa99e6b
IH
2133 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2134 sayNO;
2135 if (do_utf8) {
b8c5462f 2136 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2137 if (locinput > PL_regeol)
2138 sayNO;
a0ed51b3 2139 nextchr = UCHARAT(locinput);
a0ed51b3 2140 }
1aa99e6b
IH
2141 else
2142 nextchr = UCHARAT(++locinput);
a0ed51b3 2143 break;
d6a28714 2144 case EXACT:
cd439c50
IZ
2145 s = STRING(scan);
2146 ln = STR_LEN(scan);
1aa99e6b
IH
2147 if (do_utf8 != (UTF!=0)) {
2148 char *l = locinput;
2149 char *e = s + ln;
2150 STRLEN len;
2151 if (do_utf8)
2152 while (s < e) {
2153 if (l >= PL_regeol)
2154 sayNO;
9041c2e3 2155 if (*((U8*)s) != utf8_to_uvchr((U8*)l, &len))
1aa99e6b
IH
2156 sayNO;
2157 s++;
2158 l += len;
2159 }
2160 else
2161 while (s < e) {
2162 if (l >= PL_regeol)
2163 sayNO;
9041c2e3 2164 if (*((U8*)l) != utf8_to_uvchr((U8*)s, &len))
1aa99e6b
IH
2165 sayNO;
2166 s += len;
2167 l++;
2168 }
2169 locinput = l;
2170 nextchr = UCHARAT(locinput);
2171 break;
2172 }
d6a28714
JH
2173 /* Inline the first character, for speed. */
2174 if (UCHARAT(s) != nextchr)
2175 sayNO;
2176 if (PL_regeol - locinput < ln)
2177 sayNO;
2178 if (ln > 1 && memNE(s, locinput, ln))
2179 sayNO;
2180 locinput += ln;
2181 nextchr = UCHARAT(locinput);
2182 break;
2183 case EXACTFL:
b8c5462f
JH
2184 PL_reg_flags |= RF_tainted;
2185 /* FALL THROUGH */
d6a28714 2186 case EXACTF:
cd439c50
IZ
2187 s = STRING(scan);
2188 ln = STR_LEN(scan);
d6a28714 2189
1aa99e6b 2190 if (do_utf8) {
d6a28714 2191 char *l = locinput;
1aa99e6b
IH
2192 char *e;
2193 e = s + ln;
d6a28714
JH
2194 c1 = OP(scan) == EXACTF;
2195 while (s < e) {
1aa99e6b 2196 if (l >= PL_regeol) {
d6a28714
JH
2197 sayNO;
2198 }
2b9d42f0 2199 if ((UTF ? utf8n_to_uvchr((U8*)s, e - s, 0, 0) : *((U8*)s)) !=
1aa99e6b
IH
2200 (c1 ? toLOWER_utf8((U8*)l) : toLOWER_LC_utf8((U8*)l)))
2201 sayNO;
2202 s += UTF ? UTF8SKIP(s) : 1;
d6a28714 2203 l += UTF8SKIP(l);
b8c5462f 2204 }
d6a28714 2205 locinput = l;
a0ed51b3
LW
2206 nextchr = UCHARAT(locinput);
2207 break;
2208 }
d6a28714
JH
2209
2210 /* Inline the first character, for speed. */
2211 if (UCHARAT(s) != nextchr &&
2212 UCHARAT(s) != ((OP(scan) == EXACTF)
2213 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2214 sayNO;
d6a28714 2215 if (PL_regeol - locinput < ln)
b8c5462f 2216 sayNO;
d6a28714
JH
2217 if (ln > 1 && (OP(scan) == EXACTF
2218 ? ibcmp(s, locinput, ln)
2219 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2220 sayNO;
d6a28714
JH
2221 locinput += ln;
2222 nextchr = UCHARAT(locinput);
a0d0e21e 2223 break;
d6a28714 2224 case ANYOF:
ffc61ed2
JH
2225 if (do_utf8) {
2226 if (!reginclass(scan, (U8*)locinput, do_utf8))
2227 sayNO;
2228 if (locinput >= PL_regeol)
2229 sayNO;
2230 locinput += PL_utf8skip[nextchr];
b8c5462f 2231 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2232 }
2233 else {
2234 if (nextchr < 0)
2235 nextchr = UCHARAT(locinput);
2236 if (!reginclass(scan, (U8*)locinput, do_utf8))
2237 sayNO;
2238 if (!nextchr && locinput >= PL_regeol)
2239 sayNO;
2240 nextchr = UCHARAT(++locinput);
2241 }
b8c5462f 2242 break;
d6a28714 2243 case ALNUML:
b8c5462f
JH
2244 PL_reg_flags |= RF_tainted;
2245 /* FALL THROUGH */
d6a28714 2246 case ALNUM:
b8c5462f 2247 if (!nextchr)
4633a7c4 2248 sayNO;
ffc61ed2 2249 if (do_utf8) {
ad24be35 2250 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2251 if (!(OP(scan) == ALNUM
3568d838 2252 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2253 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2254 {
2255 sayNO;
a0ed51b3 2256 }
b8c5462f 2257 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2258 nextchr = UCHARAT(locinput);
2259 break;
2260 }
ffc61ed2 2261 if (!(OP(scan) == ALNUM
d6a28714 2262 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2263 sayNO;
b8c5462f 2264 nextchr = UCHARAT(++locinput);
a0d0e21e 2265 break;
d6a28714 2266 case NALNUML:
b8c5462f
JH
2267 PL_reg_flags |= RF_tainted;
2268 /* FALL THROUGH */
d6a28714
JH
2269 case NALNUM:
2270 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2271 sayNO;
ffc61ed2 2272 if (do_utf8) {
8269fa76 2273 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2274 if (OP(scan) == NALNUM
3568d838 2275 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2276 : isALNUM_LC_utf8((U8*)locinput))
2277 {
b8c5462f 2278 sayNO;
d6a28714 2279 }
b8c5462f
JH
2280 locinput += PL_utf8skip[nextchr];
2281 nextchr = UCHARAT(locinput);
2282 break;
2283 }
ffc61ed2 2284 if (OP(scan) == NALNUM
d6a28714 2285 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2286 sayNO;
76e3520e 2287 nextchr = UCHARAT(++locinput);
a0d0e21e 2288 break;
d6a28714
JH
2289 case BOUNDL:
2290 case NBOUNDL:
3280af22 2291 PL_reg_flags |= RF_tainted;
bbce6d69 2292 /* FALL THROUGH */
d6a28714
JH
2293 case BOUND:
2294 case NBOUND:
2295 /* was last char in word? */
ffc61ed2 2296 if (do_utf8) {
12d33761
HS
2297 if (locinput == PL_bostr)
2298 ln = '\n';
ffc61ed2
JH
2299 else {
2300 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2301
2b9d42f0 2302 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2303 }
2304 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2305 ln = isALNUM_uni(ln);
8269fa76 2306 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2307 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2308 }
2309 else {
9041c2e3 2310 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2311 n = isALNUM_LC_utf8((U8*)locinput);
2312 }
a0ed51b3 2313 }
d6a28714 2314 else {
12d33761
HS
2315 ln = (locinput != PL_bostr) ?
2316 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2317 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2318 ln = isALNUM(ln);
2319 n = isALNUM(nextchr);
2320 }
2321 else {
2322 ln = isALNUM_LC(ln);
2323 n = isALNUM_LC(nextchr);
2324 }
d6a28714 2325 }
ffc61ed2
JH
2326 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2327 OP(scan) == BOUNDL))
2328 sayNO;
a0ed51b3 2329 break;
d6a28714 2330 case SPACEL:
3280af22 2331 PL_reg_flags |= RF_tainted;
bbce6d69 2332 /* FALL THROUGH */
d6a28714 2333 case SPACE:
9442cb0e 2334 if (!nextchr)
4633a7c4 2335 sayNO;
1aa99e6b 2336 if (do_utf8) {
fd400ab9 2337 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2338 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2339 if (!(OP(scan) == SPACE
3568d838 2340 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2341 : isSPACE_LC_utf8((U8*)locinput)))
2342 {
2343 sayNO;
2344 }
2345 locinput += PL_utf8skip[nextchr];
2346 nextchr = UCHARAT(locinput);
2347 break;
d6a28714 2348 }
ffc61ed2
JH
2349 if (!(OP(scan) == SPACE
2350 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2351 sayNO;
2352 nextchr = UCHARAT(++locinput);
2353 }
2354 else {
2355 if (!(OP(scan) == SPACE
2356 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2357 sayNO;
2358 nextchr = UCHARAT(++locinput);
a0ed51b3 2359 }
a0ed51b3 2360 break;
d6a28714 2361 case NSPACEL:
3280af22 2362 PL_reg_flags |= RF_tainted;
bbce6d69 2363 /* FALL THROUGH */
d6a28714 2364 case NSPACE:
9442cb0e 2365 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2366 sayNO;
1aa99e6b 2367 if (do_utf8) {
8269fa76 2368 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2369 if (OP(scan) == NSPACE
3568d838 2370 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2371 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2372 {
2373 sayNO;
2374 }
2375 locinput += PL_utf8skip[nextchr];
2376 nextchr = UCHARAT(locinput);
2377 break;
a0ed51b3 2378 }
ffc61ed2 2379 if (OP(scan) == NSPACE
d6a28714 2380 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2381 sayNO;
76e3520e 2382 nextchr = UCHARAT(++locinput);
a0d0e21e 2383 break;
d6a28714 2384 case DIGITL:
a0ed51b3
LW
2385 PL_reg_flags |= RF_tainted;
2386 /* FALL THROUGH */
d6a28714 2387 case DIGIT:
9442cb0e 2388 if (!nextchr)
a0ed51b3 2389 sayNO;
1aa99e6b 2390 if (do_utf8) {
8269fa76 2391 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2392 if (!(OP(scan) == DIGIT
3568d838 2393 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2394 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2395 {
a0ed51b3 2396 sayNO;
dfe13c55 2397 }
6f06b55f 2398 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2399 nextchr = UCHARAT(locinput);
2400 break;
2401 }
ffc61ed2 2402 if (!(OP(scan) == DIGIT
9442cb0e 2403 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2404 sayNO;
2405 nextchr = UCHARAT(++locinput);
2406 break;
d6a28714 2407 case NDIGITL:
b8c5462f
JH
2408 PL_reg_flags |= RF_tainted;
2409 /* FALL THROUGH */
d6a28714 2410 case NDIGIT:
9442cb0e 2411 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2412 sayNO;
1aa99e6b 2413 if (do_utf8) {
8269fa76 2414 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2415 if (OP(scan) == NDIGIT
3568d838 2416 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2417 : isDIGIT_LC_utf8((U8*)locinput))
2418 {
a0ed51b3 2419 sayNO;
9442cb0e 2420 }
6f06b55f 2421 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2422 nextchr = UCHARAT(locinput);
2423 break;
2424 }
ffc61ed2 2425 if (OP(scan) == NDIGIT
9442cb0e 2426 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2427 sayNO;
2428 nextchr = UCHARAT(++locinput);
2429 break;
2430 case CLUMP:
8269fa76 2431 LOAD_UTF8_CHARCLASS(mark,"~");
3568d838
JH
2432 if (locinput >= PL_regeol ||
2433 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3 2434 sayNO;
6f06b55f 2435 locinput += PL_utf8skip[nextchr];
3568d838
JH
2436 while (locinput < PL_regeol &&
2437 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3
LW
2438 locinput += UTF8SKIP(locinput);
2439 if (locinput > PL_regeol)
2440 sayNO;
2441 nextchr = UCHARAT(locinput);
2442 break;
c8756f30 2443 case REFFL:
3280af22 2444 PL_reg_flags |= RF_tainted;
c8756f30 2445 /* FALL THROUGH */
c277df42 2446 case REF:
c8756f30 2447 case REFF:
c277df42 2448 n = ARG(scan); /* which paren pair */
cf93c79d 2449 ln = PL_regstartp[n];
2c2d71f5 2450 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2451 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2452 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2453 if (ln == PL_regendp[n])
a0d0e21e 2454 break;
a0ed51b3 2455
cf93c79d 2456 s = PL_bostr + ln;
1aa99e6b 2457 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2458 char *l = locinput;
cf93c79d 2459 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2460 /*
2461 * Note that we can't do the "other character" lookup trick as
2462 * in the 8-bit case (no pun intended) because in Unicode we
2463 * have to map both upper and title case to lower case.
2464 */
2465 if (OP(scan) == REFF) {
2466 while (s < e) {
2467 if (l >= PL_regeol)
2468 sayNO;
dfe13c55 2469 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2470 sayNO;
2471 s += UTF8SKIP(s);
2472 l += UTF8SKIP(l);
2473 }
2474 }
2475 else {
2476 while (s < e) {
2477 if (l >= PL_regeol)
2478 sayNO;
dfe13c55 2479 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2480 sayNO;
2481 s += UTF8SKIP(s);
2482 l += UTF8SKIP(l);
2483 }
2484 }
2485 locinput = l;
2486 nextchr = UCHARAT(locinput);
2487 break;
2488 }
2489
a0d0e21e 2490 /* Inline the first character, for speed. */
76e3520e 2491 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2492 (OP(scan) == REF ||
2493 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2494 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2495 sayNO;
cf93c79d 2496 ln = PL_regendp[n] - ln;
3280af22 2497 if (locinput + ln > PL_regeol)
4633a7c4 2498 sayNO;
c8756f30
AK
2499 if (ln > 1 && (OP(scan) == REF
2500 ? memNE(s, locinput, ln)
2501 : (OP(scan) == REFF
2502 ? ibcmp(s, locinput, ln)
2503 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2504 sayNO;
a0d0e21e 2505 locinput += ln;
76e3520e 2506 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2507 break;
2508
2509 case NOTHING:
c277df42 2510 case TAIL:
a0d0e21e
LW
2511 break;
2512 case BACK:
2513 break;
c277df42
IZ
2514 case EVAL:
2515 {
2516 dSP;
533c011a 2517 OP_4tree *oop = PL_op;
3280af22
NIS
2518 COP *ocurcop = PL_curcop;
2519 SV **ocurpad = PL_curpad;
c277df42 2520 SV *ret;
9041c2e3 2521
c277df42 2522 n = ARG(scan);
533c011a 2523 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2524 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2525 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2526 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2527
cea2e8a9 2528 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2529 SPAGAIN;
2530 ret = POPs;
2531 PUTBACK;
9041c2e3 2532
0f5d15d6
IZ
2533 PL_op = oop;
2534 PL_curpad = ocurpad;
2535 PL_curcop = ocurcop;
c277df42 2536 if (logical) {
0f5d15d6
IZ
2537 if (logical == 2) { /* Postponed subexpression. */
2538 regexp *re;
22c35a8c 2539 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2540 re_cc_state state;
0f5d15d6
IZ
2541 CHECKPOINT cp, lastcp;
2542
2543 if(SvROK(ret) || SvRMAGICAL(ret)) {
2544 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2545
2546 if(SvMAGICAL(sv))
14befaf4 2547 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2548 }
2549 if (mg) {
2550 re = (regexp *)mg->mg_obj;
df0003d4 2551 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2552 }
2553 else {
2554 STRLEN len;
2555 char *t = SvPV(ret, len);
2556 PMOP pm;
2557 char *oprecomp = PL_regprecomp;
2558 I32 osize = PL_regsize;
2559 I32 onpar = PL_regnpar;
2560
5fcd1c1b 2561 Zero(&pm, 1, PMOP);
cea2e8a9 2562 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2563 if (!(SvFLAGS(ret)
0f5d15d6 2564 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2565 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2566 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2567 PL_regprecomp = oprecomp;
2568 PL_regsize = osize;
2569 PL_regnpar = onpar;
2570 }
2571 DEBUG_r(
9041c2e3 2572 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2573 "Entering embedded `%s%.60s%s%s'\n",
2574 PL_colors[0],
2575 re->precomp,
2576 PL_colors[1],
2577 (strlen(re->precomp) > 60 ? "..." : ""))
2578 );
2579 state.node = next;
2580 state.prev = PL_reg_call_cc;
2581 state.cc = PL_regcc;
2582 state.re = PL_reg_re;
2583
2ab05381 2584 PL_regcc = 0;
9041c2e3 2585
0f5d15d6 2586 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2587 REGCP_SET(lastcp);
0f5d15d6
IZ
2588 cache_re(re);
2589 state.ss = PL_savestack_ix;
2590 *PL_reglastparen = 0;
a01268b5 2591 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2592 PL_reg_call_cc = &state;
2593 PL_reginput = locinput;
2c2d71f5
JH
2594
2595 /* XXXX This is too dramatic a measure... */
2596 PL_reg_maxiter = 0;
2597
0f5d15d6 2598 if (regmatch(re->program + 1)) {
2c914db6
IZ
2599 /* Even though we succeeded, we need to restore
2600 global variables, since we may be wrapped inside
2601 SUSPEND, thus the match may be not finished yet. */
2602
2603 /* XXXX Do this only if SUSPENDed? */
2604 PL_reg_call_cc = state.prev;
2605 PL_regcc = state.cc;
2606 PL_reg_re = state.re;
2607 cache_re(PL_reg_re);
2608
2609 /* XXXX This is too dramatic a measure... */
2610 PL_reg_maxiter = 0;
2611
2612 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2613 ReREFCNT_dec(re);
2614 regcpblow(cp);
2615 sayYES;
2616 }
0f5d15d6 2617 ReREFCNT_dec(re);
02db2b7b 2618 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2619 regcppop();
2620 PL_reg_call_cc = state.prev;
2621 PL_regcc = state.cc;
2622 PL_reg_re = state.re;
d3790889 2623 cache_re(PL_reg_re);
2c2d71f5
JH
2624
2625 /* XXXX This is too dramatic a measure... */
2626 PL_reg_maxiter = 0;
2627
8e514ae6 2628 logical = 0;
0f5d15d6
IZ
2629 sayNO;
2630 }
c277df42 2631 sw = SvTRUE(ret);
0f5d15d6 2632 logical = 0;
a0ed51b3
LW
2633 }
2634 else
3280af22 2635 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2636 break;
2637 }
a0d0e21e 2638 case OPEN:
c277df42 2639 n = ARG(scan); /* which paren pair */
3280af22
NIS
2640 PL_reg_start_tmp[n] = locinput;
2641 if (n > PL_regsize)
2642 PL_regsize = n;
a0d0e21e
LW
2643 break;
2644 case CLOSE:
c277df42 2645 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2646 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2647 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2648 if (n > *PL_reglastparen)
2649 *PL_reglastparen = n;
a01268b5 2650 *PL_reglastcloseparen = n;
a0d0e21e 2651 break;
c277df42
IZ
2652 case GROUPP:
2653 n = ARG(scan); /* which paren pair */
cf93c79d 2654 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2655 break;
2656 case IFTHEN:
2c2d71f5 2657 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2658 if (sw)
2659 next = NEXTOPER(NEXTOPER(scan));
2660 else {
2661 next = scan + ARG(scan);
2662 if (OP(next) == IFTHEN) /* Fake one. */
2663 next = NEXTOPER(NEXTOPER(next));
2664 }
2665 break;
2666 case LOGICAL:
0f5d15d6 2667 logical = scan->flags;
c277df42 2668 break;
2ab05381
IZ
2669/*******************************************************************
2670 PL_regcc contains infoblock about the innermost (...)* loop, and
2671 a pointer to the next outer infoblock.
2672
2673 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2674
2675 1) After matching X, regnode for CURLYX is processed;
2676
9041c2e3 2677 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2678 regmatch() recursively with the starting point at WHILEM node;
2679
2680 3) Each hit of WHILEM node tries to match A and Z (in the order
2681 depending on the current iteration, min/max of {min,max} and
2682 greediness). The information about where are nodes for "A"
2683 and "Z" is read from the infoblock, as is info on how many times "A"
2684 was already matched, and greediness.
2685
2686 4) After A matches, the same WHILEM node is hit again.
2687
2688 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2689 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2690 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2691 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2692 of the external loop.
2693
2694 Currently present infoblocks form a tree with a stem formed by PL_curcc
2695 and whatever it mentions via ->next, and additional attached trees
2696 corresponding to temporarily unset infoblocks as in "5" above.
2697
9041c2e3 2698 In the following picture infoblocks for outer loop of
2ab05381
IZ
2699 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2700 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2701 infoblocks are drawn below the "reset" infoblock.
2702
2703 In fact in the picture below we do not show failed matches for Z and T
2704 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2705 more obvious *why* one needs to *temporary* unset infoblocks.]
2706
2707 Matched REx position InfoBlocks Comment
2708 (Y(A)*?Z)*?T x
2709 Y(A)*?Z)*?T x <- O
2710 Y (A)*?Z)*?T x <- O
2711 Y A)*?Z)*?T x <- O <- I
2712 YA )*?Z)*?T x <- O <- I
2713 YA A)*?Z)*?T x <- O <- I
2714 YAA )*?Z)*?T x <- O <- I
2715 YAA Z)*?T x <- O # Temporary unset I
2716 I
2717
2718 YAAZ Y(A)*?Z)*?T x <- O
2719 I
2720
2721 YAAZY (A)*?Z)*?T x <- O
2722 I
2723
2724 YAAZY A)*?Z)*?T x <- O <- I
2725 I
2726
2727 YAAZYA )*?Z)*?T x <- O <- I
2728 I
2729
2730 YAAZYA Z)*?T x <- O # Temporary unset I
2731 I,I
2732
2733 YAAZYAZ )*?T x <- O
2734 I,I
2735
2736 YAAZYAZ T x # Temporary unset O
2737 O
2738 I,I
2739
2740 YAAZYAZT x
2741 O
2742 I,I
2743 *******************************************************************/
a0d0e21e
LW
2744 case CURLYX: {
2745 CURCUR cc;
3280af22 2746 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2747 /* No need to save/restore up to this paren */
2748 I32 parenfloor = scan->flags;
c277df42
IZ
2749
2750 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2751 next += ARG(next);
3280af22
NIS
2752 cc.oldcc = PL_regcc;
2753 PL_regcc = &cc;
cb434fcc
IZ
2754 /* XXXX Probably it is better to teach regpush to support
2755 parenfloor > PL_regsize... */
2756 if (parenfloor > *PL_reglastparen)
2757 parenfloor = *PL_reglastparen; /* Pessimization... */
2758 cc.parenfloor = parenfloor;
a0d0e21e
LW
2759 cc.cur = -1;
2760 cc.min = ARG1(scan);
2761 cc.max = ARG2(scan);
c277df42 2762 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2763 cc.next = next;
2764 cc.minmod = minmod;
2765 cc.lastloc = 0;
3280af22 2766 PL_reginput = locinput;
a0d0e21e
LW
2767 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2768 regcpblow(cp);
3280af22 2769 PL_regcc = cc.oldcc;
4633a7c4 2770 saySAME(n);
a0d0e21e
LW
2771 }
2772 /* NOT REACHED */
2773 case WHILEM: {
2774 /*
2775 * This is really hard to understand, because after we match
2776 * what we're trying to match, we must make sure the rest of
2c2d71f5 2777 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2778 * to go back UP the parse tree by recursing ever deeper. And
2779 * if it fails, we have to reset our parent's current state
2780 * that we can try again after backing off.
2781 */
2782
c277df42 2783 CHECKPOINT cp, lastcp;
3280af22 2784 CURCUR* cc = PL_regcc;
c277df42
IZ
2785 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2786
4633a7c4 2787 n = cc->cur + 1; /* how many we know we matched */
3280af22 2788 PL_reginput = locinput;
a0d0e21e 2789
c277df42 2790 DEBUG_r(
9041c2e3
NIS
2791 PerlIO_printf(Perl_debug_log,
2792 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2793 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2794 (long)n, (long)cc->min,
c277df42
IZ
2795 (long)cc->max, (long)cc)
2796 );
4633a7c4 2797
a0d0e21e
LW
2798 /* If degenerate scan matches "", assume scan done. */
2799
579cf2c3 2800 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2801 PL_regcc = cc->oldcc;
2ab05381
IZ
2802 if (PL_regcc)
2803 ln = PL_regcc->cur;
c277df42 2804 DEBUG_r(
c3464db5
DD
2805 PerlIO_printf(Perl_debug_log,
2806 "%*s empty match detected, try continuation...\n",
3280af22 2807 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2808 );
a0d0e21e 2809 if (regmatch(cc->next))
4633a7c4 2810 sayYES;
2ab05381
IZ
2811 if (PL_regcc)
2812 PL_regcc->cur = ln;
3280af22 2813 PL_regcc = cc;
4633a7c4 2814 sayNO;
a0d0e21e
LW
2815 }
2816
2817 /* First just match a string of min scans. */
2818
2819 if (n < cc->min) {
2820 cc->cur = n;
2821 cc->lastloc = locinput;
4633a7c4
LW
2822 if (regmatch(cc->scan))
2823 sayYES;
2824 cc->cur = n - 1;
c277df42 2825 cc->lastloc = lastloc;
4633a7c4 2826 sayNO;
a0d0e21e
LW
2827 }
2828
2c2d71f5
JH
2829 if (scan->flags) {
2830 /* Check whether we already were at this position.
2831 Postpone detection until we know the match is not
2832 *that* much linear. */
2833 if (!PL_reg_maxiter) {
2834 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2835 PL_reg_leftiter = PL_reg_maxiter;
2836 }
2837 if (PL_reg_leftiter-- == 0) {
2838 I32 size = (PL_reg_maxiter + 7)/8;
2839 if (PL_reg_poscache) {
2840 if (PL_reg_poscache_size < size) {
2841 Renew(PL_reg_poscache, size, char);
2842 PL_reg_poscache_size = size;
2843 }
2844 Zero(PL_reg_poscache, size, char);
2845 }
2846 else {
2847 PL_reg_poscache_size = size;
2848 Newz(29, PL_reg_poscache, size, char);
2849 }
2850 DEBUG_r(
2851 PerlIO_printf(Perl_debug_log,
2852 "%sDetected a super-linear match, switching on caching%s...\n",
2853 PL_colors[4], PL_colors[5])
2854 );
2855 }
2856 if (PL_reg_leftiter < 0) {
2857 I32 o = locinput - PL_bostr, b;
2858
2859 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2860 b = o % 8;
2861 o /= 8;
2862 if (PL_reg_poscache[o] & (1<<b)) {
2863 DEBUG_r(
2864 PerlIO_printf(Perl_debug_log,
2865 "%*s already tried at this position...\n",
2866 REPORT_CODE_OFF+PL_regindent*2, "")
2867 );
7821416a 2868 sayNO_SILENT;
2c2d71f5
JH
2869 }
2870 PL_reg_poscache[o] |= (1<<b);
2871 }
2872 }
2873
a0d0e21e
LW
2874 /* Prefer next over scan for minimal matching. */
2875
2876 if (cc->minmod) {
3280af22 2877 PL_regcc = cc->oldcc;
2ab05381
IZ
2878 if (PL_regcc)
2879 ln = PL_regcc->cur;
5f05dabc 2880 cp = regcppush(cc->parenfloor);
02db2b7b 2881 REGCP_SET(lastcp);
5f05dabc 2882 if (regmatch(cc->next)) {
c277df42 2883 regcpblow(cp);
4633a7c4 2884 sayYES; /* All done. */
5f05dabc 2885 }
02db2b7b 2886 REGCP_UNWIND(lastcp);
5f05dabc 2887 regcppop();
2ab05381
IZ
2888 if (PL_regcc)
2889 PL_regcc->cur = ln;
3280af22 2890 PL_regcc = cc;
a0d0e21e 2891
c277df42 2892 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 2893 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2894 && !(PL_reg_flags & RF_warned)) {
2895 PL_reg_flags |= RF_warned;
e476b1b5 2896 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2897 "Complex regular subexpression recursion",
2898 REG_INFTY - 1);
c277df42 2899 }
4633a7c4 2900 sayNO;
c277df42 2901 }
a687059c 2902
c277df42 2903 DEBUG_r(
c3464db5
DD
2904 PerlIO_printf(Perl_debug_log,
2905 "%*s trying longer...\n",
3280af22 2906 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2907 );
a0d0e21e 2908 /* Try scanning more and see if it helps. */
3280af22 2909 PL_reginput = locinput;
a0d0e21e
LW
2910 cc->cur = n;
2911 cc->lastloc = locinput;
5f05dabc 2912 cp = regcppush(cc->parenfloor);
02db2b7b 2913 REGCP_SET(lastcp);
5f05dabc 2914 if (regmatch(cc->scan)) {
c277df42 2915 regcpblow(cp);
4633a7c4 2916 sayYES;
5f05dabc 2917 }
02db2b7b 2918 REGCP_UNWIND(lastcp);
5f05dabc 2919 regcppop();
4633a7c4 2920 cc->cur = n - 1;
c277df42 2921 cc->lastloc = lastloc;
4633a7c4 2922 sayNO;
a0d0e21e
LW
2923 }
2924
2925 /* Prefer scan over next for maximal matching. */
2926
2927 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2928 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2929 cc->cur = n;
2930 cc->lastloc = locinput;
02db2b7b 2931 REGCP_SET(lastcp);
5f05dabc 2932 if (regmatch(cc->scan)) {
c277df42 2933 regcpblow(cp);
4633a7c4 2934 sayYES;
5f05dabc 2935 }
02db2b7b 2936 REGCP_UNWIND(lastcp);
a0d0e21e 2937 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2938 PL_reginput = locinput;
c277df42 2939 DEBUG_r(
c3464db5
DD
2940 PerlIO_printf(Perl_debug_log,
2941 "%*s failed, try continuation...\n",
3280af22 2942 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2943 );
2944 }
9041c2e3 2945 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2946 && !(PL_reg_flags & RF_warned)) {
3280af22 2947 PL_reg_flags |= RF_warned;
e476b1b5 2948 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2949 "Complex regular subexpression recursion",
2950 REG_INFTY - 1);
a0d0e21e
LW
2951 }
2952
2953 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2954 PL_regcc = cc->oldcc;
2ab05381
IZ
2955 if (PL_regcc)
2956 ln = PL_regcc->cur;
a0d0e21e 2957 if (regmatch(cc->next))
4633a7c4 2958 sayYES;
2ab05381
IZ
2959 if (PL_regcc)
2960 PL_regcc->cur = ln;
3280af22 2961 PL_regcc = cc;
4633a7c4 2962 cc->cur = n - 1;
c277df42 2963 cc->lastloc = lastloc;
4633a7c4 2964 sayNO;
a0d0e21e
LW
2965 }
2966 /* NOT REACHED */
9041c2e3 2967 case BRANCHJ:
c277df42
IZ
2968 next = scan + ARG(scan);
2969 if (next == scan)
2970 next = NULL;
2971 inner = NEXTOPER(NEXTOPER(scan));
2972 goto do_branch;
9041c2e3 2973 case BRANCH:
c277df42
IZ
2974 inner = NEXTOPER(scan);
2975 do_branch:
2976 {
c277df42
IZ
2977 c1 = OP(scan);
2978 if (OP(next) != c1) /* No choice. */
2979 next = inner; /* Avoid recursion. */
a0d0e21e 2980 else {
02db2b7b
IZ
2981 I32 lastparen = *PL_reglastparen;
2982 I32 unwind1;
2983 re_unwind_branch_t *uw;
2984
2985 /* Put unwinding data on stack */
2986 unwind1 = SSNEWt(1,re_unwind_branch_t);
2987 uw = SSPTRt(unwind1,re_unwind_branch_t);
2988 uw->prev = unwind;
2989 unwind = unwind1;
2990 uw->type = ((c1 == BRANCH)
2991 ? RE_UNWIND_BRANCH
2992 : RE_UNWIND_BRANCHJ);
2993 uw->lastparen = lastparen;
2994 uw->next = next;
2995 uw->locinput = locinput;
2996 uw->nextchr = nextchr;
2997#ifdef DEBUGGING
2998 uw->regindent = ++PL_regindent;
2999#endif
c277df42 3000
02db2b7b
IZ
3001 REGCP_SET(uw->lastcp);
3002
3003 /* Now go into the first branch */
3004 next = inner;
a687059c 3005 }
a0d0e21e
LW
3006 }
3007 break;
3008 case MINMOD:
3009 minmod = 1;
3010 break;
c277df42
IZ
3011 case CURLYM:
3012 {
00db4c45 3013 I32 l = 0;
c277df42 3014 CHECKPOINT lastcp;
9041c2e3 3015
c277df42
IZ
3016 /* We suppose that the next guy does not need
3017 backtracking: in particular, it is of constant length,
3018 and has no parenths to influence future backrefs. */
3019 ln = ARG1(scan); /* min to match */
3020 n = ARG2(scan); /* max to match */
c277df42
IZ
3021 paren = scan->flags;
3022 if (paren) {
3280af22
NIS
3023 if (paren > PL_regsize)
3024 PL_regsize = paren;
3025 if (paren > *PL_reglastparen)
3026 *PL_reglastparen = paren;
c277df42 3027 }
dc45a647 3028 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3029 if (paren)
3030 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3031 PL_reginput = locinput;
c277df42
IZ
3032 if (minmod) {
3033 minmod = 0;
3034 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3035 sayNO;
f31a99c8
HS
3036 /* if we matched something zero-length we don't need to
3037 backtrack - capturing parens are already defined, so
3038 the caveat in the maximal case doesn't apply
3039
3040 XXXX if ln == 0, we can redo this check first time
3041 through the following loop
3042 */
3043 if (ln && l == 0)
3044 n = ln; /* don't backtrack */
3280af22 3045 locinput = PL_reginput;
5f80c4cf
JP
3046 if (
3047 PL_regkind[(U8)OP(next)] == EXACT ||
74750237
JP
3048 OP(next) == OPEN ||
3049 OP(next) == CLOSE ||
5f80c4cf
JP
3050 OP(next) == EVAL
3051 ) {
3052 regnode *text_node = next;
3053
3054 if (PL_regkind[(U8)OP(next)] != EXACT)
74750237 3055 NEXT_IMPT(text_node);
5f80c4cf
JP
3056
3057 if (PL_regkind[(U8)OP(text_node)] != EXACT) {
3058 c1 = c2 = -1000;
3059 }
3060 else {
3061 c1 = (U8)*STRING(text_node);
3062 if (OP(next) == EXACTF)
3063 c2 = PL_fold[c1];
3064 else if (OP(text_node) == EXACTFL)
3065 c2 = PL_fold_locale[c1];
3066 else
3067 c2 = c1;
3068 }
a0ed51b3
LW
3069 }
3070 else
c277df42 3071 c1 = c2 = -1000;
02db2b7b 3072 REGCP_SET(lastcp);
5f4b28b2 3073 /* This may be improved if l == 0. */
c277df42
IZ
3074 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3075 /* If it could work, try it. */
3076 if (c1 == -1000 ||
3280af22
NIS
3077 UCHARAT(PL_reginput) == c1 ||
3078 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3079 {
3080 if (paren) {
f31a99c8 3081 if (ln) {
cf93c79d
IZ
3082 PL_regstartp[paren] =
3083 HOPc(PL_reginput, -l) - PL_bostr;
3084 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3085 }
3086 else
cf93c79d 3087 PL_regendp[paren] = -1;
c277df42
IZ
3088 }
3089 if (regmatch(next))
3090 sayYES;
02db2b7b 3091 REGCP_UNWIND(lastcp);
c277df42
IZ
3092 }
3093 /* Couldn't or didn't -- move forward. */
3280af22 3094 PL_reginput = locinput;
c277df42
IZ
3095 if (regrepeat_hard(scan, 1, &l)) {
3096 ln++;
3280af22 3097 locinput = PL_reginput;
c277df42
IZ
3098 }
3099 else
3100 sayNO;
3101 }
a0ed51b3
LW
3102 }
3103 else {
c277df42 3104 n = regrepeat_hard(scan, n, &l);
f31a99c8
HS
3105 /* if we matched something zero-length we don't need to
3106 backtrack, unless the minimum count is zero and we
3107 are capturing the result - in that case the capture
3108 being defined or not may affect later execution
3109 */
3110 if (n != 0 && l == 0 && !(paren && ln == 0))
3111 ln = n; /* don't backtrack */
3280af22 3112 locinput = PL_reginput;
c277df42 3113 DEBUG_r(
5c0ca799 3114 PerlIO_printf(Perl_debug_log,
faccc32b 3115 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3116 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3117 (IV) n, (IV)l)
c277df42
IZ
3118 );
3119 if (n >= ln) {
5f80c4cf
JP
3120 if (
3121 PL_regkind[(U8)OP(next)] == EXACT ||
74750237
JP
3122 OP(next) == OPEN ||
3123 OP(next) == CLOSE ||
5f80c4cf
JP
3124 OP(next) == EVAL
3125 ) {
3126 regnode *text_node = next;
3127
3128 if (PL_regkind[(U8)OP(next)] != EXACT)
74750237 3129 NEXT_IMPT(text_node);
5f80c4cf
JP
3130
3131 if (PL_regkind[(U8)OP(text_node)] != EXACT) {
3132 c1 = c2 = -1000;
3133 }
3134 else {
3135 c1 = (U8)*STRING(text_node);
3136 if (OP(text_node) == EXACTF)
3137 c2 = PL_fold[c1];
3138 else if (OP(text_node) == EXACTFL)
3139 c2 = PL_fold_locale[c1];
3140 else
3141 c2 = c1;
3142 }
a0ed51b3
LW
3143 }
3144 else
c277df42
IZ
3145 c1 = c2 = -1000;
3146 }
02db2b7b 3147 REGCP_SET(lastcp);
c277df42
IZ
3148 while (n >= ln) {
3149 /* If it could work, try it. */
3150 if (c1 == -1000 ||
3280af22
NIS
3151 UCHARAT(PL_reginput) == c1 ||
3152 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3153 {
3154 DEBUG_r(
c3464db5 3155 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3156 "%*s trying tail with n=%"IVdf"...\n",
3157 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3158 );
3159 if (paren) {
3160 if (n) {
cf93c79d
IZ
3161 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3162 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3163 }
a0ed51b3 3164 else
cf93c79d 3165 PL_regendp[paren] = -1;
c277df42 3166 }
a0ed51b3
LW
3167 if (regmatch(next))
3168 sayYES;
02db2b7b 3169 REGCP_UNWIND(lastcp);
a0ed51b3 3170 }
c277df42
IZ
3171 /* Couldn't or didn't -- back up. */
3172 n--;
dfe13c55 3173 locinput = HOPc(locinput, -l);
3280af22 3174 PL_reginput = locinput;
c277df42
IZ
3175 }
3176 }
3177 sayNO;
3178 break;
3179 }
3180 case CURLYN:
3181 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3182 if (paren > PL_regsize)
3183 PL_regsize = paren;
3184 if (paren > *PL_reglastparen)
3185 *PL_reglastparen = paren;
c277df42
IZ
3186 ln = ARG1(scan); /* min to match */
3187 n = ARG2(scan); /* max to match */
dc45a647 3188 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3189 goto repeat;
a0d0e21e 3190 case CURLY:
c277df42 3191 paren = 0;
a0d0e21e
LW
3192 ln = ARG1(scan); /* min to match */
3193 n = ARG2(scan); /* max to match */
dc45a647 3194 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3195 goto repeat;
3196 case STAR:
3197 ln = 0;
c277df42 3198 n = REG_INFTY;
a0d0e21e 3199 scan = NEXTOPER(scan);
c277df42 3200 paren = 0;
a0d0e21e
LW
3201 goto repeat;
3202 case PLUS:
c277df42
IZ
3203 ln = 1;
3204 n = REG_INFTY;
3205 scan = NEXTOPER(scan);
3206 paren = 0;
3207 repeat:
a0d0e21e
LW
3208 /*
3209 * Lookahead to avoid useless match attempts
3210 * when we know what character comes next.
3211 */
5f80c4cf
JP
3212
3213 /*
3214 * Used to only do .*x and .*?x, but now it allows
3215 * for )'s, ('s and (?{ ... })'s to be in the way
3216 * of the quantifier and the EXACT-like node. -- japhy
3217 */
3218
3219 if (
3220 PL_regkind[(U8)OP(next)] == EXACT ||
74750237
JP
3221 OP(next) == OPEN ||
3222 OP(next) == CLOSE ||
5f80c4cf
JP
3223 OP(next) == EVAL
3224 ) {
3225 U8 *s;
3226 regnode *text_node = next;
3227
3228 if (PL_regkind[(U8)OP(next)] != EXACT)
74750237 3229 NEXT_IMPT(text_node);
5f80c4cf
JP
3230
3231 if (PL_regkind[(U8)OP(text_node)] != EXACT) {
3232 c1 = c2 = -1000;
3233 }
3234 else {
3235 s = (U8*)STRING(text_node);
3236
3237 if (!UTF) {
3238 c2 = c1 = *s;
3239 if (OP(text_node) == EXACTF)
3240 c2 = PL_fold[c1];
3241 else if (OP(text_node) == EXACTFL)
3242 c2 = PL_fold_locale[c1];
1aa99e6b 3243 }
5f80c4cf
JP
3244 else { /* UTF */
3245 if (OP(text_node) == EXACTF) {
3246 c1 = to_utf8_lower(s);
3247 c2 = to_utf8_upper(s);
3248 }
3249 else {
3250 c2 = c1 = utf8_to_uvchr(s, NULL);
3251 }
1aa99e6b
IH
3252 }
3253 }
bbce6d69 3254 }
a0d0e21e 3255 else
bbce6d69 3256 c1 = c2 = -1000;
3280af22 3257 PL_reginput = locinput;
a0d0e21e 3258 if (minmod) {
c277df42 3259 CHECKPOINT lastcp;
a0d0e21e
LW
3260 minmod = 0;
3261 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3262 sayNO;
a0ed51b3 3263 locinput = PL_reginput;
02db2b7b 3264 REGCP_SET(lastcp);
0fe9bf95 3265 if (c1 != -1000) {
1aa99e6b 3266 char *e; /* Should not check after this */
0fe9bf95
IZ
3267 char *old = locinput;
3268
1aa99e6b 3269 if (n == REG_INFTY) {
0fe9bf95 3270 e = PL_regeol - 1;
1aa99e6b
IH
3271 if (do_utf8)
3272 while (UTF8_IS_CONTINUATION(*(U8*)e))
3273 e--;
3274 }
3275 else if (do_utf8) {
3276 int m = n - ln;
3277 for (e = locinput;
3278 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3279 e += UTF8SKIP(e);
3280 }
3281 else {
3282 e = locinput + n - ln;
3283 if (e >= PL_regeol)
3284 e = PL_regeol - 1;
3285 }
0fe9bf95 3286 while (1) {
1aa99e6b 3287 int count;
0fe9bf95 3288 /* Find place 'next' could work */
1aa99e6b
IH
3289 if (!do_utf8) {
3290 if (c1 == c2) {
3291 while (locinput <= e && *locinput != c1)
3292 locinput++;
3293 } else {
9041c2e3 3294 while (locinput <= e
1aa99e6b
IH
3295 && *locinput != c1
3296 && *locinput != c2)
3297 locinput++;
3298 }
3299 count = locinput - old;
3300 }
3301 else {
3302 STRLEN len;
3303 if (c1 == c2) {
3304 for (count = 0;
3305 locinput <= e &&
9041c2e3 3306 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3307 count++)
3308 locinput += len;
3309
3310 } else {
3311 for (count = 0; locinput <= e; count++) {
9041c2e3 3312 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3313 if (c == c1 || c == c2)
3314 break;
9041c2e3 3315 locinput += len;
1aa99e6b
IH
3316 }
3317 }
0fe9bf95 3318 }
9041c2e3 3319 if (locinput > e)
0fe9bf95
IZ
3320 sayNO;
3321 /* PL_reginput == old now */
3322 if (locinput != old) {
3323 ln = 1; /* Did some */
1aa99e6b 3324 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3325 sayNO;
3326 }
3327 /* PL_reginput == locinput now */
29d1e993 3328 TRYPAREN(paren, ln, locinput);
0fe9bf95 3329 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3330 REGCP_UNWIND(lastcp);
0fe9bf95 3331 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3332 old = locinput;
3333 if (do_utf8)
3334 locinput += UTF8SKIP(locinput);
3335 else
3336 locinput++;
0fe9bf95
IZ
3337 }
3338 }
3339 else
c277df42 3340 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3341 UV c;
3342 if (c1 != -1000) {
3343 if (do_utf8)
9041c2e3 3344 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3345 else
9041c2e3 3346 c = UCHARAT(PL_reginput);
2390ecbc
PP
3347 /* If it could work, try it. */
3348 if (c == c1 || c == c2)
3349 {
3350 TRYPAREN(paren, n, PL_reginput);
3351 REGCP_UNWIND(lastcp);
3352 }
1aa99e6b 3353 }
a0d0e21e 3354 /* If it could work, try it. */
2390ecbc 3355 else if (c1 == -1000)
bbce6d69 3356 {
29d1e993 3357 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3358 REGCP_UNWIND(lastcp);
bbce6d69 3359 }
c277df42 3360 /* Couldn't or didn't -- move forward. */
a0ed51b3 3361 PL_reginput = locinput;
a0d0e21e
LW
3362 if (regrepeat(scan, 1)) {
3363 ln++;
a0ed51b3
LW
3364 locinput = PL_reginput;
3365 }
3366 else
4633a7c4 3367 sayNO;
a0d0e21e
LW
3368 }
3369 }
3370 else {
c277df42 3371 CHECKPOINT lastcp;
a0d0e21e 3372 n = regrepeat(scan, n);
a0ed51b3 3373 locinput = PL_reginput;
22c35a8c 3374 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3375 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3376 ln = n; /* why back off? */
1aeab75a
GS
3377 /* ...because $ and \Z can match before *and* after
3378 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3379 We should back off by one in this case. */
3380 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3381 ln--;
3382 }
02db2b7b 3383 REGCP_SET(lastcp);
c277df42 3384 if (paren) {
8fa7f367 3385 UV c = 0;
c277df42 3386 while (n >= ln) {
1aa99e6b
IH
3387 if (c1 != -1000) {
3388 if (do_utf8)
9041c2e3 3389 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3390 else
9041c2e3 3391 c = UCHARAT(PL_reginput);
1aa99e6b 3392 }
c277df42 3393 /* If it could work, try it. */
1aa99e6b 3394 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3395 {
29d1e993 3396 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3397 REGCP_UNWIND(lastcp);
c277df42
IZ
3398 }
3399 /* Couldn't or didn't -- back up. */
3400 n--;
dfe13c55 3401 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3402 }
a0ed51b3
LW
3403 }
3404 else {
8fa7f367 3405 UV c = 0;
c277df42 3406 while (n >= ln) {
1aa99e6b
IH
3407 if (c1 != -1000) {
3408 if (do_utf8)
9041c2e3 3409 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3410 else
9041c2e3 3411 c = UCHARAT(PL_reginput);
1aa99e6b 3412 }
c277df42 3413 /* If it could work, try it. */
1aa99e6b 3414 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3415 {
29d1e993 3416 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3417 REGCP_UNWIND(lastcp);
c277df42
IZ
3418 }
3419 /* Couldn't or didn't -- back up. */
3420 n--;
dfe13c55 3421 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3422 }
a0d0e21e
LW
3423 }
3424 }
4633a7c4 3425 sayNO;
c277df42 3426 break;
a0d0e21e 3427 case END:
0f5d15d6
IZ
3428 if (PL_reg_call_cc) {
3429 re_cc_state *cur_call_cc = PL_reg_call_cc;
3430 CURCUR *cctmp = PL_regcc;
3431 regexp *re = PL_reg_re;
3432 CHECKPOINT cp, lastcp;
3433
3434 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3435 REGCP_SET(lastcp);
0f5d15d6
IZ
3436 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3437 the caller. */
3438 PL_reginput = locinput; /* Make position available to
3439 the callcc. */
3440 cache_re(PL_reg_call_cc->re);
3441 PL_regcc = PL_reg_call_cc->cc;
3442 PL_reg_call_cc = PL_reg_call_cc->prev;
3443 if (regmatch(cur_call_cc->node)) {
3444 PL_reg_call_cc = cur_call_cc;
3445 regcpblow(cp);
3446 sayYES;
3447 }
02db2b7b 3448 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3449 regcppop();
3450 PL_reg_call_cc = cur_call_cc;
3451 PL_regcc = cctmp;
3452 PL_reg_re = re;
3453 cache_re(re);
3454
3455 DEBUG_r(
3456 PerlIO_printf(Perl_debug_log,
3457 "%*s continuation failed...\n",
3458 REPORT_CODE_OFF+PL_regindent*2, "")
3459 );
7821416a 3460 sayNO_SILENT;
0f5d15d6 3461 }
7821416a
IZ
3462 if (locinput < PL_regtill) {
3463 DEBUG_r(PerlIO_printf(Perl_debug_log,
3464 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3465 PL_colors[4],
3466 (long)(locinput - PL_reg_starttry),
3467 (long)(PL_regtill - PL_reg_starttry),
3468 PL_colors[5]));
3469 sayNO_FINAL; /* Cannot match: too short. */
3470 }
3471 PL_reginput = locinput; /* put where regtry can find it */
3472 sayYES_FINAL; /* Success! */
7e5428c5 3473 case SUCCEED:
3280af22 3474 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3475 sayYES_LOUD; /* Success! */
c277df42
IZ
3476 case SUSPEND:
3477 n = 1;
9fe1d20c 3478 PL_reginput = locinput;
9041c2e3 3479 goto do_ifmatch;
a0d0e21e 3480 case UNLESSM:
c277df42 3481 n = 0;
a0ed51b3 3482 if (scan->flags) {
efb30f32
HS
3483 s = HOPBACKc(locinput, scan->flags);
3484 if (!s)
3485 goto say_yes;
3486 PL_reginput = s;
a0ed51b3
LW
3487 }
3488 else
3489 PL_reginput = locinput;
c277df42
IZ
3490 goto do_ifmatch;
3491 case IFMATCH:
3492 n = 1;
a0ed51b3 3493 if (scan->flags) {
efb30f32
HS
3494 s = HOPBACKc(locinput, scan->flags);
3495 if (!s)
3496 goto say_no;
3497 PL_reginput = s;
a0ed51b3
LW
3498 }
3499 else
3500 PL_reginput = locinput;
3501
c277df42 3502 do_ifmatch:
c277df42
IZ
3503 inner = NEXTOPER(NEXTOPER(scan));
3504 if (regmatch(inner) != n) {
3505 say_no:
3506 if (logical) {
3507 logical = 0;
3508 sw = 0;
3509 goto do_longjump;
a0ed51b3
LW
3510 }
3511 else
c277df42
IZ
3512 sayNO;
3513 }
3514 say_yes:
3515 if (logical) {
3516 logical = 0;
3517 sw = 1;
3518 }
fe44a5e8 3519 if (OP(scan) == SUSPEND) {
3280af22 3520 locinput = PL_reginput;
565764a8 3521 nextchr = UCHARAT(locinput);
fe44a5e8 3522 }
c277df42
IZ
3523 /* FALL THROUGH. */
3524 case LONGJMP:
3525 do_longjump:
3526 next = scan + ARG(scan);
3527 if (next == scan)
3528 next = NULL;
a0d0e21e
LW
3529 break;
3530 default:
b900a521 3531 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3532 PTR2UV(scan), OP(scan));
cea2e8a9 3533 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3534 }
02db2b7b 3535 reenter:
a0d0e21e
LW
3536 scan = next;
3537 }
a687059c 3538
a0d0e21e
LW
3539 /*
3540 * We get here only if there's trouble -- normally "case END" is
3541 * the terminating point.
3542 */
cea2e8a9 3543 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3544 /*NOTREACHED*/
4633a7c4
LW
3545 sayNO;
3546
7821416a
IZ
3547yes_loud:
3548 DEBUG_r(
3549 PerlIO_printf(Perl_debug_log,
3550 "%*s %scould match...%s\n",
3551 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3552 );
3553 goto yes;
3554yes_final:
3555 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3556 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3557yes:
3558#ifdef DEBUGGING
3280af22 3559 PL_regindent--;
4633a7c4 3560#endif
02db2b7b
IZ
3561
3562#if 0 /* Breaks $^R */
3563 if (unwind)
3564 regcpblow(firstcp);
3565#endif
4633a7c4
LW
3566 return 1;
3567
3568no:
7821416a
IZ
3569 DEBUG_r(
3570 PerlIO_printf(Perl_debug_log,
3571 "%*s %sfailed...%s\n",
3572 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3573 );
3574 goto do_no;
3575no_final:
3576do_no:
02db2b7b
IZ
3577 if (unwind) {
3578 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3579
3580 switch (uw->type) {
3581 case RE_UNWIND_BRANCH:
3582 case RE_UNWIND_BRANCHJ:
3583 {
3584 re_unwind_branch_t *uwb = &(uw->branch);
3585 I32 lastparen = uwb->lastparen;
9041c2e3 3586
02db2b7b
IZ
3587 REGCP_UNWIND(uwb->lastcp);
3588 for (n = *PL_reglastparen; n > lastparen; n--)
3589 PL_regendp[n] = -1;
3590 *PL_reglastparen = n;
3591 scan = next = uwb->next;
9041c2e3
NIS
3592 if ( !scan ||
3593 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
3594 ? BRANCH : BRANCHJ) ) { /* Failure */
3595 unwind = uwb->prev;
3596#ifdef DEBUGGING
3597 PL_regindent--;
3598#endif
3599 goto do_no;
3600 }
3601 /* Have more choice yet. Reuse the same uwb. */
3602 /*SUPPRESS 560*/
3603 if ((n = (uwb->type == RE_UNWIND_BRANCH
3604 ? NEXT_OFF(next) : ARG(next))))
3605 next += n;
3606 else
3607 next = NULL; /* XXXX Needn't unwinding in this case... */
3608 uwb->next = next;
3609 next = NEXTOPER(scan);
3610 if (uwb->type == RE_UNWIND_BRANCHJ)
3611 next = NEXTOPER(next);
3612 locinput = uwb->locinput;
3613 nextchr = uwb->nextchr;
3614#ifdef DEBUGGING
3615 PL_regindent = uwb->regindent;
3616#endif
3617
3618 goto reenter;
3619 }
3620 /* NOT REACHED */
3621 default:
3622 Perl_croak(aTHX_ "regexp unwind memory corruption");
3623 }
3624 /* NOT REACHED */
3625 }
4633a7c4 3626#ifdef DEBUGGING
3280af22 3627 PL_regindent--;
4633a7c4 3628#endif
a0d0e21e 3629 return 0;
a687059c
LW
3630}
3631
3632/*
3633 - regrepeat - repeatedly match something simple, report how many
3634 */
3635/*
3636 * [This routine now assumes that it will only match on things of length 1.
3637 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3638 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3639 */
76e3520e 3640STATIC I32
cea2e8a9 3641S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3642{
a0d0e21e 3643 register char *scan;
a0d0e21e 3644 register I32 c;
3280af22 3645 register char *loceol = PL_regeol;
a0ed51b3 3646 register I32 hardcount = 0;
53c4c00c 3647 register bool do_utf8 = PL_reg_match_utf8;
a0d0e21e 3648
3280af22 3649 scan = PL_reginput;
c277df42 3650 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3651 loceol = scan + max;
a0d0e21e 3652 switch (OP(p)) {
22c35a8c 3653 case REG_ANY:
1aa99e6b 3654 if (do_utf8) {
ffc61ed2 3655 loceol = PL_regeol;
1aa99e6b 3656 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3657 scan += UTF8SKIP(scan);
3658 hardcount++;
3659 }
3660 } else {
3661 while (scan < loceol && *scan != '\n')
3662 scan++;
a0ed51b3
LW
3663 }
3664 break;
ffc61ed2 3665 case SANY:
3baa4c62 3666 scan = loceol;
a0ed51b3 3667 break;
f33976b4
DB
3668 case CANY:
3669 scan = loceol;
3670 break;
bbce6d69 3671 case EXACT: /* length of string is 1 */
cd439c50 3672 c = (U8)*STRING(p);
bbce6d69 3673 while (scan < loceol && UCHARAT(scan) == c)
3674 scan++;
3675 break;
3676 case EXACTF: /* length of string is 1 */
cd439c50 3677 c = (U8)*STRING(p);
bbce6d69 3678 while (scan < loceol &&
22c35a8c 3679 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3680 scan++;
3681 break;
3682 case EXACTFL: /* length of string is 1 */
3280af22 3683 PL_reg_flags |= RF_tainted;
cd439c50 3684 c = (U8)*STRING(p);
bbce6d69 3685 while (scan < loceol &&
22c35a8c 3686 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3687 scan++;
3688 break;
3689 case ANYOF:
ffc61ed2
JH
3690 if (do_utf8) {
3691 loceol = PL_regeol;
1aa99e6b
IH
3692 while (hardcount < max && scan < loceol &&
3693 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3694 scan += UTF8SKIP(scan);
3695 hardcount++;
3696 }
3697 } else {
3698 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3699 scan++;
3700 }
a0d0e21e
LW
3701 break;
3702 case ALNUM:
1aa99e6b 3703 if (do_utf8) {
ffc61ed2 3704 loceol = PL_regeol;
8269fa76 3705 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3706 while (hardcount < max && scan < loceol &&
3568d838 3707 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3708 scan += UTF8SKIP(scan);
3709 hardcount++;
3710 }
3711 } else {
3712 while (scan < loceol && isALNUM(*scan))
3713 scan++;
a0ed51b3
LW
3714 }
3715 break;
bbce6d69 3716 case ALNUML:
3280af22 3717 PL_reg_flags |= RF_tainted;
1aa99e6b 3718 if (do_utf8) {
ffc61ed2 3719 loceol = PL_regeol;
1aa99e6b
IH
3720 while (hardcount < max && scan < loceol &&
3721 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3722 scan += UTF8SKIP(scan);
3723 hardcount++;
3724 }
3725 } else {
3726 while (scan < loceol && isALNUM_LC(*scan))
3727 scan++;
a0ed51b3
LW
3728 }
3729 break;
a0d0e21e 3730 case NALNUM:
1aa99e6b 3731 if (do_utf8) {
ffc61ed2 3732 loceol = PL_regeol;
8269fa76 3733 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3734 while (hardcount < max && scan < loceol &&
3568d838 3735 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3736 scan += UTF8SKIP(scan);
3737 hardcount++;
3738 }
3739 } else {
3740 while (scan < loceol && !isALNUM(*scan))
3741 scan++;
a0ed51b3
LW
3742 }
3743 break;
bbce6d69 3744 case NALNUML:
3280af22 3745 PL_reg_flags |= RF_tainted;
1aa99e6b 3746 if (do_utf8) {
ffc61ed2 3747 loceol = PL_regeol;
1aa99e6b
IH
3748 while (hardcount < max && scan < loceol &&
3749 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3750 scan += UTF8SKIP(scan);
3751 hardcount++;
3752 }
3753 } else {
3754 while (scan < loceol && !isALNUM_LC(*scan))
3755 scan++;
a0ed51b3
LW
3756 }
3757 break;
a0d0e21e 3758 case SPACE:
1aa99e6b 3759 if (do_utf8) {
ffc61ed2 3760 loceol = PL_regeol;
8269fa76 3761 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3762 while (hardcount < max && scan < loceol &&
3568d838
JH
3763 (*scan == ' ' ||
3764 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3765 scan += UTF8SKIP(scan);
3766 hardcount++;
3767 }
3768 } else {
3769 while (scan < loceol && isSPACE(*scan))
3770 scan++;
a0ed51b3
LW
3771 }
3772 break;
bbce6d69 3773 case SPACEL:
3280af22 3774 PL_reg_flags |= RF_tainted;
1aa99e6b 3775 if (do_utf8) {
ffc61ed2 3776 loceol = PL_regeol;
1aa99e6b 3777 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3778 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3779 scan += UTF8SKIP(scan);
3780 hardcount++;
3781 }
3782 } else {
3783 while (scan < loceol && isSPACE_LC(*scan))
3784 scan++;
a0ed51b3
LW
3785 }
3786 break;
a0d0e21e 3787 case NSPACE:
1aa99e6b 3788 if (do_utf8) {
ffc61ed2 3789 loceol = PL_regeol;
8269fa76 3790 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3791 while (hardcount < max && scan < loceol &&
3568d838
JH
3792 !(*scan == ' ' ||
3793 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3794 scan += UTF8SKIP(scan);
3795 hardcount++;
3796 }
3797 } else {
3798 while (scan < loceol && !isSPACE(*scan))
3799 scan++;
3800 break;
a0ed51b3 3801 }
bbce6d69 3802 case NSPACEL:
3280af22 3803 PL_reg_flags |= RF_tainted;
1aa99e6b 3804 if (do_utf8) {
ffc61ed2 3805 loceol = PL_regeol;
1aa99e6b 3806 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3807 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3808 scan += UTF8SKIP(scan);
3809 hardcount++;
3810 }
3811 } else {
3812 while (scan < loceol && !isSPACE_LC(*scan))
3813 scan++;
a0ed51b3
LW
3814 }
3815 break;
a0d0e21e 3816 case DIGIT:
1aa99e6b 3817 if (do_utf8) {
ffc61ed2 3818 loceol = PL_regeol;
8269fa76 3819 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3820 while (hardcount < max && scan < loceol &&
3568d838 3821 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3822 scan += UTF8SKIP(scan);
3823 hardcount++;
3824 }
3825 } else {
3826 while (scan < loceol && isDIGIT(*scan))
3827 scan++;
a0ed51b3
LW
3828 }
3829 break;
a0d0e21e 3830 case NDIGIT:
1aa99e6b 3831 if (do_utf8) {
ffc61ed2 3832 loceol = PL_regeol;
8269fa76 3833 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3834 while (hardcount < max && scan < loceol &&
3568d838 3835 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3836 scan += UTF8SKIP(scan);
3837 hardcount++;
3838 }
3839 } else {
3840 while (scan < loceol && !isDIGIT(*scan))
3841 scan++;
a0ed51b3
LW
3842 }
3843 break;
a0d0e21e
LW
3844 default: /* Called on something of 0 width. */
3845 break; /* So match right here or not at all. */
3846 }
a687059c 3847
a0ed51b3
LW
3848 if (hardcount)
3849 c = hardcount;
3850 else
3851 c = scan - PL_reginput;
3280af22 3852 PL_reginput = scan;
a687059c 3853
9041c2e3 3854 DEBUG_r(
c277df42
IZ
3855 {
3856 SV *prop = sv_newmortal();
3857
3858 regprop(prop, p);
9041c2e3
NIS
3859 PerlIO_printf(Perl_debug_log,
3860 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
7b0972df 3861 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42 3862 });
9041c2e3 3863
a0d0e21e 3864 return(c);
a687059c
LW
3865}
3866
3867/*
c277df42 3868 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 3869 *
c277df42
IZ
3870 * The repeater is supposed to have constant length.
3871 */
3872
76e3520e 3873STATIC I32
cea2e8a9 3874S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3875{
b7953727 3876 register char *scan = Nullch;
c277df42 3877 register char *start;
3280af22 3878 register char *loceol = PL_regeol;
a0ed51b3 3879 I32 l = 0;
708e3b05 3880 I32 count = 0, res = 1;
a0ed51b3
LW
3881
3882 if (!max)
3883 return 0;
c277df42 3884
3280af22 3885 start = PL_reginput;
53c4c00c 3886 if (PL_reg_match_utf8) {
708e3b05 3887 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3888 if (!count++) {
3889 l = 0;
3890 while (start < PL_reginput) {
3891 l++;
3892 start += UTF8SKIP(start);
3893 }
3894 *lp = l;
3895 if (l == 0)
3896 return max;
3897 }
3898 if (count == max)
3899 return count;
3900 }
3901 }
3902 else {
708e3b05 3903 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3904 if (!count++) {
3905 *lp = l = PL_reginput - start;
3906 if (max != REG_INFTY && l*max < loceol - scan)
3907 loceol = scan + l*max;
3908 if (l == 0)
3909 return max;
c277df42
IZ
3910 }
3911 }
3912 }
708e3b05 3913 if (!res)
3280af22 3914 PL_reginput = scan;
9041c2e3 3915
a0ed51b3 3916 return count;
c277df42
IZ
3917}
3918
3919/*
ffc61ed2
JH
3920- regclass_swash - prepare the utf8 swash
3921*/
3922
3923SV *
3924Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
3925{
3926 SV *sw = NULL;
3927 SV *si = NULL;
3928
3929 if (PL_regdata && PL_regdata->count) {
3930 U32 n = ARG(node);
3931
3932 if (PL_regdata->what[n] == 's') {
3933 SV *rv = (SV*)PL_regdata->data[n];
3934 AV *av = (AV*)SvRV((SV*)rv);
3935 SV **a;
9041c2e3 3936
ffc61ed2
JH
3937 si = *av_fetch(av, 0, FALSE);
3938 a = av_fetch(av, 1, FALSE);
9041c2e3 3939
ffc61ed2
JH
3940 if (a)
3941 sw = *a;
3942 else if (si && doinit) {
3943 sw = swash_init("utf8", "", si, 1, 0);
3944 (void)av_store(av, 1, sw);
3945 }
3946 }
3947 }
3948
3949 if (initsvp)
3950 *initsvp = si;
3951
3952 return sw;
3953}
3954
3955/*
cb8d8820 3956 - reginclass - determine if a character falls into a character class
bbce6d69 3957 */
3958
76e3520e 3959STATIC bool
ffc61ed2 3960S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
bbce6d69 3961{
ffc61ed2 3962 char flags = ANYOF_FLAGS(n);
bbce6d69 3963 bool match = FALSE;
1aa99e6b 3964 UV c;
3568d838 3965 STRLEN len = 0;
1aa99e6b 3966
3568d838 3967 c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
bbce6d69 3968
ffc61ed2
JH
3969 if (do_utf8 || (flags & ANYOF_UNICODE)) {
3970 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
3971 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
3972 match = TRUE;
bbce6d69 3973 }
3568d838 3974 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 3975 match = TRUE;
ffc61ed2
JH
3976 if (!match) {
3977 SV *sw = regclass_swash(n, TRUE, 0);
3978
3979 if (sw) {
3568d838 3980 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
3981 match = TRUE;
3982 else if (flags & ANYOF_FOLD) {
3983 U8 tmpbuf[UTF8_MAXLEN+1];
9041c2e3 3984
ffc61ed2
JH
3985 if (flags & ANYOF_LOCALE) {
3986 PL_reg_flags |= RF_tainted;
9041c2e3 3987 uvchr_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
ffc61ed2 3988 }
1aa99e6b 3989 else
9041c2e3 3990 uvchr_to_utf8(tmpbuf, toLOWER_utf8(p));
3568d838 3991 if (swash_fetch(sw, tmpbuf, do_utf8))
ffc61ed2
JH
3992 match = TRUE;
3993 }
3994 }
bbce6d69 3995 }
3996 }
1aa99e6b 3997 if (!match && c < 256) {
ffc61ed2
JH
3998 if (ANYOF_BITMAP_TEST(n, c))
3999 match = TRUE;
4000 else if (flags & ANYOF_FOLD) {
3568d838 4001 I32 f;
a0ed51b3 4002
ffc61ed2
JH
4003 if (flags & ANYOF_LOCALE) {
4004 PL_reg_flags |= RF_tainted;
4005 f = PL_fold_locale[c];
4006 }
4007 else
4008 f = PL_fold[c];
4009 if (f != c && ANYOF_BITMAP_TEST(n, f))
4010 match = TRUE;
4011 }
4012
4013 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 4014 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
4015 if (
4016 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
4017 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
4018 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
4019 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
4020 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
4021 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
4022 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
4023 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
4024 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
4025 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
4026 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
4027 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
4028 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
4029 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
4030 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
4031 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
4032 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
4033 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
4034 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
4035 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
4036 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
4037 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
4038 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
4039 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
4040 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
4041 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
4042 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
4043 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
4044 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
4045 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
4046 ) /* How's that for a conditional? */
4047 {
4048 match = TRUE;
4049 }
a0ed51b3 4050 }
a0ed51b3
LW
4051 }
4052
a0ed51b3
LW
4053 return (flags & ANYOF_INVERT) ? !match : match;
4054}
161b471a 4055
dfe13c55 4056STATIC U8 *
cea2e8a9 4057S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 4058{
1aa99e6b
IH
4059 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4060}
4061
4062STATIC U8 *
4063S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 4064{
a0ed51b3 4065 if (off >= 0) {
1aa99e6b 4066 while (off-- && s < lim) {
ffc61ed2 4067 /* XXX could check well-formedness here */
a0ed51b3 4068 s += UTF8SKIP(s);
ffc61ed2 4069 }
a0ed51b3
LW
4070 }
4071 else {
4072 while (off++) {
1aa99e6b 4073 if (s > lim) {
a0ed51b3 4074 s--;
ffc61ed2 4075 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4076 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4077 s--;
ffc61ed2
JH
4078 }
4079 /* XXX could check well-formedness here */
a0ed51b3
LW
4080 }
4081 }
4082 }
4083 return s;
4084}
161b471a 4085
dfe13c55 4086STATIC U8 *
1aa99e6b 4087S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 4088{
1aa99e6b
IH
4089 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4090}
4091
4092STATIC U8 *
4093S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
4094{
4095 if (off >= 0) {
1aa99e6b 4096 while (off-- && s < lim) {
ffc61ed2 4097 /* XXX could check well-formedness here */
a0ed51b3 4098 s += UTF8SKIP(s);
ffc61ed2 4099 }
a0ed51b3
LW
4100 if (off >= 0)
4101 return 0;
4102 }
4103 else {
4104 while (off++) {
1aa99e6b 4105 if (s > lim) {
a0ed51b3 4106 s--;
ffc61ed2 4107 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4108 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4109 s--;
ffc61ed2
JH
4110 }
4111 /* XXX could check well-formedness here */
a0ed51b3
LW
4112 }
4113 else
4114 break;
4115 }
4116 if (off <= 0)
4117 return 0;
4118 }
4119 return s;
4120}
51371543 4121
51371543 4122static void
acfe0abc 4123restore_pos(pTHX_ void *arg)
51371543 4124{
51371543
GS
4125 if (PL_reg_eval_set) {
4126 if (PL_reg_oldsaved) {
4127 PL_reg_re->subbeg = PL_reg_oldsaved;
4128 PL_reg_re->sublen = PL_reg_oldsavedlen;
4129 RX_MATCH_COPIED_on(PL_reg_re);
4130 }
4131 PL_reg_magic->mg_len = PL_reg_oldpos;
4132 PL_reg_eval_set = 0;
4133 PL_curpm = PL_reg_oldcurpm;
4134 }
4135}