This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: [patch] ignore PL_curpm in PL_regex_padav cleanup
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
c5be433b
GS
83#ifdef PERL_IN_XSUB_RE
84# if defined(PERL_CAPI) || defined(PERL_OBJECT)
85# include "XSUB.h"
86# endif
87#endif
88
a687059c
LW
89#include "regcomp.h"
90
c277df42
IZ
91#define RF_tainted 1 /* tainted information used? */
92#define RF_warned 2 /* warned about big count? */
ce862d02 93#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
94#define RF_utf8 8 /* String contains multibyte chars? */
95
96#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
97
98#define RS_init 1 /* eval environment created */
99#define RS_set 2 /* replsv value is set */
c277df42 100
a687059c
LW
101#ifndef STATIC
102#define STATIC static
103#endif
104
c277df42
IZ
105/*
106 * Forwards.
107 */
108
a0ed51b3 109#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
1aa99e6b 110#define CHR_DIST(a,b) (DO_UTF8(PL_reg_sv) ? utf8_distance(a,b) : a - b)
a0ed51b3 111
dfe13c55
GS
112#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
113#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
1aa99e6b
IH
114#define HOP(pos,off) (DO_UTF8(PL_reg_sv) ? reghop((U8*)pos, off) : (U8*)(pos + off))
115#define HOPMAYBE(pos,off) (DO_UTF8(PL_reg_sv) ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
116#define HOPc(pos,off) ((char*)HOP(pos,off))
117#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 118
efb30f32
HS
119#define HOPBACK(pos, off) ( \
120 (UTF && DO_UTF8(PL_reg_sv)) \
121 ? reghopmaybe((U8*)pos, -off) \
122 : (pos - off >= PL_bostr) \
123 ? (U8*)(pos - off) \
124 : (U8*)NULL \
125)
126#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
127
1aa99e6b
IH
128#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
129#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
130#define HOP3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
131#define HOPMAYBE3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
132#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
133#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
134
8269fa76 135#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 136
8269fa76 137static void restore_pos(pTHXo_ void *arg);
51371543 138
76e3520e 139STATIC CHECKPOINT
cea2e8a9 140S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 141{
3280af22 142 int retval = PL_savestack_ix;
b1ce53c5
JH
143#define REGCP_PAREN_ELEMS 4
144 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
145 int p;
146
e49a9654
IH
147 if (paren_elems_to_push < 0)
148 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
149
a01268b5 150#define REGCP_OTHER_ELEMS 6
b1ce53c5 151 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 152 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 153/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
154 SSPUSHINT(PL_regendp[p]);
155 SSPUSHINT(PL_regstartp[p]);
3280af22 156 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
157 SSPUSHINT(p);
158 }
b1ce53c5 159/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
160 SSPUSHINT(PL_regsize);
161 SSPUSHINT(*PL_reglastparen);
a01268b5 162 SSPUSHINT(*PL_reglastcloseparen);
3280af22 163 SSPUSHPTR(PL_reginput);
41123dfd
JH
164#define REGCP_FRAME_ELEMS 2
165/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
166 * are needed for the regexp context stack bookkeeping. */
167 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 168 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 169
a0d0e21e
LW
170 return retval;
171}
172
c277df42 173/* These are needed since we do not localize EVAL nodes: */
02db2b7b 174# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 175 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 176 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 177
02db2b7b 178# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 179 PerlIO_printf(Perl_debug_log, \
faccc32b 180 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 181 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 182
76e3520e 183STATIC char *
cea2e8a9 184S_regcppop(pTHX)
a0d0e21e 185{
b1ce53c5 186 I32 i;
a0d0e21e
LW
187 U32 paren = 0;
188 char *input;
cf93c79d 189 I32 tmps;
b1ce53c5
JH
190
191 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 192 i = SSPOPINT;
b1ce53c5
JH
193 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
194 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 195 input = (char *) SSPOPPTR;
a01268b5 196 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
197 *PL_reglastparen = SSPOPINT;
198 PL_regsize = SSPOPINT;
b1ce53c5
JH
199
200 /* Now restore the parentheses context. */
41123dfd
JH
201 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
202 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 203 paren = (U32)SSPOPINT;
3280af22 204 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
205 PL_regstartp[paren] = SSPOPINT;
206 tmps = SSPOPINT;
3280af22
NIS
207 if (paren <= *PL_reglastparen)
208 PL_regendp[paren] = tmps;
c277df42 209 DEBUG_r(
c3464db5 210 PerlIO_printf(Perl_debug_log,
b900a521 211 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 212 (UV)paren, (IV)PL_regstartp[paren],
b900a521 213 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 214 (IV)PL_regendp[paren],
3280af22 215 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 216 );
a0d0e21e 217 }
c277df42 218 DEBUG_r(
3280af22 219 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 220 PerlIO_printf(Perl_debug_log,
faccc32b
JH
221 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
222 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
223 }
224 );
daf18116 225#if 1
dafc8851
JH
226 /* It would seem that the similar code in regtry()
227 * already takes care of this, and in fact it is in
228 * a better location to since this code can #if 0-ed out
229 * but the code in regtry() is needed or otherwise tests
230 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
231 * (as of patchlevel 7877) will fail. Then again,
232 * this code seems to be necessary or otherwise
233 * building DynaLoader will fail:
234 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
235 * --jhi */
3280af22
NIS
236 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
237 if (paren > PL_regsize)
cf93c79d
IZ
238 PL_regstartp[paren] = -1;
239 PL_regendp[paren] = -1;
a0d0e21e 240 }
dafc8851 241#endif
a0d0e21e
LW
242 return input;
243}
244
0f5d15d6 245STATIC char *
cea2e8a9 246S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
247{
248 I32 tmp = PL_savestack_ix;
249
250 PL_savestack_ix = ss;
251 regcppop();
252 PL_savestack_ix = tmp;
942e002e 253 return Nullch;
0f5d15d6
IZ
254}
255
256typedef struct re_cc_state
257{
258 I32 ss;
259 regnode *node;
260 struct re_cc_state *prev;
261 CURCUR *cc;
262 regexp *re;
263} re_cc_state;
264
02db2b7b 265#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 266
29d1e993
HS
267#define TRYPAREN(paren, n, input) { \
268 if (paren) { \
269 if (n) { \
270 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
271 PL_regendp[paren] = input - PL_bostr; \
272 } \
273 else \
274 PL_regendp[paren] = -1; \
275 } \
276 if (regmatch(next)) \
277 sayYES; \
278 if (paren && n) \
279 PL_regendp[paren] = -1; \
280}
281
282
a687059c 283/*
e50aee73 284 * pregexec and friends
a687059c
LW
285 */
286
287/*
c277df42 288 - pregexec - match a regexp against a string
a687059c 289 */
c277df42 290I32
864dbfa3 291Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 292 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
293/* strend: pointer to null at end of string */
294/* strbeg: real beginning of string */
295/* minend: end of match must be >=minend after stringarg. */
296/* nosave: For optimizations. */
297{
298 return
9041c2e3 299 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
300 nosave ? 0 : REXEC_COPY_STR);
301}
0f5d15d6
IZ
302
303STATIC void
cea2e8a9 304S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
305{
306 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
307#ifdef DEBUGGING
308 PL_regprogram = prog->program;
309#endif
310 PL_regnpar = prog->nparens;
9041c2e3
NIS
311 PL_regdata = prog->data;
312 PL_reg_re = prog;
0f5d15d6 313}
22e551b9 314
9041c2e3 315/*
cad2e5aa
JH
316 * Need to implement the following flags for reg_anch:
317 *
318 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
319 * USE_INTUIT_ML
320 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
321 * INTUIT_AUTORITATIVE_ML
322 * INTUIT_ONCE_NOML - Intuit can match in one location only.
323 * INTUIT_ONCE_ML
324 *
325 * Another flag for this function: SECOND_TIME (so that float substrs
326 * with giant delta may be not rechecked).
327 */
328
329/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
330
2c2d71f5 331/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
332 Otherwise, only SvCUR(sv) is used to get strbeg. */
333
334/* XXXX We assume that strpos is strbeg unless sv. */
335
6eb5f6b9
JH
336/* XXXX Some places assume that there is a fixed substring.
337 An update may be needed if optimizer marks as "INTUITable"
338 RExen without fixed substrings. Similarly, it is assumed that
339 lengths of all the strings are no more than minlen, thus they
340 cannot come from lookahead.
341 (Or minlen should take into account lookahead.) */
342
2c2d71f5
JH
343/* A failure to find a constant substring means that there is no need to make
344 an expensive call to REx engine, thus we celebrate a failure. Similarly,
345 finding a substring too deep into the string means that less calls to
30944b6d
IZ
346 regtry() should be needed.
347
348 REx compiler's optimizer found 4 possible hints:
349 a) Anchored substring;
350 b) Fixed substring;
351 c) Whether we are anchored (beginning-of-line or \G);
352 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 353 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
354 string which does not contradict any of them.
355 */
2c2d71f5 356
6eb5f6b9
JH
357/* Most of decisions we do here should have been done at compile time.
358 The nodes of the REx which we used for the search should have been
359 deleted from the finite automaton. */
360
cad2e5aa
JH
361char *
362Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
363 char *strend, U32 flags, re_scream_pos_data *data)
364{
b7953727 365 register I32 start_shift = 0;
cad2e5aa 366 /* Should be nonnegative! */
b7953727 367 register I32 end_shift = 0;
2c2d71f5
JH
368 register char *s;
369 register SV *check;
a1933d95 370 char *strbeg;
cad2e5aa
JH
371 char *t;
372 I32 ml_anch;
6eb5f6b9 373 register char *other_last = Nullch; /* other substr checked before this */
b7953727 374 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
375#ifdef DEBUGGING
376 char *i_strpos = strpos;
377#endif
cad2e5aa
JH
378
379 DEBUG_r( if (!PL_colorset) reginitcolors() );
380 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 381 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
382 PL_colors[4],PL_colors[5],PL_colors[0],
383 prog->precomp,
384 PL_colors[1],
385 (strlen(prog->precomp) > 60 ? "..." : ""),
386 PL_colors[0],
b900a521 387 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
388 strpos, PL_colors[1],
389 (strend - strpos > 60 ? "..." : ""))
390 );
391
1aa99e6b
IH
392 if (prog->reganch & ROPT_UTF8)
393 PL_reg_flags |= RF_utf8;
394
395 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
2c2d71f5 396 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 397 goto fail;
2c2d71f5 398 }
a1933d95 399 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 400 PL_regeol = strend;
653099ff 401 check = prog->check_substr;
2c2d71f5 402 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
403 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
404 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 405 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 406
7e25d62c
JH
407 if (!ml_anch) {
408 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
409 | ROPT_IMPLICIT)) /* not a real BOL */
410 /* SvCUR is not set on references: SvRV and SvPVX overlap */
411 && sv && !SvROK(sv)
412 && (strpos != strbeg)) {
413 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
414 goto fail;
415 }
416 if (prog->check_offset_min == prog->check_offset_max &&
417 !(prog->reganch & ROPT_SANY_SEEN)) {
2c2d71f5 418 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
419 I32 slen;
420
1aa99e6b 421 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
422 if (SvTAIL(check)) {
423 slen = SvCUR(check); /* >= 1 */
cad2e5aa 424
9041c2e3 425 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
426 || (strend - s == slen && strend[-1] != '\n')) {
427 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
428 goto fail_finish;
cad2e5aa
JH
429 }
430 /* Now should match s[0..slen-2] */
431 slen--;
653099ff 432 if (slen && (*SvPVX(check) != *s
cad2e5aa 433 || (slen > 1
653099ff 434 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
435 report_neq:
436 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
437 goto fail_finish;
438 }
cad2e5aa 439 }
653099ff
GS
440 else if (*SvPVX(check) != *s
441 || ((slen = SvCUR(check)) > 1
442 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
443 goto report_neq;
444 goto success_at_start;
7e25d62c 445 }
cad2e5aa 446 }
2c2d71f5 447 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 448 s = strpos;
2c2d71f5 449 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 450 end_shift = prog->minlen - start_shift -
653099ff 451 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 452 if (!ml_anch) {
653099ff
GS
453 I32 end = prog->check_offset_max + CHR_SVLEN(check)
454 - (SvTAIL(check) != 0);
1aa99e6b 455 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
456
457 if (end_shift < eshift)
458 end_shift = eshift;
459 }
cad2e5aa 460 }
2c2d71f5 461 else { /* Can match at random position */
cad2e5aa
JH
462 ml_anch = 0;
463 s = strpos;
2c2d71f5
JH
464 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
465 /* Should be nonnegative! */
466 end_shift = prog->minlen - start_shift -
653099ff 467 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
468 }
469
2c2d71f5 470#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 471 if (end_shift < 0)
6bbae5e6 472 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
473#endif
474
2c2d71f5
JH
475 restart:
476 /* Find a possible match in the region s..strend by looking for
477 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 478 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
479 I32 p = -1; /* Internal iterator of scream. */
480 I32 *pp = data ? data->scream_pos : &p;
481
2c2d71f5
JH
482 if (PL_screamfirst[BmRARE(check)] >= 0
483 || ( BmRARE(check) == '\n'
484 && (BmPREVIOUS(check) == SvCUR(check) - 1)
485 && SvTAIL(check) ))
9041c2e3 486 s = screaminstr(sv, check,
2c2d71f5 487 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 488 else
2c2d71f5 489 goto fail_finish;
cad2e5aa
JH
490 if (data)
491 *data->scream_olds = s;
492 }
3baa4c62
JH
493 else if (prog->reganch & ROPT_SANY_SEEN)
494 s = fbm_instr((U8*)(s + start_shift),
495 (U8*)(strend - end_shift),
496 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 497 else
1aa99e6b
IH
498 s = fbm_instr(HOP3(s, start_shift, strend),
499 HOP3(strend, -end_shift, strbeg),
2c2d71f5 500 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
501
502 /* Update the count-of-usability, remove useless subpatterns,
503 unshift s. */
2c2d71f5
JH
504
505 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
506 (s ? "Found" : "Did not find"),
507 ((check == prog->anchored_substr) ? "anchored" : "floating"),
508 PL_colors[0],
7b0972df
JH
509 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
510 SvPVX(check),
2c2d71f5
JH
511 PL_colors[1], (SvTAIL(check) ? "$" : ""),
512 (s ? " at offset " : "...\n") ) );
513
514 if (!s)
515 goto fail_finish;
516
6eb5f6b9
JH
517 check_at = s;
518
2c2d71f5 519 /* Finish the diagnostic message */
30944b6d 520 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
521
522 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
523 Start with the other substr.
524 XXXX no SCREAM optimization yet - and a very coarse implementation
525 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
526 *always* match. Probably should be marked during compile...
527 Probably it is right to do no SCREAM here...
528 */
529
530 if (prog->float_substr && prog->anchored_substr) {
30944b6d 531 /* Take into account the "other" substring. */
2c2d71f5
JH
532 /* XXXX May be hopelessly wrong for UTF... */
533 if (!other_last)
6eb5f6b9 534 other_last = strpos;
2c2d71f5 535 if (check == prog->float_substr) {
30944b6d
IZ
536 do_other_anchored:
537 {
1aa99e6b 538 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
539 char *s1 = s;
540
2c2d71f5
JH
541 t = s - prog->check_offset_max;
542 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
543 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 544 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 545 && t > strpos)))
30944b6d 546 /* EMPTY */;
2c2d71f5
JH
547 else
548 t = strpos;
1aa99e6b 549 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
550 if (t < other_last) /* These positions already checked */
551 t = other_last;
1aa99e6b 552 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
553 if (last < last1)
554 last1 = last;
555 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
556 /* On end-of-str: see comment below. */
557 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
558 HOP3(HOP3(last1, prog->anchored_offset, strend)
559 + SvCUR(prog->anchored_substr),
560 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
561 prog->anchored_substr,
562 PL_multiline ? FBMrf_MULTILINE : 0);
563 DEBUG_r(PerlIO_printf(Perl_debug_log,
564 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
565 (s ? "Found" : "Contradicts"),
566 PL_colors[0],
7b0972df
JH
567 (int)(SvCUR(prog->anchored_substr)
568 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
569 SvPVX(prog->anchored_substr),
570 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
571 if (!s) {
572 if (last1 >= last2) {
573 DEBUG_r(PerlIO_printf(Perl_debug_log,
574 ", giving up...\n"));
575 goto fail_finish;
576 }
577 DEBUG_r(PerlIO_printf(Perl_debug_log,
578 ", trying floating at offset %ld...\n",
1aa99e6b
IH
579 (long)(HOP3c(s1, 1, strend) - i_strpos)));
580 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
581 s = HOP3c(last, 1, strend);
2c2d71f5
JH
582 goto restart;
583 }
584 else {
585 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 586 (long)(s - i_strpos)));
1aa99e6b
IH
587 t = HOP3c(s, -prog->anchored_offset, strbeg);
588 other_last = HOP3c(s, 1, strend);
30944b6d 589 s = s1;
2c2d71f5
JH
590 if (t == strpos)
591 goto try_at_start;
2c2d71f5
JH
592 goto try_at_offset;
593 }
30944b6d 594 }
2c2d71f5
JH
595 }
596 else { /* Take into account the floating substring. */
597 char *last, *last1;
598 char *s1 = s;
599
1aa99e6b
IH
600 t = HOP3c(s, -start_shift, strbeg);
601 last1 = last =
602 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
603 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
604 last = HOP3c(t, prog->float_max_offset, strend);
605 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
606 if (s < other_last)
607 s = other_last;
2c2d71f5
JH
608 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
609 /* fbm_instr() takes into account exact value of end-of-str
610 if the check is SvTAIL(ed). Since false positives are OK,
611 and end-of-str is not later than strend we are OK. */
612 s = fbm_instr((unsigned char*)s,
613 (unsigned char*)last + SvCUR(prog->float_substr)
614 - (SvTAIL(prog->float_substr)!=0),
615 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
616 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
617 (s ? "Found" : "Contradicts"),
618 PL_colors[0],
7b0972df
JH
619 (int)(SvCUR(prog->float_substr)
620 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
621 SvPVX(prog->float_substr),
622 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
623 if (!s) {
624 if (last1 == last) {
625 DEBUG_r(PerlIO_printf(Perl_debug_log,
626 ", giving up...\n"));
627 goto fail_finish;
628 }
629 DEBUG_r(PerlIO_printf(Perl_debug_log,
630 ", trying anchored starting at offset %ld...\n",
30944b6d 631 (long)(s1 + 1 - i_strpos)));
803ff556 632 other_last = last;
1aa99e6b 633 s = HOP3c(t, 1, strend);
2c2d71f5
JH
634 goto restart;
635 }
636 else {
637 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 638 (long)(s - i_strpos)));
803ff556 639 other_last = s; /* Fix this later. --Hugo */
30944b6d 640 s = s1;
2c2d71f5
JH
641 if (t == strpos)
642 goto try_at_start;
2c2d71f5
JH
643 goto try_at_offset;
644 }
645 }
cad2e5aa 646 }
2c2d71f5
JH
647
648 t = s - prog->check_offset_max;
2c2d71f5
JH
649 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
650 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
651 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
652 && t > strpos))) {
2c2d71f5
JH
653 /* Fixed substring is found far enough so that the match
654 cannot start at strpos. */
655 try_at_offset:
cad2e5aa 656 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
657 /* Eventually fbm_*() should handle this, but often
658 anchored_offset is not 0, so this check will not be wasted. */
659 /* XXXX In the code below we prefer to look for "^" even in
660 presence of anchored substrings. And we search even
661 beyond the found float position. These pessimizations
662 are historical artefacts only. */
663 find_anchor:
2c2d71f5 664 while (t < strend - prog->minlen) {
cad2e5aa 665 if (*t == '\n') {
4ee3650e 666 if (t < check_at - prog->check_offset_min) {
30944b6d 667 if (prog->anchored_substr) {
4ee3650e
GS
668 /* Since we moved from the found position,
669 we definitely contradict the found anchored
30944b6d
IZ
670 substr. Due to the above check we do not
671 contradict "check" substr.
672 Thus we can arrive here only if check substr
673 is float. Redo checking for "other"=="fixed".
674 */
9041c2e3 675 strpos = t + 1;
30944b6d
IZ
676 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
677 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
678 goto do_other_anchored;
679 }
4ee3650e
GS
680 /* We don't contradict the found floating substring. */
681 /* XXXX Why not check for STCLASS? */
cad2e5aa 682 s = t + 1;
2c2d71f5 683 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 684 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
685 goto set_useful;
686 }
4ee3650e
GS
687 /* Position contradicts check-string */
688 /* XXXX probably better to look for check-string
689 than for "\n", so one should lower the limit for t? */
690 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 691 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 692 other_last = strpos = s = t + 1;
cad2e5aa
JH
693 goto restart;
694 }
695 t++;
696 }
2c2d71f5
JH
697 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
698 PL_colors[0],PL_colors[1]));
699 goto fail_finish;
cad2e5aa 700 }
f5952150
GS
701 else {
702 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
703 PL_colors[0],PL_colors[1]));
704 }
cad2e5aa
JH
705 s = t;
706 set_useful:
2c2d71f5 707 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
708 }
709 else {
f5952150 710 /* The found string does not prohibit matching at strpos,
2c2d71f5 711 - no optimization of calling REx engine can be performed,
f5952150
GS
712 unless it was an MBOL and we are not after MBOL,
713 or a future STCLASS check will fail this. */
2c2d71f5
JH
714 try_at_start:
715 /* Even in this situation we may use MBOL flag if strpos is offset
716 wrt the start of the string. */
05b4157f 717 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 718 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
719 /* May be due to an implicit anchor of m{.*foo} */
720 && !(prog->reganch & ROPT_IMPLICIT))
721 {
cad2e5aa
JH
722 t = strpos;
723 goto find_anchor;
724 }
30944b6d 725 DEBUG_r( if (ml_anch)
f5952150
GS
726 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
727 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 728 );
2c2d71f5 729 success_at_start:
30944b6d 730 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 731 && prog->check_substr /* Could be deleted already */
cad2e5aa 732 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
733 && prog->check_substr == prog->float_substr)
734 {
cad2e5aa 735 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 736 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
737 SvREFCNT_dec(prog->check_substr);
738 prog->check_substr = Nullsv; /* disable */
739 prog->float_substr = Nullsv; /* clear */
5e39e1e5 740 check = Nullsv; /* abort */
cad2e5aa 741 s = strpos;
3cf5c195
IZ
742 /* XXXX This is a remnant of the old implementation. It
743 looks wasteful, since now INTUIT can use many
6eb5f6b9 744 other heuristics. */
cad2e5aa
JH
745 prog->reganch &= ~RE_USE_INTUIT;
746 }
747 else
748 s = strpos;
749 }
750
6eb5f6b9
JH
751 /* Last resort... */
752 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
753 if (prog->regstclass) {
754 /* minlen == 0 is possible if regstclass is \b or \B,
755 and the fixed substr is ''$.
756 Since minlen is already taken into account, s+1 is before strend;
757 accidentally, minlen >= 1 guaranties no false positives at s + 1
758 even for \b or \B. But (minlen? 1 : 0) below assumes that
759 regstclass does not come from lookahead... */
760 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
761 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 762 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 763 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 764 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 765 : 1);
6eb5f6b9 766 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
767 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
768 : (prog->float_substr
769 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
770 cl_l, strend)
771 : strend);
a1933d95 772 char *startpos = strbeg;
6eb5f6b9
JH
773
774 t = s;
76384e4a 775 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 776 PL_regdata = prog->data;
76384e4a
GS
777 PL_bostr = startpos;
778 }
6eb5f6b9
JH
779 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
780 if (!s) {
781#ifdef DEBUGGING
b7953727 782 char *what = 0;
6eb5f6b9
JH
783#endif
784 if (endpos == strend) {
785 DEBUG_r( PerlIO_printf(Perl_debug_log,
786 "Could not match STCLASS...\n") );
787 goto fail;
788 }
66e933ab
GS
789 DEBUG_r( PerlIO_printf(Perl_debug_log,
790 "This position contradicts STCLASS...\n") );
653099ff
GS
791 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
792 goto fail;
6eb5f6b9
JH
793 /* Contradict one of substrings */
794 if (prog->anchored_substr) {
6eb5f6b9
JH
795 if (prog->anchored_substr == check) {
796 DEBUG_r( what = "anchored" );
797 hop_and_restart:
1aa99e6b 798 s = HOP3c(t, 1, strend);
66e933ab
GS
799 if (s + start_shift + end_shift > strend) {
800 /* XXXX Should be taken into account earlier? */
801 DEBUG_r( PerlIO_printf(Perl_debug_log,
802 "Could not match STCLASS...\n") );
803 goto fail;
804 }
5e39e1e5
HS
805 if (!check)
806 goto giveup;
6eb5f6b9 807 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 808 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
809 what, (long)(s + start_shift - i_strpos)) );
810 goto restart;
811 }
66e933ab 812 /* Have both, check_string is floating */
6eb5f6b9
JH
813 if (t + start_shift >= check_at) /* Contradicts floating=check */
814 goto retry_floating_check;
815 /* Recheck anchored substring, but not floating... */
9041c2e3 816 s = check_at;
5e39e1e5
HS
817 if (!check)
818 goto giveup;
6eb5f6b9 819 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 820 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
821 (long)(other_last - i_strpos)) );
822 goto do_other_anchored;
823 }
60e71179
GS
824 /* Another way we could have checked stclass at the
825 current position only: */
826 if (ml_anch) {
827 s = t = t + 1;
5e39e1e5
HS
828 if (!check)
829 goto giveup;
60e71179 830 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
831 "Looking for /%s^%s/m starting at offset %ld...\n",
832 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 833 goto try_at_offset;
66e933ab 834 }
60e71179
GS
835 if (!prog->float_substr) /* Could have been deleted */
836 goto fail;
6eb5f6b9
JH
837 /* Check is floating subtring. */
838 retry_floating_check:
839 t = check_at - start_shift;
840 DEBUG_r( what = "floating" );
841 goto hop_and_restart;
842 }
b7953727
JH
843 if (t != s) {
844 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 845 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
846 (long)(t - i_strpos), (long)(s - i_strpos))
847 );
848 }
849 else {
850 DEBUG_r(PerlIO_printf(Perl_debug_log,
851 "Does not contradict STCLASS...\n");
852 );
853 }
6eb5f6b9 854 }
5e39e1e5
HS
855 giveup:
856 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
857 PL_colors[4], (check ? "Guessed" : "Giving up"),
858 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 859 return s;
2c2d71f5
JH
860
861 fail_finish: /* Substring not found */
66e933ab
GS
862 if (prog->check_substr) /* could be removed already */
863 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 864 fail:
2c2d71f5 865 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
866 PL_colors[4],PL_colors[5]));
867 return Nullch;
868}
9661b544 869
6eb5f6b9 870/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
871STATIC char *
872S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 873{
6eb5f6b9
JH
874 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
875 char *m;
d8093b23
G
876 STRLEN ln;
877 unsigned int c1;
878 unsigned int c2;
6eb5f6b9
JH
879 char *e;
880 register I32 tmp = 1; /* Scratch variable? */
ffc61ed2 881 register bool do_utf8 = DO_UTF8(PL_reg_sv);
cad2e5aa 882
6eb5f6b9
JH
883 /* We know what class it must start with. */
884 switch (OP(c)) {
6eb5f6b9 885 case ANYOF:
a0ed51b3 886 while (s < strend) {
ffc61ed2 887 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
888 if (tmp && (norun || regtry(prog, s)))
889 goto got_it;
890 else
891 tmp = doevery;
a0ed51b3 892 }
6eb5f6b9
JH
893 else
894 tmp = 1;
ffc61ed2 895 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 896 }
6eb5f6b9
JH
897 break;
898 case EXACTF:
899 m = STRING(c);
900 ln = STR_LEN(c);
1aa99e6b
IH
901 if (UTF) {
902 c1 = to_utf8_lower((U8*)m);
903 c2 = to_utf8_upper((U8*)m);
904 }
905 else {
906 c1 = *(U8*)m;
907 c2 = PL_fold[c1];
908 }
6eb5f6b9
JH
909 goto do_exactf;
910 case EXACTFL:
911 m = STRING(c);
912 ln = STR_LEN(c);
d8093b23 913 c1 = *(U8*)m;
6eb5f6b9
JH
914 c2 = PL_fold_locale[c1];
915 do_exactf:
916 e = strend - ln;
b3c9acc1 917
6eb5f6b9
JH
918 if (norun && e < s)
919 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
920
921 if (do_utf8) {
922 STRLEN len;
923 if (c1 == c2)
924 while (s <= e) {
9041c2e3 925 if ( utf8_to_uvchr((U8*)s, &len) == c1
1aa99e6b
IH
926 && regtry(prog, s) )
927 goto got_it;
928 s += len;
929 }
930 else
931 while (s <= e) {
9041c2e3 932 UV c = utf8_to_uvchr((U8*)s, &len);
1aa99e6b
IH
933 if ( (c == c1 || c == c2) && regtry(prog, s) )
934 goto got_it;
935 s += len;
936 }
937 }
938 else {
939 if (c1 == c2)
940 while (s <= e) {
941 if ( *(U8*)s == c1
942 && (ln == 1 || !(OP(c) == EXACTF
943 ? ibcmp(s, m, ln)
944 : ibcmp_locale(s, m, ln)))
945 && (norun || regtry(prog, s)) )
946 goto got_it;
947 s++;
948 }
949 else
950 while (s <= e) {
951 if ( (*(U8*)s == c1 || *(U8*)s == c2)
952 && (ln == 1 || !(OP(c) == EXACTF
953 ? ibcmp(s, m, ln)
954 : ibcmp_locale(s, m, ln)))
955 && (norun || regtry(prog, s)) )
956 goto got_it;
957 s++;
958 }
b3c9acc1
IZ
959 }
960 break;
bbce6d69 961 case BOUNDL:
3280af22 962 PL_reg_flags |= RF_tainted;
bbce6d69 963 /* FALL THROUGH */
a0d0e21e 964 case BOUND:
ffc61ed2 965 if (do_utf8) {
12d33761 966 if (s == PL_bostr)
ffc61ed2
JH
967 tmp = '\n';
968 else {
1aa99e6b 969 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 970
0064a8a9
JH
971 if (s > (char*)r)
972 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
973 }
974 tmp = ((OP(c) == BOUND ?
9041c2e3 975 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 976 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
977 while (s < strend) {
978 if (tmp == !(OP(c) == BOUND ?
3568d838 979 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
980 isALNUM_LC_utf8((U8*)s)))
981 {
982 tmp = !tmp;
983 if ((norun || regtry(prog, s)))
984 goto got_it;
985 }
986 s += UTF8SKIP(s);
a687059c 987 }
a0d0e21e 988 }
667bb95a 989 else {
12d33761 990 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
991 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
992 while (s < strend) {
993 if (tmp ==
994 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
995 tmp = !tmp;
996 if ((norun || regtry(prog, s)))
997 goto got_it;
998 }
999 s++;
a0ed51b3 1000 }
a0ed51b3 1001 }
6eb5f6b9 1002 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1003 goto got_it;
1004 break;
bbce6d69 1005 case NBOUNDL:
3280af22 1006 PL_reg_flags |= RF_tainted;
bbce6d69 1007 /* FALL THROUGH */
a0d0e21e 1008 case NBOUND:
ffc61ed2 1009 if (do_utf8) {
12d33761 1010 if (s == PL_bostr)
ffc61ed2
JH
1011 tmp = '\n';
1012 else {
1aa99e6b 1013 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1014
0064a8a9
JH
1015 if (s > (char*)r)
1016 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1017 }
1018 tmp = ((OP(c) == NBOUND ?
9041c2e3 1019 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1020 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1021 while (s < strend) {
1022 if (tmp == !(OP(c) == NBOUND ?
3568d838 1023 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1024 isALNUM_LC_utf8((U8*)s)))
1025 tmp = !tmp;
1026 else if ((norun || regtry(prog, s)))
1027 goto got_it;
1028 s += UTF8SKIP(s);
1029 }
a0d0e21e 1030 }
667bb95a 1031 else {
12d33761 1032 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1033 tmp = ((OP(c) == NBOUND ?
1034 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1035 while (s < strend) {
1036 if (tmp ==
1037 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1038 tmp = !tmp;
1039 else if ((norun || regtry(prog, s)))
1040 goto got_it;
1041 s++;
1042 }
a0ed51b3 1043 }
6eb5f6b9 1044 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1045 goto got_it;
1046 break;
a0d0e21e 1047 case ALNUM:
ffc61ed2 1048 if (do_utf8) {
8269fa76 1049 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1050 while (s < strend) {
3568d838 1051 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1052 if (tmp && (norun || regtry(prog, s)))
1053 goto got_it;
1054 else
1055 tmp = doevery;
1056 }
bbce6d69 1057 else
ffc61ed2
JH
1058 tmp = 1;
1059 s += UTF8SKIP(s);
bbce6d69 1060 }
bbce6d69 1061 }
ffc61ed2
JH
1062 else {
1063 while (s < strend) {
1064 if (isALNUM(*s)) {
1065 if (tmp && (norun || regtry(prog, s)))
1066 goto got_it;
1067 else
1068 tmp = doevery;
1069 }
a0ed51b3 1070 else
ffc61ed2
JH
1071 tmp = 1;
1072 s++;
a0ed51b3 1073 }
a0ed51b3
LW
1074 }
1075 break;
bbce6d69 1076 case ALNUML:
3280af22 1077 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1078 if (do_utf8) {
1079 while (s < strend) {
1080 if (isALNUM_LC_utf8((U8*)s)) {
1081 if (tmp && (norun || regtry(prog, s)))
1082 goto got_it;
1083 else
1084 tmp = doevery;
1085 }
a687059c 1086 else
ffc61ed2
JH
1087 tmp = 1;
1088 s += UTF8SKIP(s);
a0d0e21e 1089 }
a0d0e21e 1090 }
ffc61ed2
JH
1091 else {
1092 while (s < strend) {
1093 if (isALNUM_LC(*s)) {
1094 if (tmp && (norun || regtry(prog, s)))
1095 goto got_it;
1096 else
1097 tmp = doevery;
1098 }
a0ed51b3 1099 else
ffc61ed2
JH
1100 tmp = 1;
1101 s++;
a0ed51b3 1102 }
a0ed51b3
LW
1103 }
1104 break;
a0d0e21e 1105 case NALNUM:
ffc61ed2 1106 if (do_utf8) {
8269fa76 1107 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1108 while (s < strend) {
3568d838 1109 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1110 if (tmp && (norun || regtry(prog, s)))
1111 goto got_it;
1112 else
1113 tmp = doevery;
1114 }
bbce6d69 1115 else
ffc61ed2
JH
1116 tmp = 1;
1117 s += UTF8SKIP(s);
bbce6d69 1118 }
bbce6d69 1119 }
ffc61ed2
JH
1120 else {
1121 while (s < strend) {
1122 if (!isALNUM(*s)) {
1123 if (tmp && (norun || regtry(prog, s)))
1124 goto got_it;
1125 else
1126 tmp = doevery;
1127 }
a0ed51b3 1128 else
ffc61ed2
JH
1129 tmp = 1;
1130 s++;
a0ed51b3 1131 }
a0ed51b3
LW
1132 }
1133 break;
bbce6d69 1134 case NALNUML:
3280af22 1135 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1136 if (do_utf8) {
1137 while (s < strend) {
1138 if (!isALNUM_LC_utf8((U8*)s)) {
1139 if (tmp && (norun || regtry(prog, s)))
1140 goto got_it;
1141 else
1142 tmp = doevery;
1143 }
a687059c 1144 else
ffc61ed2
JH
1145 tmp = 1;
1146 s += UTF8SKIP(s);
a687059c 1147 }
a0d0e21e 1148 }
ffc61ed2
JH
1149 else {
1150 while (s < strend) {
1151 if (!isALNUM_LC(*s)) {
1152 if (tmp && (norun || regtry(prog, s)))
1153 goto got_it;
1154 else
1155 tmp = doevery;
1156 }
a0ed51b3 1157 else
ffc61ed2
JH
1158 tmp = 1;
1159 s++;
a0ed51b3 1160 }
a0ed51b3
LW
1161 }
1162 break;
a0d0e21e 1163 case SPACE:
ffc61ed2 1164 if (do_utf8) {
8269fa76 1165 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1166 while (s < strend) {
3568d838 1167 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1168 if (tmp && (norun || regtry(prog, s)))
1169 goto got_it;
1170 else
1171 tmp = doevery;
1172 }
a0d0e21e 1173 else
ffc61ed2
JH
1174 tmp = 1;
1175 s += UTF8SKIP(s);
2304df62 1176 }
a0d0e21e 1177 }
ffc61ed2
JH
1178 else {
1179 while (s < strend) {
1180 if (isSPACE(*s)) {
1181 if (tmp && (norun || regtry(prog, s)))
1182 goto got_it;
1183 else
1184 tmp = doevery;
1185 }
a0ed51b3 1186 else
ffc61ed2
JH
1187 tmp = 1;
1188 s++;
a0ed51b3 1189 }
a0ed51b3
LW
1190 }
1191 break;
bbce6d69 1192 case SPACEL:
3280af22 1193 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1194 if (do_utf8) {
1195 while (s < strend) {
1196 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1197 if (tmp && (norun || regtry(prog, s)))
1198 goto got_it;
1199 else
1200 tmp = doevery;
1201 }
bbce6d69 1202 else
ffc61ed2
JH
1203 tmp = 1;
1204 s += UTF8SKIP(s);
bbce6d69 1205 }
bbce6d69 1206 }
ffc61ed2
JH
1207 else {
1208 while (s < strend) {
1209 if (isSPACE_LC(*s)) {
1210 if (tmp && (norun || regtry(prog, s)))
1211 goto got_it;
1212 else
1213 tmp = doevery;
1214 }
a0ed51b3 1215 else
ffc61ed2
JH
1216 tmp = 1;
1217 s++;
a0ed51b3 1218 }
a0ed51b3
LW
1219 }
1220 break;
a0d0e21e 1221 case NSPACE:
ffc61ed2 1222 if (do_utf8) {
8269fa76 1223 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1224 while (s < strend) {
3568d838 1225 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1226 if (tmp && (norun || regtry(prog, s)))
1227 goto got_it;
1228 else
1229 tmp = doevery;
1230 }
a0d0e21e 1231 else
ffc61ed2
JH
1232 tmp = 1;
1233 s += UTF8SKIP(s);
a687059c 1234 }
a0d0e21e 1235 }
ffc61ed2
JH
1236 else {
1237 while (s < strend) {
1238 if (!isSPACE(*s)) {
1239 if (tmp && (norun || regtry(prog, s)))
1240 goto got_it;
1241 else
1242 tmp = doevery;
1243 }
a0ed51b3 1244 else
ffc61ed2
JH
1245 tmp = 1;
1246 s++;
a0ed51b3 1247 }
a0ed51b3
LW
1248 }
1249 break;
bbce6d69 1250 case NSPACEL:
3280af22 1251 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1252 if (do_utf8) {
1253 while (s < strend) {
1254 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1255 if (tmp && (norun || regtry(prog, s)))
1256 goto got_it;
1257 else
1258 tmp = doevery;
1259 }
bbce6d69 1260 else
ffc61ed2
JH
1261 tmp = 1;
1262 s += UTF8SKIP(s);
bbce6d69 1263 }
bbce6d69 1264 }
ffc61ed2
JH
1265 else {
1266 while (s < strend) {
1267 if (!isSPACE_LC(*s)) {
1268 if (tmp && (norun || regtry(prog, s)))
1269 goto got_it;
1270 else
1271 tmp = doevery;
1272 }
a0ed51b3 1273 else
ffc61ed2
JH
1274 tmp = 1;
1275 s++;
a0ed51b3 1276 }
a0ed51b3
LW
1277 }
1278 break;
a0d0e21e 1279 case DIGIT:
ffc61ed2 1280 if (do_utf8) {
8269fa76 1281 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1282 while (s < strend) {
3568d838 1283 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1284 if (tmp && (norun || regtry(prog, s)))
1285 goto got_it;
1286 else
1287 tmp = doevery;
1288 }
a0d0e21e 1289 else
ffc61ed2
JH
1290 tmp = 1;
1291 s += UTF8SKIP(s);
2b69d0c2 1292 }
a0d0e21e 1293 }
ffc61ed2
JH
1294 else {
1295 while (s < strend) {
1296 if (isDIGIT(*s)) {
1297 if (tmp && (norun || regtry(prog, s)))
1298 goto got_it;
1299 else
1300 tmp = doevery;
1301 }
a0ed51b3 1302 else
ffc61ed2
JH
1303 tmp = 1;
1304 s++;
a0ed51b3 1305 }
a0ed51b3
LW
1306 }
1307 break;
b8c5462f
JH
1308 case DIGITL:
1309 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1310 if (do_utf8) {
1311 while (s < strend) {
1312 if (isDIGIT_LC_utf8((U8*)s)) {
1313 if (tmp && (norun || regtry(prog, s)))
1314 goto got_it;
1315 else
1316 tmp = doevery;
1317 }
b8c5462f 1318 else
ffc61ed2
JH
1319 tmp = 1;
1320 s += UTF8SKIP(s);
b8c5462f 1321 }
b8c5462f 1322 }
ffc61ed2
JH
1323 else {
1324 while (s < strend) {
1325 if (isDIGIT_LC(*s)) {
1326 if (tmp && (norun || regtry(prog, s)))
1327 goto got_it;
1328 else
1329 tmp = doevery;
1330 }
b8c5462f 1331 else
ffc61ed2
JH
1332 tmp = 1;
1333 s++;
b8c5462f 1334 }
b8c5462f
JH
1335 }
1336 break;
a0d0e21e 1337 case NDIGIT:
ffc61ed2 1338 if (do_utf8) {
8269fa76 1339 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1340 while (s < strend) {
3568d838 1341 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1342 if (tmp && (norun || regtry(prog, s)))
1343 goto got_it;
1344 else
1345 tmp = doevery;
1346 }
a0d0e21e 1347 else
ffc61ed2
JH
1348 tmp = 1;
1349 s += UTF8SKIP(s);
a687059c 1350 }
a0d0e21e 1351 }
ffc61ed2
JH
1352 else {
1353 while (s < strend) {
1354 if (!isDIGIT(*s)) {
1355 if (tmp && (norun || regtry(prog, s)))
1356 goto got_it;
1357 else
1358 tmp = doevery;
1359 }
a0ed51b3 1360 else
ffc61ed2
JH
1361 tmp = 1;
1362 s++;
a0ed51b3 1363 }
a0ed51b3
LW
1364 }
1365 break;
b8c5462f
JH
1366 case NDIGITL:
1367 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1368 if (do_utf8) {
1369 while (s < strend) {
1370 if (!isDIGIT_LC_utf8((U8*)s)) {
1371 if (tmp && (norun || regtry(prog, s)))
1372 goto got_it;
1373 else
1374 tmp = doevery;
1375 }
b8c5462f 1376 else
ffc61ed2
JH
1377 tmp = 1;
1378 s += UTF8SKIP(s);
b8c5462f 1379 }
a0ed51b3 1380 }
ffc61ed2
JH
1381 else {
1382 while (s < strend) {
1383 if (!isDIGIT_LC(*s)) {
1384 if (tmp && (norun || regtry(prog, s)))
1385 goto got_it;
1386 else
1387 tmp = doevery;
1388 }
cf93c79d 1389 else
ffc61ed2
JH
1390 tmp = 1;
1391 s++;
b8c5462f 1392 }
b8c5462f
JH
1393 }
1394 break;
b3c9acc1 1395 default:
3c3eec57
GS
1396 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1397 break;
d6a28714 1398 }
6eb5f6b9
JH
1399 return 0;
1400 got_it:
1401 return s;
1402}
1403
1404/*
1405 - regexec_flags - match a regexp against a string
1406 */
1407I32
1408Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1409 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1410/* strend: pointer to null at end of string */
1411/* strbeg: real beginning of string */
1412/* minend: end of match must be >=minend after stringarg. */
1413/* data: May be used for some additional optimizations. */
1414/* nosave: For optimizations. */
1415{
6eb5f6b9
JH
1416 register char *s;
1417 register regnode *c;
1418 register char *startpos = stringarg;
6eb5f6b9
JH
1419 I32 minlen; /* must match at least this many chars */
1420 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1421 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1422 constant substr. */ /* CC */
1423 I32 end_shift = 0; /* Same for the end. */ /* CC */
1424 I32 scream_pos = -1; /* Internal iterator of scream. */
1425 char *scream_olds;
1426 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1427 bool do_utf8 = DO_UTF8(sv);
6eb5f6b9
JH
1428
1429 PL_regcc = 0;
1430
1431 cache_re(prog);
1432#ifdef DEBUGGING
aea4f609 1433 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1434#endif
1435
1436 /* Be paranoid... */
1437 if (prog == NULL || startpos == NULL) {
1438 Perl_croak(aTHX_ "NULL regexp parameter");
1439 return 0;
1440 }
1441
1442 minlen = prog->minlen;
1aa99e6b 1443 if (do_utf8) {
3baa4c62
JH
1444 if (!(prog->reganch & ROPT_SANY_SEEN))
1445 if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
1aa99e6b
IH
1446 }
1447 else {
1448 if (strend - startpos < minlen) goto phooey;
1449 }
6eb5f6b9 1450
6eb5f6b9
JH
1451 /* Check validity of program. */
1452 if (UCHARAT(prog->program) != REG_MAGIC) {
1453 Perl_croak(aTHX_ "corrupted regexp program");
1454 }
1455
1456 PL_reg_flags = 0;
1457 PL_reg_eval_set = 0;
1458 PL_reg_maxiter = 0;
1459
1460 if (prog->reganch & ROPT_UTF8)
1461 PL_reg_flags |= RF_utf8;
1462
1463 /* Mark beginning of line for ^ and lookbehind. */
1464 PL_regbol = startpos;
1465 PL_bostr = strbeg;
1466 PL_reg_sv = sv;
1467
1468 /* Mark end of line for $ (and such) */
1469 PL_regeol = strend;
1470
1471 /* see how far we have to get to not match where we matched before */
1472 PL_regtill = startpos+minend;
1473
1474 /* We start without call_cc context. */
1475 PL_reg_call_cc = 0;
1476
1477 /* If there is a "must appear" string, look for it. */
1478 s = startpos;
1479
1480 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1481 MAGIC *mg;
1482
1483 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1484 PL_reg_ganch = startpos;
1485 else if (sv && SvTYPE(sv) >= SVt_PVMG
1486 && SvMAGIC(sv)
14befaf4
DM
1487 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1488 && mg->mg_len >= 0) {
6eb5f6b9
JH
1489 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1490 if (prog->reganch & ROPT_ANCH_GPOS) {
1491 if (s > PL_reg_ganch)
1492 goto phooey;
1493 s = PL_reg_ganch;
1494 }
1495 }
1496 else /* pos() not defined */
1497 PL_reg_ganch = strbeg;
1498 }
1499
699c3c34
JH
1500 if (do_utf8 == (UTF!=0) &&
1501 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1502 re_scream_pos_data d;
1503
1504 d.scream_olds = &scream_olds;
1505 d.scream_pos = &scream_pos;
1506 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1507 if (!s)
1508 goto phooey; /* not present */
1509 }
1510
1511 DEBUG_r( if (!PL_colorset) reginitcolors() );
1512 DEBUG_r(PerlIO_printf(Perl_debug_log,
1513 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1514 PL_colors[4],PL_colors[5],PL_colors[0],
1515 prog->precomp,
1516 PL_colors[1],
1517 (strlen(prog->precomp) > 60 ? "..." : ""),
1518 PL_colors[0],
1519 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1520 startpos, PL_colors[1],
1521 (strend - startpos > 60 ? "..." : ""))
1522 );
1523
1524 /* Simplest case: anchored match need be tried only once. */
1525 /* [unless only anchor is BOL and multiline is set] */
1526 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1527 if (s == startpos && regtry(prog, startpos))
1528 goto got_it;
1529 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1530 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1531 {
1532 char *end;
1533
1534 if (minlen)
1535 dontbother = minlen - 1;
1aa99e6b 1536 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1537 /* for multiline we only have to try after newlines */
1538 if (prog->check_substr) {
1539 if (s == startpos)
1540 goto after_try;
1541 while (1) {
1542 if (regtry(prog, s))
1543 goto got_it;
1544 after_try:
1545 if (s >= end)
1546 goto phooey;
1547 if (prog->reganch & RE_USE_INTUIT) {
1548 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1549 if (!s)
1550 goto phooey;
1551 }
1552 else
1553 s++;
1554 }
1555 } else {
1556 if (s > startpos)
1557 s--;
1558 while (s < end) {
1559 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1560 if (regtry(prog, s))
1561 goto got_it;
1562 }
1563 }
1564 }
1565 }
1566 goto phooey;
1567 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1568 if (regtry(prog, PL_reg_ganch))
1569 goto got_it;
1570 goto phooey;
1571 }
1572
1573 /* Messy cases: unanchored match. */
9041c2e3 1574 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1575 /* we have /x+whatever/ */
1576 /* it must be a one character string (XXXX Except UTF?) */
1577 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1578#ifdef DEBUGGING
1579 int did_match = 0;
1580#endif
1581
1aa99e6b 1582 if (do_utf8) {
6eb5f6b9
JH
1583 while (s < strend) {
1584 if (*s == ch) {
bf93d4cc 1585 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1586 if (regtry(prog, s)) goto got_it;
1587 s += UTF8SKIP(s);
1588 while (s < strend && *s == ch)
1589 s += UTF8SKIP(s);
1590 }
1591 s += UTF8SKIP(s);
1592 }
1593 }
1594 else {
1595 while (s < strend) {
1596 if (*s == ch) {
bf93d4cc 1597 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1598 if (regtry(prog, s)) goto got_it;
1599 s++;
1600 while (s < strend && *s == ch)
1601 s++;
1602 }
1603 s++;
1604 }
1605 }
b7953727 1606 DEBUG_r(if (!did_match)
bf93d4cc 1607 PerlIO_printf(Perl_debug_log,
b7953727
JH
1608 "Did not find anchored character...\n")
1609 );
6eb5f6b9
JH
1610 }
1611 /*SUPPRESS 560*/
1aa99e6b
IH
1612 else if (do_utf8 == (UTF!=0) &&
1613 (prog->anchored_substr != Nullsv
9041c2e3 1614 || (prog->float_substr != Nullsv
1aa99e6b 1615 && prog->float_max_offset < strend - s))) {
9041c2e3 1616 SV *must = prog->anchored_substr
6eb5f6b9 1617 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1618 I32 back_max =
6eb5f6b9 1619 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1620 I32 back_min =
6eb5f6b9 1621 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1622 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1623 -(I32)(CHR_SVLEN(must)
1aa99e6b 1624 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1625 char *last1; /* Last position checked before */
bf93d4cc
GS
1626#ifdef DEBUGGING
1627 int did_match = 0;
1628#endif
6eb5f6b9
JH
1629
1630 if (s > PL_bostr)
1631 last1 = HOPc(s, -1);
1632 else
1633 last1 = s - 1; /* bogus */
1634
1635 /* XXXX check_substr already used to find `s', can optimize if
1636 check_substr==must. */
1637 scream_pos = -1;
1638 dontbother = end_shift;
1639 strend = HOPc(strend, -dontbother);
1640 while ( (s <= last) &&
9041c2e3 1641 ((flags & REXEC_SCREAM)
1aa99e6b 1642 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1643 end_shift, &scream_pos, 0))
1aa99e6b 1644 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1645 (unsigned char*)strend, must,
6eb5f6b9 1646 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1647 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1648 if (HOPc(s, -back_max) > last1) {
1649 last1 = HOPc(s, -back_min);
1650 s = HOPc(s, -back_max);
1651 }
1652 else {
1653 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1654
1655 last1 = HOPc(s, -back_min);
1656 s = t;
1657 }
1aa99e6b 1658 if (do_utf8) {
6eb5f6b9
JH
1659 while (s <= last1) {
1660 if (regtry(prog, s))
1661 goto got_it;
1662 s += UTF8SKIP(s);
1663 }
1664 }
1665 else {
1666 while (s <= last1) {
1667 if (regtry(prog, s))
1668 goto got_it;
1669 s++;
1670 }
1671 }
1672 }
b7953727
JH
1673 DEBUG_r(if (!did_match)
1674 PerlIO_printf(Perl_debug_log,
1675 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1676 ((must == prog->anchored_substr)
1677 ? "anchored" : "floating"),
1678 PL_colors[0],
1679 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1680 SvPVX(must),
b7953727
JH
1681 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1682 );
6eb5f6b9
JH
1683 goto phooey;
1684 }
155aba94 1685 else if ((c = prog->regstclass)) {
66e933ab
GS
1686 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1687 /* don't bother with what can't match */
6eb5f6b9 1688 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1689 DEBUG_r({
1690 SV *prop = sv_newmortal();
1691 regprop(prop, c);
1692 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s);
1693 });
6eb5f6b9
JH
1694 if (find_byclass(prog, c, s, strend, startpos, 0))
1695 goto got_it;
bf93d4cc 1696 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1697 }
1698 else {
1699 dontbother = 0;
1700 if (prog->float_substr != Nullsv) { /* Trim the end. */
1701 char *last;
d6a28714
JH
1702
1703 if (flags & REXEC_SCREAM) {
1704 last = screaminstr(sv, prog->float_substr, s - strbeg,
1705 end_shift, &scream_pos, 1); /* last one */
1706 if (!last)
ffc61ed2 1707 last = scream_olds; /* Only one occurrence. */
b8c5462f 1708 }
d6a28714
JH
1709 else {
1710 STRLEN len;
1711 char *little = SvPV(prog->float_substr, len);
1712
1713 if (SvTAIL(prog->float_substr)) {
1714 if (memEQ(strend - len + 1, little, len - 1))
1715 last = strend - len + 1;
1716 else if (!PL_multiline)
9041c2e3 1717 last = memEQ(strend - len, little, len)
d6a28714 1718 ? strend - len : Nullch;
b8c5462f 1719 else
d6a28714
JH
1720 goto find_last;
1721 } else {
1722 find_last:
9041c2e3 1723 if (len)
d6a28714 1724 last = rninstr(s, strend, little, little + len);
b8c5462f 1725 else
d6a28714 1726 last = strend; /* matching `$' */
b8c5462f 1727 }
b8c5462f 1728 }
bf93d4cc
GS
1729 if (last == NULL) {
1730 DEBUG_r(PerlIO_printf(Perl_debug_log,
1731 "%sCan't trim the tail, match fails (should not happen)%s\n",
1732 PL_colors[4],PL_colors[5]));
1733 goto phooey; /* Should not happen! */
1734 }
d6a28714
JH
1735 dontbother = strend - last + prog->float_min_offset;
1736 }
1737 if (minlen && (dontbother < minlen))
1738 dontbother = minlen - 1;
1739 strend -= dontbother; /* this one's always in bytes! */
1740 /* We don't know much -- general case. */
1aa99e6b 1741 if (do_utf8) {
d6a28714
JH
1742 for (;;) {
1743 if (regtry(prog, s))
1744 goto got_it;
1745 if (s >= strend)
1746 break;
b8c5462f 1747 s += UTF8SKIP(s);
d6a28714
JH
1748 };
1749 }
1750 else {
1751 do {
1752 if (regtry(prog, s))
1753 goto got_it;
1754 } while (s++ < strend);
1755 }
1756 }
1757
1758 /* Failure. */
1759 goto phooey;
1760
1761got_it:
1762 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1763
1764 if (PL_reg_eval_set) {
1765 /* Preserve the current value of $^R */
1766 if (oreplsv != GvSV(PL_replgv))
1767 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1768 restored, the value remains
1769 the same. */
1770 restore_pos(aTHXo_ 0);
1771 }
1772
1773 /* make sure $`, $&, $', and $digit will work later */
1774 if ( !(flags & REXEC_NOT_FIRST) ) {
1775 if (RX_MATCH_COPIED(prog)) {
1776 Safefree(prog->subbeg);
1777 RX_MATCH_COPIED_off(prog);
1778 }
1779 if (flags & REXEC_COPY_STR) {
1780 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1781
1782 s = savepvn(strbeg, i);
1783 prog->subbeg = s;
1784 prog->sublen = i;
1785 RX_MATCH_COPIED_on(prog);
1786 }
1787 else {
1788 prog->subbeg = strbeg;
1789 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1790 }
1791 }
9041c2e3 1792
d6a28714
JH
1793 return 1;
1794
1795phooey:
bf93d4cc
GS
1796 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1797 PL_colors[4],PL_colors[5]));
d6a28714
JH
1798 if (PL_reg_eval_set)
1799 restore_pos(aTHXo_ 0);
1800 return 0;
1801}
1802
1803/*
1804 - regtry - try match at specific point
1805 */
1806STATIC I32 /* 0 failure, 1 success */
1807S_regtry(pTHX_ regexp *prog, char *startpos)
1808{
d6a28714
JH
1809 register I32 i;
1810 register I32 *sp;
1811 register I32 *ep;
1812 CHECKPOINT lastcp;
1813
02db2b7b
IZ
1814#ifdef DEBUGGING
1815 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1816#endif
d6a28714
JH
1817 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1818 MAGIC *mg;
1819
1820 PL_reg_eval_set = RS_init;
1821 DEBUG_r(DEBUG_s(
b900a521
JH
1822 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1823 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1824 ));
e8347627 1825 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1826 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1827 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1828 SAVETMPS;
1829 /* Apparently this is not needed, judging by wantarray. */
e8347627 1830 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1831 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1832
1833 if (PL_reg_sv) {
1834 /* Make $_ available to executed code. */
1835 if (PL_reg_sv != DEFSV) {
1836 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1837 SAVESPTR(DEFSV);
1838 DEFSV = PL_reg_sv;
b8c5462f 1839 }
d6a28714 1840
9041c2e3 1841 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1842 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1843 /* prepare for quick setting of pos */
14befaf4
DM
1844 sv_magic(PL_reg_sv, (SV*)0,
1845 PERL_MAGIC_regex_global, Nullch, 0);
1846 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1847 mg->mg_len = -1;
b8c5462f 1848 }
d6a28714
JH
1849 PL_reg_magic = mg;
1850 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1851 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714 1852 }
09687e5a 1853 if (!PL_reg_curpm) {
0f79a09d 1854 Newz(22,PL_reg_curpm, 1, PMOP);
09687e5a
AB
1855#ifdef USE_ITHREADS
1856 {
1857 SV* repointer = newSViv(0);
577e12cc 1858 /* so we know which PL_regex_padav element is PL_reg_curpm */
35061a7e 1859 SvFLAGS(repointer) |= SVf_BREAK;
09687e5a
AB
1860 av_push(PL_regex_padav,repointer);
1861 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
1862 PL_regex_pad = AvARRAY(PL_regex_padav);
1863 }
1864#endif
1865 }
aaa362c4 1866 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
1867 PL_reg_oldcurpm = PL_curpm;
1868 PL_curpm = PL_reg_curpm;
1869 if (RX_MATCH_COPIED(prog)) {
1870 /* Here is a serious problem: we cannot rewrite subbeg,
1871 since it may be needed if this match fails. Thus
1872 $` inside (?{}) could fail... */
1873 PL_reg_oldsaved = prog->subbeg;
1874 PL_reg_oldsavedlen = prog->sublen;
1875 RX_MATCH_COPIED_off(prog);
1876 }
1877 else
1878 PL_reg_oldsaved = Nullch;
1879 prog->subbeg = PL_bostr;
1880 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1881 }
1882 prog->startp[0] = startpos - PL_bostr;
1883 PL_reginput = startpos;
1884 PL_regstartp = prog->startp;
1885 PL_regendp = prog->endp;
1886 PL_reglastparen = &prog->lastparen;
a01268b5 1887 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
1888 prog->lastparen = 0;
1889 PL_regsize = 0;
1890 DEBUG_r(PL_reg_starttry = startpos);
1891 if (PL_reg_start_tmpl <= prog->nparens) {
1892 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1893 if(PL_reg_start_tmp)
1894 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1895 else
1896 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1897 }
1898
1899 /* XXXX What this code is doing here?!!! There should be no need
1900 to do this again and again, PL_reglastparen should take care of
3dd2943c 1901 this! --ilya*/
dafc8851
JH
1902
1903 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1904 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1905 * PL_reglastparen), is not needed at all by the test suite
1906 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1907 * enough, for building DynaLoader, or otherwise this
1908 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1909 * will happen. Meanwhile, this code *is* needed for the
1910 * above-mentioned test suite tests to succeed. The common theme
1911 * on those tests seems to be returning null fields from matches.
1912 * --jhi */
dafc8851 1913#if 1
d6a28714
JH
1914 sp = prog->startp;
1915 ep = prog->endp;
1916 if (prog->nparens) {
09e8ae3b 1917 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1918 *++sp = -1;
1919 *++ep = -1;
1920 }
1921 }
dafc8851 1922#endif
02db2b7b 1923 REGCP_SET(lastcp);
d6a28714
JH
1924 if (regmatch(prog->program + 1)) {
1925 prog->endp[0] = PL_reginput - PL_bostr;
1926 return 1;
1927 }
02db2b7b 1928 REGCP_UNWIND(lastcp);
d6a28714
JH
1929 return 0;
1930}
1931
02db2b7b
IZ
1932#define RE_UNWIND_BRANCH 1
1933#define RE_UNWIND_BRANCHJ 2
1934
1935union re_unwind_t;
1936
1937typedef struct { /* XX: makes sense to enlarge it... */
1938 I32 type;
1939 I32 prev;
1940 CHECKPOINT lastcp;
1941} re_unwind_generic_t;
1942
1943typedef struct {
1944 I32 type;
1945 I32 prev;
1946 CHECKPOINT lastcp;
1947 I32 lastparen;
1948 regnode *next;
1949 char *locinput;
1950 I32 nextchr;
1951#ifdef DEBUGGING
1952 int regindent;
1953#endif
1954} re_unwind_branch_t;
1955
1956typedef union re_unwind_t {
1957 I32 type;
1958 re_unwind_generic_t generic;
1959 re_unwind_branch_t branch;
1960} re_unwind_t;
1961
8ba1375e
MJD
1962#define sayYES goto yes
1963#define sayNO goto no
1964#define sayYES_FINAL goto yes_final
1965#define sayYES_LOUD goto yes_loud
1966#define sayNO_FINAL goto no_final
1967#define sayNO_SILENT goto do_no
1968#define saySAME(x) if (x) goto yes; else goto no
1969
1970#define REPORT_CODE_OFF 24
1971
d6a28714
JH
1972/*
1973 - regmatch - main matching routine
1974 *
1975 * Conceptually the strategy is simple: check to see whether the current
1976 * node matches, call self recursively to see whether the rest matches,
1977 * and then act accordingly. In practice we make some effort to avoid
1978 * recursion, in particular by going through "ordinary" nodes (that don't
1979 * need to know whether the rest of the match failed) by a loop instead of
1980 * by recursion.
1981 */
1982/* [lwall] I've hoisted the register declarations to the outer block in order to
1983 * maybe save a little bit of pushing and popping on the stack. It also takes
1984 * advantage of machines that use a register save mask on subroutine entry.
1985 */
1986STATIC I32 /* 0 failure, 1 success */
1987S_regmatch(pTHX_ regnode *prog)
1988{
d6a28714
JH
1989 register regnode *scan; /* Current node. */
1990 regnode *next; /* Next node. */
1991 regnode *inner; /* Next node in internal branch. */
1992 register I32 nextchr; /* renamed nextchr - nextchar colides with
1993 function of same name */
1994 register I32 n; /* no or next */
b7953727
JH
1995 register I32 ln = 0; /* len or last */
1996 register char *s = Nullch; /* operand or save */
d6a28714 1997 register char *locinput = PL_reginput;
b7953727 1998 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 1999 int minmod = 0, sw = 0, logical = 0;
02db2b7b 2000 I32 unwind = 0;
b7953727 2001#if 0
02db2b7b 2002 I32 firstcp = PL_savestack_ix;
b7953727 2003#endif
ffc61ed2 2004 register bool do_utf8 = DO_UTF8(PL_reg_sv);
02db2b7b 2005
d6a28714
JH
2006#ifdef DEBUGGING
2007 PL_regindent++;
2008#endif
2009
2010 /* Note that nextchr is a byte even in UTF */
2011 nextchr = UCHARAT(locinput);
2012 scan = prog;
2013 while (scan != NULL) {
8ba1375e 2014
d6a28714
JH
2015 DEBUG_r( {
2016 SV *prop = sv_newmortal();
2017 int docolor = *PL_colors[0];
2018 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2019 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2020 /* The part of the string before starttry has one color
2021 (pref0_len chars), between starttry and current
2022 position another one (pref_len - pref0_len chars),
2023 after the current position the third one.
2024 We assume that pref0_len <= pref_len, otherwise we
2025 decrease pref0_len. */
9041c2e3 2026 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2027 ? (5 + taill) - l : locinput - PL_bostr;
2028 int pref0_len;
d6a28714 2029
1aa99e6b
IH
2030 while (UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2031 pref_len++;
2032 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2033 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2034 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2035 ? (5 + taill) - pref_len : PL_regeol - locinput);
1aa99e6b
IH
2036 while (UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2037 l--;
d6a28714
JH
2038 if (pref0_len < 0)
2039 pref0_len = 0;
2040 if (pref0_len > pref_len)
2041 pref0_len = pref_len;
2042 regprop(prop, scan);
9041c2e3 2043 PerlIO_printf(Perl_debug_log,
b900a521 2044 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
9041c2e3
NIS
2045 (IV)(locinput - PL_bostr),
2046 PL_colors[4], pref0_len,
d6a28714 2047 locinput - pref_len, PL_colors[5],
9041c2e3 2048 PL_colors[2], pref_len - pref0_len,
d6a28714
JH
2049 locinput - pref_len + pref0_len, PL_colors[3],
2050 (docolor ? "" : "> <"),
2051 PL_colors[0], l, locinput, PL_colors[1],
2052 15 - l - pref_len + 1,
2053 "",
b900a521 2054 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
2055 SvPVX(prop));
2056 } );
2057
2058 next = scan + NEXT_OFF(scan);
2059 if (next == scan)
2060 next = NULL;
2061
2062 switch (OP(scan)) {
2063 case BOL:
12d33761
HS
2064 if (locinput == PL_bostr || (PL_multiline &&
2065 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2066 {
2067 /* regtill = regbol; */
b8c5462f
JH
2068 break;
2069 }
d6a28714
JH
2070 sayNO;
2071 case MBOL:
12d33761
HS
2072 if (locinput == PL_bostr ||
2073 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2074 {
b8c5462f
JH
2075 break;
2076 }
d6a28714
JH
2077 sayNO;
2078 case SBOL:
c2a73568 2079 if (locinput == PL_bostr)
b8c5462f 2080 break;
d6a28714
JH
2081 sayNO;
2082 case GPOS:
2083 if (locinput == PL_reg_ganch)
2084 break;
2085 sayNO;
2086 case EOL:
2087 if (PL_multiline)
2088 goto meol;
2089 else
2090 goto seol;
2091 case MEOL:
2092 meol:
2093 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2094 sayNO;
b8c5462f 2095 break;
d6a28714
JH
2096 case SEOL:
2097 seol:
2098 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2099 sayNO;
d6a28714 2100 if (PL_regeol - locinput > 1)
b8c5462f 2101 sayNO;
b8c5462f 2102 break;
d6a28714
JH
2103 case EOS:
2104 if (PL_regeol != locinput)
b8c5462f 2105 sayNO;
d6a28714 2106 break;
ffc61ed2 2107 case SANY:
d6a28714 2108 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2109 sayNO;
b8c5462f 2110 nextchr = UCHARAT(++locinput);
a0d0e21e 2111 break;
ffc61ed2 2112 case REG_ANY:
1aa99e6b
IH
2113 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2114 sayNO;
2115 if (do_utf8) {
b8c5462f 2116 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2117 if (locinput > PL_regeol)
2118 sayNO;
a0ed51b3 2119 nextchr = UCHARAT(locinput);
a0ed51b3 2120 }
1aa99e6b
IH
2121 else
2122 nextchr = UCHARAT(++locinput);
a0ed51b3 2123 break;
d6a28714 2124 case EXACT:
cd439c50
IZ
2125 s = STRING(scan);
2126 ln = STR_LEN(scan);
1aa99e6b
IH
2127 if (do_utf8 != (UTF!=0)) {
2128 char *l = locinput;
2129 char *e = s + ln;
2130 STRLEN len;
2131 if (do_utf8)
2132 while (s < e) {
2133 if (l >= PL_regeol)
2134 sayNO;
9041c2e3 2135 if (*((U8*)s) != utf8_to_uvchr((U8*)l, &len))
1aa99e6b
IH
2136 sayNO;
2137 s++;
2138 l += len;
2139 }
2140 else
2141 while (s < e) {
2142 if (l >= PL_regeol)
2143 sayNO;
9041c2e3 2144 if (*((U8*)l) != utf8_to_uvchr((U8*)s, &len))
1aa99e6b
IH
2145 sayNO;
2146 s += len;
2147 l++;
2148 }
2149 locinput = l;
2150 nextchr = UCHARAT(locinput);
2151 break;
2152 }
d6a28714
JH
2153 /* Inline the first character, for speed. */
2154 if (UCHARAT(s) != nextchr)
2155 sayNO;
2156 if (PL_regeol - locinput < ln)
2157 sayNO;
2158 if (ln > 1 && memNE(s, locinput, ln))
2159 sayNO;
2160 locinput += ln;
2161 nextchr = UCHARAT(locinput);
2162 break;
2163 case EXACTFL:
b8c5462f
JH
2164 PL_reg_flags |= RF_tainted;
2165 /* FALL THROUGH */
d6a28714 2166 case EXACTF:
cd439c50
IZ
2167 s = STRING(scan);
2168 ln = STR_LEN(scan);
d6a28714 2169
1aa99e6b 2170 if (do_utf8) {
d6a28714 2171 char *l = locinput;
1aa99e6b
IH
2172 char *e;
2173 e = s + ln;
d6a28714
JH
2174 c1 = OP(scan) == EXACTF;
2175 while (s < e) {
1aa99e6b 2176 if (l >= PL_regeol) {
d6a28714
JH
2177 sayNO;
2178 }
2b9d42f0 2179 if ((UTF ? utf8n_to_uvchr((U8*)s, e - s, 0, 0) : *((U8*)s)) !=
1aa99e6b
IH
2180 (c1 ? toLOWER_utf8((U8*)l) : toLOWER_LC_utf8((U8*)l)))
2181 sayNO;
2182 s += UTF ? UTF8SKIP(s) : 1;
d6a28714 2183 l += UTF8SKIP(l);
b8c5462f 2184 }
d6a28714 2185 locinput = l;
a0ed51b3
LW
2186 nextchr = UCHARAT(locinput);
2187 break;
2188 }
d6a28714
JH
2189
2190 /* Inline the first character, for speed. */
2191 if (UCHARAT(s) != nextchr &&
2192 UCHARAT(s) != ((OP(scan) == EXACTF)
2193 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2194 sayNO;
d6a28714 2195 if (PL_regeol - locinput < ln)
b8c5462f 2196 sayNO;
d6a28714
JH
2197 if (ln > 1 && (OP(scan) == EXACTF
2198 ? ibcmp(s, locinput, ln)
2199 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2200 sayNO;
d6a28714
JH
2201 locinput += ln;
2202 nextchr = UCHARAT(locinput);
a0d0e21e 2203 break;
d6a28714 2204 case ANYOF:
ffc61ed2
JH
2205 if (do_utf8) {
2206 if (!reginclass(scan, (U8*)locinput, do_utf8))
2207 sayNO;
2208 if (locinput >= PL_regeol)
2209 sayNO;
2210 locinput += PL_utf8skip[nextchr];
b8c5462f 2211 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2212 }
2213 else {
2214 if (nextchr < 0)
2215 nextchr = UCHARAT(locinput);
2216 if (!reginclass(scan, (U8*)locinput, do_utf8))
2217 sayNO;
2218 if (!nextchr && locinput >= PL_regeol)
2219 sayNO;
2220 nextchr = UCHARAT(++locinput);
2221 }
b8c5462f 2222 break;
d6a28714 2223 case ALNUML:
b8c5462f
JH
2224 PL_reg_flags |= RF_tainted;
2225 /* FALL THROUGH */
d6a28714 2226 case ALNUM:
b8c5462f 2227 if (!nextchr)
4633a7c4 2228 sayNO;
ffc61ed2 2229 if (do_utf8) {
ad24be35 2230 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2231 if (!(OP(scan) == ALNUM
3568d838 2232 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2233 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2234 {
2235 sayNO;
a0ed51b3 2236 }
b8c5462f 2237 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2238 nextchr = UCHARAT(locinput);
2239 break;
2240 }
ffc61ed2 2241 if (!(OP(scan) == ALNUM
d6a28714 2242 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2243 sayNO;
b8c5462f 2244 nextchr = UCHARAT(++locinput);
a0d0e21e 2245 break;
d6a28714 2246 case NALNUML:
b8c5462f
JH
2247 PL_reg_flags |= RF_tainted;
2248 /* FALL THROUGH */
d6a28714
JH
2249 case NALNUM:
2250 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2251 sayNO;
ffc61ed2 2252 if (do_utf8) {
8269fa76 2253 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2254 if (OP(scan) == NALNUM
3568d838 2255 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2256 : isALNUM_LC_utf8((U8*)locinput))
2257 {
b8c5462f 2258 sayNO;
d6a28714 2259 }
b8c5462f
JH
2260 locinput += PL_utf8skip[nextchr];
2261 nextchr = UCHARAT(locinput);
2262 break;
2263 }
ffc61ed2 2264 if (OP(scan) == NALNUM
d6a28714 2265 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2266 sayNO;
76e3520e 2267 nextchr = UCHARAT(++locinput);
a0d0e21e 2268 break;
d6a28714
JH
2269 case BOUNDL:
2270 case NBOUNDL:
3280af22 2271 PL_reg_flags |= RF_tainted;
bbce6d69 2272 /* FALL THROUGH */
d6a28714
JH
2273 case BOUND:
2274 case NBOUND:
2275 /* was last char in word? */
ffc61ed2 2276 if (do_utf8) {
12d33761
HS
2277 if (locinput == PL_bostr)
2278 ln = '\n';
ffc61ed2
JH
2279 else {
2280 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2281
2b9d42f0 2282 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2283 }
2284 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2285 ln = isALNUM_uni(ln);
8269fa76 2286 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2287 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2288 }
2289 else {
9041c2e3 2290 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2291 n = isALNUM_LC_utf8((U8*)locinput);
2292 }
a0ed51b3 2293 }
d6a28714 2294 else {
12d33761
HS
2295 ln = (locinput != PL_bostr) ?
2296 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2297 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2298 ln = isALNUM(ln);
2299 n = isALNUM(nextchr);
2300 }
2301 else {
2302 ln = isALNUM_LC(ln);
2303 n = isALNUM_LC(nextchr);
2304 }
d6a28714 2305 }
ffc61ed2
JH
2306 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2307 OP(scan) == BOUNDL))
2308 sayNO;
a0ed51b3 2309 break;
d6a28714 2310 case SPACEL:
3280af22 2311 PL_reg_flags |= RF_tainted;
bbce6d69 2312 /* FALL THROUGH */
d6a28714 2313 case SPACE:
9442cb0e 2314 if (!nextchr)
4633a7c4 2315 sayNO;
1aa99e6b 2316 if (do_utf8) {
fd400ab9 2317 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2318 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2319 if (!(OP(scan) == SPACE
3568d838 2320 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2321 : isSPACE_LC_utf8((U8*)locinput)))
2322 {
2323 sayNO;
2324 }
2325 locinput += PL_utf8skip[nextchr];
2326 nextchr = UCHARAT(locinput);
2327 break;
d6a28714 2328 }
ffc61ed2
JH
2329 if (!(OP(scan) == SPACE
2330 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2331 sayNO;
2332 nextchr = UCHARAT(++locinput);
2333 }
2334 else {
2335 if (!(OP(scan) == SPACE
2336 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2337 sayNO;
2338 nextchr = UCHARAT(++locinput);
a0ed51b3 2339 }
a0ed51b3 2340 break;
d6a28714 2341 case NSPACEL:
3280af22 2342 PL_reg_flags |= RF_tainted;
bbce6d69 2343 /* FALL THROUGH */
d6a28714 2344 case NSPACE:
9442cb0e 2345 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2346 sayNO;
1aa99e6b 2347 if (do_utf8) {
8269fa76 2348 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2349 if (OP(scan) == NSPACE
3568d838 2350 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2351 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2352 {
2353 sayNO;
2354 }
2355 locinput += PL_utf8skip[nextchr];
2356 nextchr = UCHARAT(locinput);
2357 break;
a0ed51b3 2358 }
ffc61ed2 2359 if (OP(scan) == NSPACE
d6a28714 2360 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2361 sayNO;
76e3520e 2362 nextchr = UCHARAT(++locinput);
a0d0e21e 2363 break;
d6a28714 2364 case DIGITL:
a0ed51b3
LW
2365 PL_reg_flags |= RF_tainted;
2366 /* FALL THROUGH */
d6a28714 2367 case DIGIT:
9442cb0e 2368 if (!nextchr)
a0ed51b3 2369 sayNO;
1aa99e6b 2370 if (do_utf8) {
8269fa76 2371 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2372 if (!(OP(scan) == DIGIT
3568d838 2373 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2374 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2375 {
a0ed51b3 2376 sayNO;
dfe13c55 2377 }
6f06b55f 2378 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2379 nextchr = UCHARAT(locinput);
2380 break;
2381 }
ffc61ed2 2382 if (!(OP(scan) == DIGIT
9442cb0e 2383 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2384 sayNO;
2385 nextchr = UCHARAT(++locinput);
2386 break;
d6a28714 2387 case NDIGITL:
b8c5462f
JH
2388 PL_reg_flags |= RF_tainted;
2389 /* FALL THROUGH */
d6a28714 2390 case NDIGIT:
9442cb0e 2391 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2392 sayNO;
1aa99e6b 2393 if (do_utf8) {
8269fa76 2394 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2395 if (OP(scan) == NDIGIT
3568d838 2396 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2397 : isDIGIT_LC_utf8((U8*)locinput))
2398 {
a0ed51b3 2399 sayNO;
9442cb0e 2400 }
6f06b55f 2401 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2402 nextchr = UCHARAT(locinput);
2403 break;
2404 }
ffc61ed2 2405 if (OP(scan) == NDIGIT
9442cb0e 2406 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2407 sayNO;
2408 nextchr = UCHARAT(++locinput);
2409 break;
2410 case CLUMP:
8269fa76 2411 LOAD_UTF8_CHARCLASS(mark,"~");
3568d838
JH
2412 if (locinput >= PL_regeol ||
2413 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3 2414 sayNO;
6f06b55f 2415 locinput += PL_utf8skip[nextchr];
3568d838
JH
2416 while (locinput < PL_regeol &&
2417 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3
LW
2418 locinput += UTF8SKIP(locinput);
2419 if (locinput > PL_regeol)
2420 sayNO;
2421 nextchr = UCHARAT(locinput);
2422 break;
c8756f30 2423 case REFFL:
3280af22 2424 PL_reg_flags |= RF_tainted;
c8756f30 2425 /* FALL THROUGH */
c277df42 2426 case REF:
c8756f30 2427 case REFF:
c277df42 2428 n = ARG(scan); /* which paren pair */
cf93c79d 2429 ln = PL_regstartp[n];
2c2d71f5 2430 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2431 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2432 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2433 if (ln == PL_regendp[n])
a0d0e21e 2434 break;
a0ed51b3 2435
cf93c79d 2436 s = PL_bostr + ln;
1aa99e6b 2437 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2438 char *l = locinput;
cf93c79d 2439 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2440 /*
2441 * Note that we can't do the "other character" lookup trick as
2442 * in the 8-bit case (no pun intended) because in Unicode we
2443 * have to map both upper and title case to lower case.
2444 */
2445 if (OP(scan) == REFF) {
2446 while (s < e) {
2447 if (l >= PL_regeol)
2448 sayNO;
dfe13c55 2449 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2450 sayNO;
2451 s += UTF8SKIP(s);
2452 l += UTF8SKIP(l);
2453 }
2454 }
2455 else {
2456 while (s < e) {
2457 if (l >= PL_regeol)
2458 sayNO;
dfe13c55 2459 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2460 sayNO;
2461 s += UTF8SKIP(s);
2462 l += UTF8SKIP(l);
2463 }
2464 }
2465 locinput = l;
2466 nextchr = UCHARAT(locinput);
2467 break;
2468 }
2469
a0d0e21e 2470 /* Inline the first character, for speed. */
76e3520e 2471 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2472 (OP(scan) == REF ||
2473 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2474 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2475 sayNO;
cf93c79d 2476 ln = PL_regendp[n] - ln;
3280af22 2477 if (locinput + ln > PL_regeol)
4633a7c4 2478 sayNO;
c8756f30
AK
2479 if (ln > 1 && (OP(scan) == REF
2480 ? memNE(s, locinput, ln)
2481 : (OP(scan) == REFF
2482 ? ibcmp(s, locinput, ln)
2483 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2484 sayNO;
a0d0e21e 2485 locinput += ln;
76e3520e 2486 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2487 break;
2488
2489 case NOTHING:
c277df42 2490 case TAIL:
a0d0e21e
LW
2491 break;
2492 case BACK:
2493 break;
c277df42
IZ
2494 case EVAL:
2495 {
2496 dSP;
533c011a 2497 OP_4tree *oop = PL_op;
3280af22
NIS
2498 COP *ocurcop = PL_curcop;
2499 SV **ocurpad = PL_curpad;
c277df42 2500 SV *ret;
9041c2e3 2501
c277df42 2502 n = ARG(scan);
533c011a 2503 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2504 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2505 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2506 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2507
cea2e8a9 2508 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2509 SPAGAIN;
2510 ret = POPs;
2511 PUTBACK;
9041c2e3 2512
0f5d15d6
IZ
2513 PL_op = oop;
2514 PL_curpad = ocurpad;
2515 PL_curcop = ocurcop;
c277df42 2516 if (logical) {
0f5d15d6
IZ
2517 if (logical == 2) { /* Postponed subexpression. */
2518 regexp *re;
22c35a8c 2519 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2520 re_cc_state state;
0f5d15d6
IZ
2521 CHECKPOINT cp, lastcp;
2522
2523 if(SvROK(ret) || SvRMAGICAL(ret)) {
2524 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2525
2526 if(SvMAGICAL(sv))
14befaf4 2527 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2528 }
2529 if (mg) {
2530 re = (regexp *)mg->mg_obj;
df0003d4 2531 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2532 }
2533 else {
2534 STRLEN len;
2535 char *t = SvPV(ret, len);
2536 PMOP pm;
2537 char *oprecomp = PL_regprecomp;
2538 I32 osize = PL_regsize;
2539 I32 onpar = PL_regnpar;
2540
5fcd1c1b 2541 Zero(&pm, 1, PMOP);
cea2e8a9 2542 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2543 if (!(SvFLAGS(ret)
0f5d15d6 2544 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2545 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2546 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2547 PL_regprecomp = oprecomp;
2548 PL_regsize = osize;
2549 PL_regnpar = onpar;
2550 }
2551 DEBUG_r(
9041c2e3 2552 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2553 "Entering embedded `%s%.60s%s%s'\n",
2554 PL_colors[0],
2555 re->precomp,
2556 PL_colors[1],
2557 (strlen(re->precomp) > 60 ? "..." : ""))
2558 );
2559 state.node = next;
2560 state.prev = PL_reg_call_cc;
2561 state.cc = PL_regcc;
2562 state.re = PL_reg_re;
2563
2ab05381 2564 PL_regcc = 0;
9041c2e3 2565
0f5d15d6 2566 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2567 REGCP_SET(lastcp);
0f5d15d6
IZ
2568 cache_re(re);
2569 state.ss = PL_savestack_ix;
2570 *PL_reglastparen = 0;
a01268b5 2571 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2572 PL_reg_call_cc = &state;
2573 PL_reginput = locinput;
2c2d71f5
JH
2574
2575 /* XXXX This is too dramatic a measure... */
2576 PL_reg_maxiter = 0;
2577
0f5d15d6 2578 if (regmatch(re->program + 1)) {
2c914db6
IZ
2579 /* Even though we succeeded, we need to restore
2580 global variables, since we may be wrapped inside
2581 SUSPEND, thus the match may be not finished yet. */
2582
2583 /* XXXX Do this only if SUSPENDed? */
2584 PL_reg_call_cc = state.prev;
2585 PL_regcc = state.cc;
2586 PL_reg_re = state.re;
2587 cache_re(PL_reg_re);
2588
2589 /* XXXX This is too dramatic a measure... */
2590 PL_reg_maxiter = 0;
2591
2592 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2593 ReREFCNT_dec(re);
2594 regcpblow(cp);
2595 sayYES;
2596 }
0f5d15d6 2597 ReREFCNT_dec(re);
02db2b7b 2598 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2599 regcppop();
2600 PL_reg_call_cc = state.prev;
2601 PL_regcc = state.cc;
2602 PL_reg_re = state.re;
d3790889 2603 cache_re(PL_reg_re);
2c2d71f5
JH
2604
2605 /* XXXX This is too dramatic a measure... */
2606 PL_reg_maxiter = 0;
2607
0f5d15d6
IZ
2608 sayNO;
2609 }
c277df42 2610 sw = SvTRUE(ret);
0f5d15d6 2611 logical = 0;
a0ed51b3
LW
2612 }
2613 else
3280af22 2614 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2615 break;
2616 }
a0d0e21e 2617 case OPEN:
c277df42 2618 n = ARG(scan); /* which paren pair */
3280af22
NIS
2619 PL_reg_start_tmp[n] = locinput;
2620 if (n > PL_regsize)
2621 PL_regsize = n;
a0d0e21e
LW
2622 break;
2623 case CLOSE:
c277df42 2624 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2625 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2626 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2627 if (n > *PL_reglastparen)
2628 *PL_reglastparen = n;
a01268b5 2629 *PL_reglastcloseparen = n;
a0d0e21e 2630 break;
c277df42
IZ
2631 case GROUPP:
2632 n = ARG(scan); /* which paren pair */
cf93c79d 2633 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2634 break;
2635 case IFTHEN:
2c2d71f5 2636 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2637 if (sw)
2638 next = NEXTOPER(NEXTOPER(scan));
2639 else {
2640 next = scan + ARG(scan);
2641 if (OP(next) == IFTHEN) /* Fake one. */
2642 next = NEXTOPER(NEXTOPER(next));
2643 }
2644 break;
2645 case LOGICAL:
0f5d15d6 2646 logical = scan->flags;
c277df42 2647 break;
2ab05381
IZ
2648/*******************************************************************
2649 PL_regcc contains infoblock about the innermost (...)* loop, and
2650 a pointer to the next outer infoblock.
2651
2652 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2653
2654 1) After matching X, regnode for CURLYX is processed;
2655
9041c2e3 2656 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2657 regmatch() recursively with the starting point at WHILEM node;
2658
2659 3) Each hit of WHILEM node tries to match A and Z (in the order
2660 depending on the current iteration, min/max of {min,max} and
2661 greediness). The information about where are nodes for "A"
2662 and "Z" is read from the infoblock, as is info on how many times "A"
2663 was already matched, and greediness.
2664
2665 4) After A matches, the same WHILEM node is hit again.
2666
2667 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2668 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2669 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2670 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2671 of the external loop.
2672
2673 Currently present infoblocks form a tree with a stem formed by PL_curcc
2674 and whatever it mentions via ->next, and additional attached trees
2675 corresponding to temporarily unset infoblocks as in "5" above.
2676
9041c2e3 2677 In the following picture infoblocks for outer loop of
2ab05381
IZ
2678 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2679 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2680 infoblocks are drawn below the "reset" infoblock.
2681
2682 In fact in the picture below we do not show failed matches for Z and T
2683 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2684 more obvious *why* one needs to *temporary* unset infoblocks.]
2685
2686 Matched REx position InfoBlocks Comment
2687 (Y(A)*?Z)*?T x
2688 Y(A)*?Z)*?T x <- O
2689 Y (A)*?Z)*?T x <- O
2690 Y A)*?Z)*?T x <- O <- I
2691 YA )*?Z)*?T x <- O <- I
2692 YA A)*?Z)*?T x <- O <- I
2693 YAA )*?Z)*?T x <- O <- I
2694 YAA Z)*?T x <- O # Temporary unset I
2695 I
2696
2697 YAAZ Y(A)*?Z)*?T x <- O
2698 I
2699
2700 YAAZY (A)*?Z)*?T x <- O
2701 I
2702
2703 YAAZY A)*?Z)*?T x <- O <- I
2704 I
2705
2706 YAAZYA )*?Z)*?T x <- O <- I
2707 I
2708
2709 YAAZYA Z)*?T x <- O # Temporary unset I
2710 I,I
2711
2712 YAAZYAZ )*?T x <- O
2713 I,I
2714
2715 YAAZYAZ T x # Temporary unset O
2716 O
2717 I,I
2718
2719 YAAZYAZT x
2720 O
2721 I,I
2722 *******************************************************************/
a0d0e21e
LW
2723 case CURLYX: {
2724 CURCUR cc;
3280af22 2725 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2726 /* No need to save/restore up to this paren */
2727 I32 parenfloor = scan->flags;
c277df42
IZ
2728
2729 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2730 next += ARG(next);
3280af22
NIS
2731 cc.oldcc = PL_regcc;
2732 PL_regcc = &cc;
cb434fcc
IZ
2733 /* XXXX Probably it is better to teach regpush to support
2734 parenfloor > PL_regsize... */
2735 if (parenfloor > *PL_reglastparen)
2736 parenfloor = *PL_reglastparen; /* Pessimization... */
2737 cc.parenfloor = parenfloor;
a0d0e21e
LW
2738 cc.cur = -1;
2739 cc.min = ARG1(scan);
2740 cc.max = ARG2(scan);
c277df42 2741 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2742 cc.next = next;
2743 cc.minmod = minmod;
2744 cc.lastloc = 0;
3280af22 2745 PL_reginput = locinput;
a0d0e21e
LW
2746 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2747 regcpblow(cp);
3280af22 2748 PL_regcc = cc.oldcc;
4633a7c4 2749 saySAME(n);
a0d0e21e
LW
2750 }
2751 /* NOT REACHED */
2752 case WHILEM: {
2753 /*
2754 * This is really hard to understand, because after we match
2755 * what we're trying to match, we must make sure the rest of
2c2d71f5 2756 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2757 * to go back UP the parse tree by recursing ever deeper. And
2758 * if it fails, we have to reset our parent's current state
2759 * that we can try again after backing off.
2760 */
2761
c277df42 2762 CHECKPOINT cp, lastcp;
3280af22 2763 CURCUR* cc = PL_regcc;
c277df42
IZ
2764 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2765
4633a7c4 2766 n = cc->cur + 1; /* how many we know we matched */
3280af22 2767 PL_reginput = locinput;
a0d0e21e 2768
c277df42 2769 DEBUG_r(
9041c2e3
NIS
2770 PerlIO_printf(Perl_debug_log,
2771 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2772 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2773 (long)n, (long)cc->min,
c277df42
IZ
2774 (long)cc->max, (long)cc)
2775 );
4633a7c4 2776
a0d0e21e
LW
2777 /* If degenerate scan matches "", assume scan done. */
2778
579cf2c3 2779 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2780 PL_regcc = cc->oldcc;
2ab05381
IZ
2781 if (PL_regcc)
2782 ln = PL_regcc->cur;
c277df42 2783 DEBUG_r(
c3464db5
DD
2784 PerlIO_printf(Perl_debug_log,
2785 "%*s empty match detected, try continuation...\n",
3280af22 2786 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2787 );
a0d0e21e 2788 if (regmatch(cc->next))
4633a7c4 2789 sayYES;
2ab05381
IZ
2790 if (PL_regcc)
2791 PL_regcc->cur = ln;
3280af22 2792 PL_regcc = cc;
4633a7c4 2793 sayNO;
a0d0e21e
LW
2794 }
2795
2796 /* First just match a string of min scans. */
2797
2798 if (n < cc->min) {
2799 cc->cur = n;
2800 cc->lastloc = locinput;
4633a7c4
LW
2801 if (regmatch(cc->scan))
2802 sayYES;
2803 cc->cur = n - 1;
c277df42 2804 cc->lastloc = lastloc;
4633a7c4 2805 sayNO;
a0d0e21e
LW
2806 }
2807
2c2d71f5
JH
2808 if (scan->flags) {
2809 /* Check whether we already were at this position.
2810 Postpone detection until we know the match is not
2811 *that* much linear. */
2812 if (!PL_reg_maxiter) {
2813 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2814 PL_reg_leftiter = PL_reg_maxiter;
2815 }
2816 if (PL_reg_leftiter-- == 0) {
2817 I32 size = (PL_reg_maxiter + 7)/8;
2818 if (PL_reg_poscache) {
2819 if (PL_reg_poscache_size < size) {
2820 Renew(PL_reg_poscache, size, char);
2821 PL_reg_poscache_size = size;
2822 }
2823 Zero(PL_reg_poscache, size, char);
2824 }
2825 else {
2826 PL_reg_poscache_size = size;
2827 Newz(29, PL_reg_poscache, size, char);
2828 }
2829 DEBUG_r(
2830 PerlIO_printf(Perl_debug_log,
2831 "%sDetected a super-linear match, switching on caching%s...\n",
2832 PL_colors[4], PL_colors[5])
2833 );
2834 }
2835 if (PL_reg_leftiter < 0) {
2836 I32 o = locinput - PL_bostr, b;
2837
2838 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2839 b = o % 8;
2840 o /= 8;
2841 if (PL_reg_poscache[o] & (1<<b)) {
2842 DEBUG_r(
2843 PerlIO_printf(Perl_debug_log,
2844 "%*s already tried at this position...\n",
2845 REPORT_CODE_OFF+PL_regindent*2, "")
2846 );
7821416a 2847 sayNO_SILENT;
2c2d71f5
JH
2848 }
2849 PL_reg_poscache[o] |= (1<<b);
2850 }
2851 }
2852
a0d0e21e
LW
2853 /* Prefer next over scan for minimal matching. */
2854
2855 if (cc->minmod) {
3280af22 2856 PL_regcc = cc->oldcc;
2ab05381
IZ
2857 if (PL_regcc)
2858 ln = PL_regcc->cur;
5f05dabc 2859 cp = regcppush(cc->parenfloor);
02db2b7b 2860 REGCP_SET(lastcp);
5f05dabc 2861 if (regmatch(cc->next)) {
c277df42 2862 regcpblow(cp);
4633a7c4 2863 sayYES; /* All done. */
5f05dabc 2864 }
02db2b7b 2865 REGCP_UNWIND(lastcp);
5f05dabc 2866 regcppop();
2ab05381
IZ
2867 if (PL_regcc)
2868 PL_regcc->cur = ln;
3280af22 2869 PL_regcc = cc;
a0d0e21e 2870
c277df42 2871 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 2872 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2873 && !(PL_reg_flags & RF_warned)) {
2874 PL_reg_flags |= RF_warned;
e476b1b5 2875 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2876 "Complex regular subexpression recursion",
2877 REG_INFTY - 1);
c277df42 2878 }
4633a7c4 2879 sayNO;
c277df42 2880 }
a687059c 2881
c277df42 2882 DEBUG_r(
c3464db5
DD
2883 PerlIO_printf(Perl_debug_log,
2884 "%*s trying longer...\n",
3280af22 2885 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2886 );
a0d0e21e 2887 /* Try scanning more and see if it helps. */
3280af22 2888 PL_reginput = locinput;
a0d0e21e
LW
2889 cc->cur = n;
2890 cc->lastloc = locinput;
5f05dabc 2891 cp = regcppush(cc->parenfloor);
02db2b7b 2892 REGCP_SET(lastcp);
5f05dabc 2893 if (regmatch(cc->scan)) {
c277df42 2894 regcpblow(cp);
4633a7c4 2895 sayYES;
5f05dabc 2896 }
02db2b7b 2897 REGCP_UNWIND(lastcp);
5f05dabc 2898 regcppop();
4633a7c4 2899 cc->cur = n - 1;
c277df42 2900 cc->lastloc = lastloc;
4633a7c4 2901 sayNO;
a0d0e21e
LW
2902 }
2903
2904 /* Prefer scan over next for maximal matching. */
2905
2906 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2907 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2908 cc->cur = n;
2909 cc->lastloc = locinput;
02db2b7b 2910 REGCP_SET(lastcp);
5f05dabc 2911 if (regmatch(cc->scan)) {
c277df42 2912 regcpblow(cp);
4633a7c4 2913 sayYES;
5f05dabc 2914 }
02db2b7b 2915 REGCP_UNWIND(lastcp);
a0d0e21e 2916 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2917 PL_reginput = locinput;
c277df42 2918 DEBUG_r(
c3464db5
DD
2919 PerlIO_printf(Perl_debug_log,
2920 "%*s failed, try continuation...\n",
3280af22 2921 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2922 );
2923 }
9041c2e3 2924 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2925 && !(PL_reg_flags & RF_warned)) {
3280af22 2926 PL_reg_flags |= RF_warned;
e476b1b5 2927 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2928 "Complex regular subexpression recursion",
2929 REG_INFTY - 1);
a0d0e21e
LW
2930 }
2931
2932 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2933 PL_regcc = cc->oldcc;
2ab05381
IZ
2934 if (PL_regcc)
2935 ln = PL_regcc->cur;
a0d0e21e 2936 if (regmatch(cc->next))
4633a7c4 2937 sayYES;
2ab05381
IZ
2938 if (PL_regcc)
2939 PL_regcc->cur = ln;
3280af22 2940 PL_regcc = cc;
4633a7c4 2941 cc->cur = n - 1;
c277df42 2942 cc->lastloc = lastloc;
4633a7c4 2943 sayNO;
a0d0e21e
LW
2944 }
2945 /* NOT REACHED */
9041c2e3 2946 case BRANCHJ:
c277df42
IZ
2947 next = scan + ARG(scan);
2948 if (next == scan)
2949 next = NULL;
2950 inner = NEXTOPER(NEXTOPER(scan));
2951 goto do_branch;
9041c2e3 2952 case BRANCH:
c277df42
IZ
2953 inner = NEXTOPER(scan);
2954 do_branch:
2955 {
c277df42
IZ
2956 c1 = OP(scan);
2957 if (OP(next) != c1) /* No choice. */
2958 next = inner; /* Avoid recursion. */
a0d0e21e 2959 else {
02db2b7b
IZ
2960 I32 lastparen = *PL_reglastparen;
2961 I32 unwind1;
2962 re_unwind_branch_t *uw;
2963
2964 /* Put unwinding data on stack */
2965 unwind1 = SSNEWt(1,re_unwind_branch_t);
2966 uw = SSPTRt(unwind1,re_unwind_branch_t);
2967 uw->prev = unwind;
2968 unwind = unwind1;
2969 uw->type = ((c1 == BRANCH)
2970 ? RE_UNWIND_BRANCH
2971 : RE_UNWIND_BRANCHJ);
2972 uw->lastparen = lastparen;
2973 uw->next = next;
2974 uw->locinput = locinput;
2975 uw->nextchr = nextchr;
2976#ifdef DEBUGGING
2977 uw->regindent = ++PL_regindent;
2978#endif
c277df42 2979
02db2b7b
IZ
2980 REGCP_SET(uw->lastcp);
2981
2982 /* Now go into the first branch */
2983 next = inner;
a687059c 2984 }
a0d0e21e
LW
2985 }
2986 break;
2987 case MINMOD:
2988 minmod = 1;
2989 break;
c277df42
IZ
2990 case CURLYM:
2991 {
00db4c45 2992 I32 l = 0;
c277df42 2993 CHECKPOINT lastcp;
9041c2e3 2994
c277df42
IZ
2995 /* We suppose that the next guy does not need
2996 backtracking: in particular, it is of constant length,
2997 and has no parenths to influence future backrefs. */
2998 ln = ARG1(scan); /* min to match */
2999 n = ARG2(scan); /* max to match */
c277df42
IZ
3000 paren = scan->flags;
3001 if (paren) {
3280af22
NIS
3002 if (paren > PL_regsize)
3003 PL_regsize = paren;
3004 if (paren > *PL_reglastparen)
3005 *PL_reglastparen = paren;
c277df42 3006 }
dc45a647 3007 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3008 if (paren)
3009 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3010 PL_reginput = locinput;
c277df42
IZ
3011 if (minmod) {
3012 minmod = 0;
3013 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3014 sayNO;
5f4b28b2 3015 if (ln && l == 0 && n >= ln
c277df42
IZ
3016 /* In fact, this is tricky. If paren, then the
3017 fact that we did/didnot match may influence
3018 future execution. */
3019 && !(paren && ln == 0))
3020 ln = n;
3280af22 3021 locinput = PL_reginput;
22c35a8c 3022 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3023 c1 = (U8)*STRING(next);
c277df42 3024 if (OP(next) == EXACTF)
22c35a8c 3025 c2 = PL_fold[c1];
c277df42 3026 else if (OP(next) == EXACTFL)
22c35a8c 3027 c2 = PL_fold_locale[c1];
c277df42
IZ
3028 else
3029 c2 = c1;
a0ed51b3
LW
3030 }
3031 else
c277df42 3032 c1 = c2 = -1000;
02db2b7b 3033 REGCP_SET(lastcp);
5f4b28b2 3034 /* This may be improved if l == 0. */
c277df42
IZ
3035 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3036 /* If it could work, try it. */
3037 if (c1 == -1000 ||
3280af22
NIS
3038 UCHARAT(PL_reginput) == c1 ||
3039 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3040 {
3041 if (paren) {
3042 if (n) {
cf93c79d
IZ
3043 PL_regstartp[paren] =
3044 HOPc(PL_reginput, -l) - PL_bostr;
3045 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3046 }
3047 else
cf93c79d 3048 PL_regendp[paren] = -1;
c277df42
IZ
3049 }
3050 if (regmatch(next))
3051 sayYES;
02db2b7b 3052 REGCP_UNWIND(lastcp);
c277df42
IZ
3053 }
3054 /* Couldn't or didn't -- move forward. */
3280af22 3055 PL_reginput = locinput;
c277df42
IZ
3056 if (regrepeat_hard(scan, 1, &l)) {
3057 ln++;
3280af22 3058 locinput = PL_reginput;
c277df42
IZ
3059 }
3060 else
3061 sayNO;
3062 }
a0ed51b3
LW
3063 }
3064 else {
c277df42
IZ
3065 n = regrepeat_hard(scan, n, &l);
3066 if (n != 0 && l == 0
3067 /* In fact, this is tricky. If paren, then the
3068 fact that we did/didnot match may influence
3069 future execution. */
3070 && !(paren && ln == 0))
3071 ln = n;
3280af22 3072 locinput = PL_reginput;
c277df42 3073 DEBUG_r(
5c0ca799 3074 PerlIO_printf(Perl_debug_log,
faccc32b 3075 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3076 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3077 (IV) n, (IV)l)
c277df42
IZ
3078 );
3079 if (n >= ln) {
22c35a8c 3080 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3081 c1 = (U8)*STRING(next);
c277df42 3082 if (OP(next) == EXACTF)
22c35a8c 3083 c2 = PL_fold[c1];
c277df42 3084 else if (OP(next) == EXACTFL)
22c35a8c 3085 c2 = PL_fold_locale[c1];
c277df42
IZ
3086 else
3087 c2 = c1;
a0ed51b3
LW
3088 }
3089 else
c277df42
IZ
3090 c1 = c2 = -1000;
3091 }
02db2b7b 3092 REGCP_SET(lastcp);
c277df42
IZ
3093 while (n >= ln) {
3094 /* If it could work, try it. */
3095 if (c1 == -1000 ||
3280af22
NIS
3096 UCHARAT(PL_reginput) == c1 ||
3097 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3098 {
3099 DEBUG_r(
c3464db5 3100 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3101 "%*s trying tail with n=%"IVdf"...\n",
3102 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3103 );
3104 if (paren) {
3105 if (n) {
cf93c79d
IZ
3106 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3107 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3108 }
a0ed51b3 3109 else
cf93c79d 3110 PL_regendp[paren] = -1;
c277df42 3111 }
a0ed51b3
LW
3112 if (regmatch(next))
3113 sayYES;
02db2b7b 3114 REGCP_UNWIND(lastcp);
a0ed51b3 3115 }
c277df42
IZ
3116 /* Couldn't or didn't -- back up. */
3117 n--;
dfe13c55 3118 locinput = HOPc(locinput, -l);
3280af22 3119 PL_reginput = locinput;
c277df42
IZ
3120 }
3121 }
3122 sayNO;
3123 break;
3124 }
3125 case CURLYN:
3126 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3127 if (paren > PL_regsize)
3128 PL_regsize = paren;
3129 if (paren > *PL_reglastparen)
3130 *PL_reglastparen = paren;
c277df42
IZ
3131 ln = ARG1(scan); /* min to match */
3132 n = ARG2(scan); /* max to match */
dc45a647 3133 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3134 goto repeat;
a0d0e21e 3135 case CURLY:
c277df42 3136 paren = 0;
a0d0e21e
LW
3137 ln = ARG1(scan); /* min to match */
3138 n = ARG2(scan); /* max to match */
dc45a647 3139 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3140 goto repeat;
3141 case STAR:
3142 ln = 0;
c277df42 3143 n = REG_INFTY;
a0d0e21e 3144 scan = NEXTOPER(scan);
c277df42 3145 paren = 0;
a0d0e21e
LW
3146 goto repeat;
3147 case PLUS:
c277df42
IZ
3148 ln = 1;
3149 n = REG_INFTY;
3150 scan = NEXTOPER(scan);
3151 paren = 0;
3152 repeat:
a0d0e21e
LW
3153 /*
3154 * Lookahead to avoid useless match attempts
3155 * when we know what character comes next.
3156 */
22c35a8c 3157 if (PL_regkind[(U8)OP(next)] == EXACT) {
1aa99e6b
IH
3158 U8 *s = (U8*)STRING(next);
3159 if (!UTF) {
3160 c2 = c1 = *s;
3161 if (OP(next) == EXACTF)
3162 c2 = PL_fold[c1];
3163 else if (OP(next) == EXACTFL)
3164 c2 = PL_fold_locale[c1];
3165 }
3166 else { /* UTF */
3167 if (OP(next) == EXACTF) {
3168 c1 = to_utf8_lower(s);
3169 c2 = to_utf8_upper(s);
3170 }
3171 else {
9041c2e3 3172 c2 = c1 = utf8_to_uvchr(s, NULL);
1aa99e6b
IH
3173 }
3174 }
bbce6d69 3175 }
a0d0e21e 3176 else
bbce6d69 3177 c1 = c2 = -1000;
3280af22 3178 PL_reginput = locinput;
a0d0e21e 3179 if (minmod) {
c277df42 3180 CHECKPOINT lastcp;
a0d0e21e
LW
3181 minmod = 0;
3182 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3183 sayNO;
a0ed51b3 3184 locinput = PL_reginput;
02db2b7b 3185 REGCP_SET(lastcp);
0fe9bf95 3186 if (c1 != -1000) {
1aa99e6b 3187 char *e; /* Should not check after this */
0fe9bf95
IZ
3188 char *old = locinput;
3189
1aa99e6b 3190 if (n == REG_INFTY) {
0fe9bf95 3191 e = PL_regeol - 1;
1aa99e6b
IH
3192 if (do_utf8)
3193 while (UTF8_IS_CONTINUATION(*(U8*)e))
3194 e--;
3195 }
3196 else if (do_utf8) {
3197 int m = n - ln;
3198 for (e = locinput;
3199 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3200 e += UTF8SKIP(e);
3201 }
3202 else {
3203 e = locinput + n - ln;
3204 if (e >= PL_regeol)
3205 e = PL_regeol - 1;
3206 }
0fe9bf95 3207 while (1) {
1aa99e6b 3208 int count;
0fe9bf95 3209 /* Find place 'next' could work */
1aa99e6b
IH
3210 if (!do_utf8) {
3211 if (c1 == c2) {
3212 while (locinput <= e && *locinput != c1)
3213 locinput++;
3214 } else {
9041c2e3 3215 while (locinput <= e
1aa99e6b
IH
3216 && *locinput != c1
3217 && *locinput != c2)
3218 locinput++;
3219 }
3220 count = locinput - old;
3221 }
3222 else {
3223 STRLEN len;
3224 if (c1 == c2) {
3225 for (count = 0;
3226 locinput <= e &&
9041c2e3 3227 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3228 count++)
3229 locinput += len;
3230
3231 } else {
3232 for (count = 0; locinput <= e; count++) {
9041c2e3 3233 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3234 if (c == c1 || c == c2)
3235 break;
9041c2e3 3236 locinput += len;
1aa99e6b
IH
3237 }
3238 }
0fe9bf95 3239 }
9041c2e3 3240 if (locinput > e)
0fe9bf95
IZ
3241 sayNO;
3242 /* PL_reginput == old now */
3243 if (locinput != old) {
3244 ln = 1; /* Did some */
1aa99e6b 3245 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3246 sayNO;
3247 }
3248 /* PL_reginput == locinput now */
29d1e993 3249 TRYPAREN(paren, ln, locinput);
0fe9bf95 3250 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3251 REGCP_UNWIND(lastcp);
0fe9bf95 3252 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3253 old = locinput;
3254 if (do_utf8)
3255 locinput += UTF8SKIP(locinput);
3256 else
3257 locinput++;
0fe9bf95
IZ
3258 }
3259 }
3260 else
c277df42 3261 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3262 UV c;
3263 if (c1 != -1000) {
3264 if (do_utf8)
9041c2e3 3265 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3266 else
9041c2e3 3267 c = UCHARAT(PL_reginput);
2390ecbc
PP
3268 /* If it could work, try it. */
3269 if (c == c1 || c == c2)
3270 {
3271 TRYPAREN(paren, n, PL_reginput);
3272 REGCP_UNWIND(lastcp);
3273 }
1aa99e6b 3274 }
a0d0e21e 3275 /* If it could work, try it. */
2390ecbc 3276 else if (c1 == -1000)
bbce6d69 3277 {
29d1e993 3278 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3279 REGCP_UNWIND(lastcp);
bbce6d69 3280 }
c277df42 3281 /* Couldn't or didn't -- move forward. */
a0ed51b3 3282 PL_reginput = locinput;
a0d0e21e
LW
3283 if (regrepeat(scan, 1)) {
3284 ln++;
a0ed51b3
LW
3285 locinput = PL_reginput;
3286 }
3287 else
4633a7c4 3288 sayNO;
a0d0e21e
LW
3289 }
3290 }
3291 else {
c277df42 3292 CHECKPOINT lastcp;
a0d0e21e 3293 n = regrepeat(scan, n);
a0ed51b3 3294 locinput = PL_reginput;
22c35a8c 3295 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3296 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3297 ln = n; /* why back off? */
1aeab75a
GS
3298 /* ...because $ and \Z can match before *and* after
3299 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3300 We should back off by one in this case. */
3301 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3302 ln--;
3303 }
02db2b7b 3304 REGCP_SET(lastcp);
c277df42 3305 if (paren) {
8fa7f367 3306 UV c = 0;
c277df42 3307 while (n >= ln) {
1aa99e6b
IH
3308 if (c1 != -1000) {
3309 if (do_utf8)
9041c2e3 3310 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3311 else
9041c2e3 3312 c = UCHARAT(PL_reginput);
1aa99e6b 3313 }
c277df42 3314 /* If it could work, try it. */
1aa99e6b 3315 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3316 {
29d1e993 3317 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3318 REGCP_UNWIND(lastcp);
c277df42
IZ
3319 }
3320 /* Couldn't or didn't -- back up. */
3321 n--;
dfe13c55 3322 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3323 }
a0ed51b3
LW
3324 }
3325 else {
8fa7f367 3326 UV c = 0;
c277df42 3327 while (n >= ln) {
1aa99e6b
IH
3328 if (c1 != -1000) {
3329 if (do_utf8)
9041c2e3 3330 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3331 else
9041c2e3 3332 c = UCHARAT(PL_reginput);
1aa99e6b 3333 }
c277df42 3334 /* If it could work, try it. */
1aa99e6b 3335 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3336 {
29d1e993 3337 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3338 REGCP_UNWIND(lastcp);
c277df42
IZ
3339 }
3340 /* Couldn't or didn't -- back up. */
3341 n--;
dfe13c55 3342 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3343 }
a0d0e21e
LW
3344 }
3345 }
4633a7c4 3346 sayNO;
c277df42 3347 break;
a0d0e21e 3348 case END:
0f5d15d6
IZ
3349 if (PL_reg_call_cc) {
3350 re_cc_state *cur_call_cc = PL_reg_call_cc;
3351 CURCUR *cctmp = PL_regcc;
3352 regexp *re = PL_reg_re;
3353 CHECKPOINT cp, lastcp;
3354
3355 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3356 REGCP_SET(lastcp);
0f5d15d6
IZ
3357 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3358 the caller. */
3359 PL_reginput = locinput; /* Make position available to
3360 the callcc. */
3361 cache_re(PL_reg_call_cc->re);
3362 PL_regcc = PL_reg_call_cc->cc;
3363 PL_reg_call_cc = PL_reg_call_cc->prev;
3364 if (regmatch(cur_call_cc->node)) {
3365 PL_reg_call_cc = cur_call_cc;
3366 regcpblow(cp);
3367 sayYES;
3368 }
02db2b7b 3369 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3370 regcppop();
3371 PL_reg_call_cc = cur_call_cc;
3372 PL_regcc = cctmp;
3373 PL_reg_re = re;
3374 cache_re(re);
3375
3376 DEBUG_r(
3377 PerlIO_printf(Perl_debug_log,
3378 "%*s continuation failed...\n",
3379 REPORT_CODE_OFF+PL_regindent*2, "")
3380 );
7821416a 3381 sayNO_SILENT;
0f5d15d6 3382 }
7821416a
IZ
3383 if (locinput < PL_regtill) {
3384 DEBUG_r(PerlIO_printf(Perl_debug_log,
3385 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3386 PL_colors[4],
3387 (long)(locinput - PL_reg_starttry),
3388 (long)(PL_regtill - PL_reg_starttry),
3389 PL_colors[5]));
3390 sayNO_FINAL; /* Cannot match: too short. */
3391 }
3392 PL_reginput = locinput; /* put where regtry can find it */
3393 sayYES_FINAL; /* Success! */
7e5428c5 3394 case SUCCEED:
3280af22 3395 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3396 sayYES_LOUD; /* Success! */
c277df42
IZ
3397 case SUSPEND:
3398 n = 1;
9fe1d20c 3399 PL_reginput = locinput;
9041c2e3 3400 goto do_ifmatch;
a0d0e21e 3401 case UNLESSM:
c277df42 3402 n = 0;
a0ed51b3 3403 if (scan->flags) {
efb30f32
HS
3404 s = HOPBACKc(locinput, scan->flags);
3405 if (!s)
3406 goto say_yes;
3407 PL_reginput = s;
a0ed51b3
LW
3408 }
3409 else
3410 PL_reginput = locinput;
c277df42
IZ
3411 goto do_ifmatch;
3412 case IFMATCH:
3413 n = 1;
a0ed51b3 3414 if (scan->flags) {
efb30f32
HS
3415 s = HOPBACKc(locinput, scan->flags);
3416 if (!s)
3417 goto say_no;
3418 PL_reginput = s;
a0ed51b3
LW
3419 }
3420 else
3421 PL_reginput = locinput;
3422
c277df42 3423 do_ifmatch:
c277df42
IZ
3424 inner = NEXTOPER(NEXTOPER(scan));
3425 if (regmatch(inner) != n) {
3426 say_no:
3427 if (logical) {
3428 logical = 0;
3429 sw = 0;
3430 goto do_longjump;
a0ed51b3
LW
3431 }
3432 else
c277df42
IZ
3433 sayNO;
3434 }
3435 say_yes:
3436 if (logical) {
3437 logical = 0;
3438 sw = 1;
3439 }
fe44a5e8 3440 if (OP(scan) == SUSPEND) {
3280af22 3441 locinput = PL_reginput;
565764a8 3442 nextchr = UCHARAT(locinput);
fe44a5e8 3443 }
c277df42
IZ
3444 /* FALL THROUGH. */
3445 case LONGJMP:
3446 do_longjump:
3447 next = scan + ARG(scan);
3448 if (next == scan)
3449 next = NULL;
a0d0e21e
LW
3450 break;
3451 default:
b900a521 3452 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3453 PTR2UV(scan), OP(scan));
cea2e8a9 3454 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3455 }
02db2b7b 3456 reenter:
a0d0e21e
LW
3457 scan = next;
3458 }
a687059c 3459
a0d0e21e
LW
3460 /*
3461 * We get here only if there's trouble -- normally "case END" is
3462 * the terminating point.
3463 */
cea2e8a9 3464 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3465 /*NOTREACHED*/
4633a7c4
LW
3466 sayNO;
3467
7821416a
IZ
3468yes_loud:
3469 DEBUG_r(
3470 PerlIO_printf(Perl_debug_log,
3471 "%*s %scould match...%s\n",
3472 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3473 );
3474 goto yes;
3475yes_final:
3476 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3477 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3478yes:
3479#ifdef DEBUGGING
3280af22 3480 PL_regindent--;
4633a7c4 3481#endif
02db2b7b
IZ
3482
3483#if 0 /* Breaks $^R */
3484 if (unwind)
3485 regcpblow(firstcp);
3486#endif
4633a7c4
LW
3487 return 1;
3488
3489no:
7821416a
IZ
3490 DEBUG_r(
3491 PerlIO_printf(Perl_debug_log,
3492 "%*s %sfailed...%s\n",
3493 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3494 );
3495 goto do_no;
3496no_final:
3497do_no:
02db2b7b
IZ
3498 if (unwind) {
3499 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3500
3501 switch (uw->type) {
3502 case RE_UNWIND_BRANCH:
3503 case RE_UNWIND_BRANCHJ:
3504 {
3505 re_unwind_branch_t *uwb = &(uw->branch);
3506 I32 lastparen = uwb->lastparen;
9041c2e3 3507
02db2b7b
IZ
3508 REGCP_UNWIND(uwb->lastcp);
3509 for (n = *PL_reglastparen; n > lastparen; n--)
3510 PL_regendp[n] = -1;
3511 *PL_reglastparen = n;
3512 scan = next = uwb->next;
9041c2e3
NIS
3513 if ( !scan ||
3514 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
3515 ? BRANCH : BRANCHJ) ) { /* Failure */
3516 unwind = uwb->prev;
3517#ifdef DEBUGGING
3518 PL_regindent--;
3519#endif
3520 goto do_no;
3521 }
3522 /* Have more choice yet. Reuse the same uwb. */
3523 /*SUPPRESS 560*/
3524 if ((n = (uwb->type == RE_UNWIND_BRANCH
3525 ? NEXT_OFF(next) : ARG(next))))
3526 next += n;
3527 else
3528 next = NULL; /* XXXX Needn't unwinding in this case... */
3529 uwb->next = next;
3530 next = NEXTOPER(scan);
3531 if (uwb->type == RE_UNWIND_BRANCHJ)
3532 next = NEXTOPER(next);
3533 locinput = uwb->locinput;
3534 nextchr = uwb->nextchr;
3535#ifdef DEBUGGING
3536 PL_regindent = uwb->regindent;
3537#endif
3538
3539 goto reenter;
3540 }
3541 /* NOT REACHED */
3542 default:
3543 Perl_croak(aTHX_ "regexp unwind memory corruption");
3544 }
3545 /* NOT REACHED */
3546 }
4633a7c4 3547#ifdef DEBUGGING
3280af22 3548 PL_regindent--;
4633a7c4 3549#endif
a0d0e21e 3550 return 0;
a687059c
LW
3551}
3552
3553/*
3554 - regrepeat - repeatedly match something simple, report how many
3555 */
3556/*
3557 * [This routine now assumes that it will only match on things of length 1.
3558 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3559 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3560 */
76e3520e 3561STATIC I32
cea2e8a9 3562S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3563{
a0d0e21e 3564 register char *scan;
a0d0e21e 3565 register I32 c;
3280af22 3566 register char *loceol = PL_regeol;
a0ed51b3 3567 register I32 hardcount = 0;
ffc61ed2 3568 register bool do_utf8 = DO_UTF8(PL_reg_sv);
a0d0e21e 3569
3280af22 3570 scan = PL_reginput;
c277df42 3571 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3572 loceol = scan + max;
a0d0e21e 3573 switch (OP(p)) {
22c35a8c 3574 case REG_ANY:
1aa99e6b 3575 if (do_utf8) {
ffc61ed2 3576 loceol = PL_regeol;
1aa99e6b 3577 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3578 scan += UTF8SKIP(scan);
3579 hardcount++;
3580 }
3581 } else {
3582 while (scan < loceol && *scan != '\n')
3583 scan++;
a0ed51b3
LW
3584 }
3585 break;
ffc61ed2 3586 case SANY:
3baa4c62 3587 scan = loceol;
a0ed51b3 3588 break;
bbce6d69 3589 case EXACT: /* length of string is 1 */
cd439c50 3590 c = (U8)*STRING(p);
bbce6d69 3591 while (scan < loceol && UCHARAT(scan) == c)
3592 scan++;
3593 break;
3594 case EXACTF: /* length of string is 1 */
cd439c50 3595 c = (U8)*STRING(p);
bbce6d69 3596 while (scan < loceol &&
22c35a8c 3597 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3598 scan++;
3599 break;
3600 case EXACTFL: /* length of string is 1 */
3280af22 3601 PL_reg_flags |= RF_tainted;
cd439c50 3602 c = (U8)*STRING(p);
bbce6d69 3603 while (scan < loceol &&
22c35a8c 3604 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3605 scan++;
3606 break;
3607 case ANYOF:
ffc61ed2
JH
3608 if (do_utf8) {
3609 loceol = PL_regeol;
1aa99e6b
IH
3610 while (hardcount < max && scan < loceol &&
3611 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3612 scan += UTF8SKIP(scan);
3613 hardcount++;
3614 }
3615 } else {
3616 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3617 scan++;
3618 }
a0d0e21e
LW
3619 break;
3620 case ALNUM:
1aa99e6b 3621 if (do_utf8) {
ffc61ed2 3622 loceol = PL_regeol;
8269fa76 3623 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3624 while (hardcount < max && scan < loceol &&
3568d838 3625 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3626 scan += UTF8SKIP(scan);
3627 hardcount++;
3628 }
3629 } else {
3630 while (scan < loceol && isALNUM(*scan))
3631 scan++;
a0ed51b3
LW
3632 }
3633 break;
bbce6d69 3634 case ALNUML:
3280af22 3635 PL_reg_flags |= RF_tainted;
1aa99e6b 3636 if (do_utf8) {
ffc61ed2 3637 loceol = PL_regeol;
1aa99e6b
IH
3638 while (hardcount < max && scan < loceol &&
3639 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3640 scan += UTF8SKIP(scan);
3641 hardcount++;
3642 }
3643 } else {
3644 while (scan < loceol && isALNUM_LC(*scan))
3645 scan++;
a0ed51b3
LW
3646 }
3647 break;
a0d0e21e 3648 case NALNUM:
1aa99e6b 3649 if (do_utf8) {
ffc61ed2 3650 loceol = PL_regeol;
8269fa76 3651 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3652 while (hardcount < max && scan < loceol &&
3568d838 3653 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3654 scan += UTF8SKIP(scan);
3655 hardcount++;
3656 }
3657 } else {
3658 while (scan < loceol && !isALNUM(*scan))
3659 scan++;
a0ed51b3
LW
3660 }
3661 break;
bbce6d69 3662 case NALNUML:
3280af22 3663 PL_reg_flags |= RF_tainted;
1aa99e6b 3664 if (do_utf8) {
ffc61ed2 3665 loceol = PL_regeol;
1aa99e6b
IH
3666 while (hardcount < max && scan < loceol &&
3667 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3668 scan += UTF8SKIP(scan);
3669 hardcount++;
3670 }
3671 } else {
3672 while (scan < loceol && !isALNUM_LC(*scan))
3673 scan++;
a0ed51b3
LW
3674 }
3675 break;
a0d0e21e 3676 case SPACE:
1aa99e6b 3677 if (do_utf8) {
ffc61ed2 3678 loceol = PL_regeol;
8269fa76 3679 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3680 while (hardcount < max && scan < loceol &&
3568d838
JH
3681 (*scan == ' ' ||
3682 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3683 scan += UTF8SKIP(scan);
3684 hardcount++;
3685 }
3686 } else {
3687 while (scan < loceol && isSPACE(*scan))
3688 scan++;
a0ed51b3
LW
3689 }
3690 break;
bbce6d69 3691 case SPACEL:
3280af22 3692 PL_reg_flags |= RF_tainted;
1aa99e6b 3693 if (do_utf8) {
ffc61ed2 3694 loceol = PL_regeol;
1aa99e6b 3695 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3696 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3697 scan += UTF8SKIP(scan);
3698 hardcount++;
3699 }
3700 } else {
3701 while (scan < loceol && isSPACE_LC(*scan))
3702 scan++;
a0ed51b3
LW
3703 }
3704 break;
a0d0e21e 3705 case NSPACE:
1aa99e6b 3706 if (do_utf8) {
ffc61ed2 3707 loceol = PL_regeol;
8269fa76 3708 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3709 while (hardcount < max && scan < loceol &&
3568d838
JH
3710 !(*scan == ' ' ||
3711 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3712 scan += UTF8SKIP(scan);
3713 hardcount++;
3714 }
3715 } else {
3716 while (scan < loceol && !isSPACE(*scan))
3717 scan++;
3718 break;
a0ed51b3 3719 }
bbce6d69 3720 case NSPACEL:
3280af22 3721 PL_reg_flags |= RF_tainted;
1aa99e6b 3722 if (do_utf8) {
ffc61ed2 3723 loceol = PL_regeol;
1aa99e6b 3724 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3725 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3726 scan += UTF8SKIP(scan);
3727 hardcount++;
3728 }
3729 } else {
3730 while (scan < loceol && !isSPACE_LC(*scan))
3731 scan++;
a0ed51b3
LW
3732 }
3733 break;
a0d0e21e 3734 case DIGIT:
1aa99e6b 3735 if (do_utf8) {
ffc61ed2 3736 loceol = PL_regeol;
8269fa76 3737 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3738 while (hardcount < max && scan < loceol &&
3568d838 3739 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3740 scan += UTF8SKIP(scan);
3741 hardcount++;
3742 }
3743 } else {
3744 while (scan < loceol && isDIGIT(*scan))
3745 scan++;
a0ed51b3
LW
3746 }
3747 break;
a0d0e21e 3748 case NDIGIT:
1aa99e6b 3749 if (do_utf8) {
ffc61ed2 3750 loceol = PL_regeol;
8269fa76 3751 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3752 while (hardcount < max && scan < loceol &&
3568d838 3753 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3754 scan += UTF8SKIP(scan);
3755 hardcount++;
3756 }
3757 } else {
3758 while (scan < loceol && !isDIGIT(*scan))
3759 scan++;
a0ed51b3
LW
3760 }
3761 break;
a0d0e21e
LW
3762 default: /* Called on something of 0 width. */
3763 break; /* So match right here or not at all. */
3764 }
a687059c 3765
a0ed51b3
LW
3766 if (hardcount)
3767 c = hardcount;
3768 else
3769 c = scan - PL_reginput;
3280af22 3770 PL_reginput = scan;
a687059c 3771
9041c2e3 3772 DEBUG_r(
c277df42
IZ
3773 {
3774 SV *prop = sv_newmortal();
3775
3776 regprop(prop, p);
9041c2e3
NIS
3777 PerlIO_printf(Perl_debug_log,
3778 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
7b0972df 3779 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42 3780 });
9041c2e3 3781
a0d0e21e 3782 return(c);
a687059c
LW
3783}
3784
3785/*
c277df42 3786 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 3787 *
c277df42
IZ
3788 * The repeater is supposed to have constant length.
3789 */
3790
76e3520e 3791STATIC I32
cea2e8a9 3792S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3793{
b7953727 3794 register char *scan = Nullch;
c277df42 3795 register char *start;
3280af22 3796 register char *loceol = PL_regeol;
a0ed51b3 3797 I32 l = 0;
708e3b05 3798 I32 count = 0, res = 1;
a0ed51b3
LW
3799
3800 if (!max)
3801 return 0;
c277df42 3802
3280af22 3803 start = PL_reginput;
1aa99e6b 3804 if (DO_UTF8(PL_reg_sv)) {
708e3b05 3805 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3806 if (!count++) {
3807 l = 0;
3808 while (start < PL_reginput) {
3809 l++;
3810 start += UTF8SKIP(start);
3811 }
3812 *lp = l;
3813 if (l == 0)
3814 return max;
3815 }
3816 if (count == max)
3817 return count;
3818 }
3819 }
3820 else {
708e3b05 3821 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3822 if (!count++) {
3823 *lp = l = PL_reginput - start;
3824 if (max != REG_INFTY && l*max < loceol - scan)
3825 loceol = scan + l*max;
3826 if (l == 0)
3827 return max;
c277df42
IZ
3828 }
3829 }
3830 }
708e3b05 3831 if (!res)
3280af22 3832 PL_reginput = scan;
9041c2e3 3833
a0ed51b3 3834 return count;
c277df42
IZ
3835}
3836
3837/*
ffc61ed2
JH
3838- regclass_swash - prepare the utf8 swash
3839*/
3840
3841SV *
3842Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
3843{
3844 SV *sw = NULL;
3845 SV *si = NULL;
3846
3847 if (PL_regdata && PL_regdata->count) {
3848 U32 n = ARG(node);
3849
3850 if (PL_regdata->what[n] == 's') {
3851 SV *rv = (SV*)PL_regdata->data[n];
3852 AV *av = (AV*)SvRV((SV*)rv);
3853 SV **a;
9041c2e3 3854
ffc61ed2
JH
3855 si = *av_fetch(av, 0, FALSE);
3856 a = av_fetch(av, 1, FALSE);
9041c2e3 3857
ffc61ed2
JH
3858 if (a)
3859 sw = *a;
3860 else if (si && doinit) {
3861 sw = swash_init("utf8", "", si, 1, 0);
3862 (void)av_store(av, 1, sw);
3863 }
3864 }
3865 }
3866
3867 if (initsvp)
3868 *initsvp = si;
3869
3870 return sw;
3871}
3872
3873/*
cb8d8820 3874 - reginclass - determine if a character falls into a character class
bbce6d69 3875 */
3876
76e3520e 3877STATIC bool
ffc61ed2 3878S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
bbce6d69 3879{
ffc61ed2 3880 char flags = ANYOF_FLAGS(n);
bbce6d69 3881 bool match = FALSE;
1aa99e6b 3882 UV c;
3568d838 3883 STRLEN len = 0;
1aa99e6b 3884
3568d838 3885 c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
bbce6d69 3886
ffc61ed2
JH
3887 if (do_utf8 || (flags & ANYOF_UNICODE)) {
3888 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
3889 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
3890 match = TRUE;
bbce6d69 3891 }
3568d838 3892 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 3893 match = TRUE;
ffc61ed2
JH
3894 if (!match) {
3895 SV *sw = regclass_swash(n, TRUE, 0);
3896
3897 if (sw) {
3568d838 3898 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
3899 match = TRUE;
3900 else if (flags & ANYOF_FOLD) {
3901 U8 tmpbuf[UTF8_MAXLEN+1];
9041c2e3 3902
ffc61ed2
JH
3903 if (flags & ANYOF_LOCALE) {
3904 PL_reg_flags |= RF_tainted;
9041c2e3 3905 uvchr_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
ffc61ed2 3906 }
1aa99e6b 3907 else
9041c2e3 3908 uvchr_to_utf8(tmpbuf, toLOWER_utf8(p));
3568d838 3909 if (swash_fetch(sw, tmpbuf, do_utf8))
ffc61ed2
JH
3910 match = TRUE;
3911 }
3912 }
bbce6d69 3913 }
3914 }
1aa99e6b 3915 if (!match && c < 256) {
ffc61ed2
JH
3916 if (ANYOF_BITMAP_TEST(n, c))
3917 match = TRUE;
3918 else if (flags & ANYOF_FOLD) {
3568d838 3919 I32 f;
a0ed51b3 3920
ffc61ed2
JH
3921 if (flags & ANYOF_LOCALE) {
3922 PL_reg_flags |= RF_tainted;
3923 f = PL_fold_locale[c];
3924 }
3925 else
3926 f = PL_fold[c];
3927 if (f != c && ANYOF_BITMAP_TEST(n, f))
3928 match = TRUE;
3929 }
3930
3931 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 3932 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
3933 if (
3934 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3935 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3936 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
3937 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3938 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3939 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3940 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3941 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3942 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3943 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3944 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
3945 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
3946 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3947 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3948 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3949 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3950 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
3951 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3952 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
3953 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3954 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3955 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3956 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
3957 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3958 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3959 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3960 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3961 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3962 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
3963 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
3964 ) /* How's that for a conditional? */
3965 {
3966 match = TRUE;
3967 }
a0ed51b3 3968 }
a0ed51b3
LW
3969 }
3970
a0ed51b3
LW
3971 return (flags & ANYOF_INVERT) ? !match : match;
3972}
161b471a 3973
dfe13c55 3974STATIC U8 *
cea2e8a9 3975S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 3976{
1aa99e6b
IH
3977 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
3978}
3979
3980STATIC U8 *
3981S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 3982{
a0ed51b3 3983 if (off >= 0) {
1aa99e6b 3984 while (off-- && s < lim) {
ffc61ed2 3985 /* XXX could check well-formedness here */
a0ed51b3 3986 s += UTF8SKIP(s);
ffc61ed2 3987 }
a0ed51b3
LW
3988 }
3989 else {
3990 while (off++) {
1aa99e6b 3991 if (s > lim) {
a0ed51b3 3992 s--;
ffc61ed2 3993 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 3994 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 3995 s--;
ffc61ed2
JH
3996 }
3997 /* XXX could check well-formedness here */
a0ed51b3
LW
3998 }
3999 }
4000 }
4001 return s;
4002}
161b471a 4003
dfe13c55 4004STATIC U8 *
1aa99e6b 4005S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 4006{
1aa99e6b
IH
4007 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4008}
4009
4010STATIC U8 *
4011S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
4012{
4013 if (off >= 0) {
1aa99e6b 4014 while (off-- && s < lim) {
ffc61ed2 4015 /* XXX could check well-formedness here */
a0ed51b3 4016 s += UTF8SKIP(s);
ffc61ed2 4017 }
a0ed51b3
LW
4018 if (off >= 0)
4019 return 0;
4020 }
4021 else {
4022 while (off++) {
1aa99e6b 4023 if (s > lim) {
a0ed51b3 4024 s--;
ffc61ed2 4025 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4026 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4027 s--;
ffc61ed2
JH
4028 }
4029 /* XXX could check well-formedness here */
a0ed51b3
LW
4030 }
4031 else
4032 break;
4033 }
4034 if (off <= 0)
4035 return 0;
4036 }
4037 return s;
4038}
51371543
GS
4039
4040#ifdef PERL_OBJECT
51371543
GS
4041#include "XSUB.h"
4042#endif
4043
4044static void
4045restore_pos(pTHXo_ void *arg)
4046{
51371543
GS
4047 if (PL_reg_eval_set) {
4048 if (PL_reg_oldsaved) {
4049 PL_reg_re->subbeg = PL_reg_oldsaved;
4050 PL_reg_re->sublen = PL_reg_oldsavedlen;
4051 RX_MATCH_COPIED_on(PL_reg_re);
4052 }
4053 PL_reg_magic->mg_len = PL_reg_oldpos;
4054 PL_reg_eval_set = 0;
4055 PL_curpm = PL_reg_oldcurpm;
4056 }
4057}