This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
RE: Win32 Failure, was Re: perl@11278 - LAST CALL FOR 5.7.2
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
c5be433b
GS
83#ifdef PERL_IN_XSUB_RE
84# if defined(PERL_CAPI) || defined(PERL_OBJECT)
85# include "XSUB.h"
86# endif
87#endif
88
a687059c
LW
89#include "regcomp.h"
90
c277df42
IZ
91#define RF_tainted 1 /* tainted information used? */
92#define RF_warned 2 /* warned about big count? */
ce862d02 93#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
94#define RF_utf8 8 /* String contains multibyte chars? */
95
96#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
97
98#define RS_init 1 /* eval environment created */
99#define RS_set 2 /* replsv value is set */
c277df42 100
a687059c
LW
101#ifndef STATIC
102#define STATIC static
103#endif
104
c277df42
IZ
105/*
106 * Forwards.
107 */
108
a0ed51b3 109#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
1aa99e6b 110#define CHR_DIST(a,b) (DO_UTF8(PL_reg_sv) ? utf8_distance(a,b) : a - b)
a0ed51b3 111
dfe13c55
GS
112#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
113#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
1aa99e6b
IH
114#define HOP(pos,off) (DO_UTF8(PL_reg_sv) ? reghop((U8*)pos, off) : (U8*)(pos + off))
115#define HOPMAYBE(pos,off) (DO_UTF8(PL_reg_sv) ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
116#define HOPc(pos,off) ((char*)HOP(pos,off))
117#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 118
efb30f32
HS
119#define HOPBACK(pos, off) ( \
120 (UTF && DO_UTF8(PL_reg_sv)) \
121 ? reghopmaybe((U8*)pos, -off) \
122 : (pos - off >= PL_bostr) \
123 ? (U8*)(pos - off) \
124 : (U8*)NULL \
125)
126#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
127
1aa99e6b
IH
128#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
129#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
130#define HOP3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
131#define HOPMAYBE3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
132#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
133#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
134
8269fa76 135#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 136
8269fa76 137static void restore_pos(pTHXo_ void *arg);
51371543 138
76e3520e 139STATIC CHECKPOINT
cea2e8a9 140S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 141{
3280af22 142 int retval = PL_savestack_ix;
b1ce53c5
JH
143#define REGCP_PAREN_ELEMS 4
144 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
145 int p;
146
e49a9654
IH
147 if (paren_elems_to_push < 0)
148 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
149
a01268b5 150#define REGCP_OTHER_ELEMS 6
b1ce53c5 151 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 152 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 153/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
154 SSPUSHINT(PL_regendp[p]);
155 SSPUSHINT(PL_regstartp[p]);
3280af22 156 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
157 SSPUSHINT(p);
158 }
b1ce53c5 159/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
160 SSPUSHINT(PL_regsize);
161 SSPUSHINT(*PL_reglastparen);
a01268b5 162 SSPUSHINT(*PL_reglastcloseparen);
3280af22 163 SSPUSHPTR(PL_reginput);
41123dfd
JH
164#define REGCP_FRAME_ELEMS 2
165/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
166 * are needed for the regexp context stack bookkeeping. */
167 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 168 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 169
a0d0e21e
LW
170 return retval;
171}
172
c277df42 173/* These are needed since we do not localize EVAL nodes: */
02db2b7b 174# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 175 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 176 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 177
02db2b7b 178# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 179 PerlIO_printf(Perl_debug_log, \
faccc32b 180 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 181 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 182
76e3520e 183STATIC char *
cea2e8a9 184S_regcppop(pTHX)
a0d0e21e 185{
b1ce53c5 186 I32 i;
a0d0e21e
LW
187 U32 paren = 0;
188 char *input;
cf93c79d 189 I32 tmps;
b1ce53c5
JH
190
191 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 192 i = SSPOPINT;
b1ce53c5
JH
193 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
194 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 195 input = (char *) SSPOPPTR;
a01268b5 196 *PL_reglastcloseparen = SSPOPINT;
3280af22
NIS
197 *PL_reglastparen = SSPOPINT;
198 PL_regsize = SSPOPINT;
b1ce53c5
JH
199
200 /* Now restore the parentheses context. */
41123dfd
JH
201 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
202 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 203 paren = (U32)SSPOPINT;
3280af22 204 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
205 PL_regstartp[paren] = SSPOPINT;
206 tmps = SSPOPINT;
3280af22
NIS
207 if (paren <= *PL_reglastparen)
208 PL_regendp[paren] = tmps;
c277df42 209 DEBUG_r(
c3464db5 210 PerlIO_printf(Perl_debug_log,
b900a521 211 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 212 (UV)paren, (IV)PL_regstartp[paren],
b900a521 213 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 214 (IV)PL_regendp[paren],
3280af22 215 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 216 );
a0d0e21e 217 }
c277df42 218 DEBUG_r(
3280af22 219 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 220 PerlIO_printf(Perl_debug_log,
faccc32b
JH
221 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
222 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
223 }
224 );
daf18116 225#if 1
dafc8851
JH
226 /* It would seem that the similar code in regtry()
227 * already takes care of this, and in fact it is in
228 * a better location to since this code can #if 0-ed out
229 * but the code in regtry() is needed or otherwise tests
230 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
231 * (as of patchlevel 7877) will fail. Then again,
232 * this code seems to be necessary or otherwise
233 * building DynaLoader will fail:
234 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
235 * --jhi */
3280af22
NIS
236 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
237 if (paren > PL_regsize)
cf93c79d
IZ
238 PL_regstartp[paren] = -1;
239 PL_regendp[paren] = -1;
a0d0e21e 240 }
dafc8851 241#endif
a0d0e21e
LW
242 return input;
243}
244
0f5d15d6 245STATIC char *
cea2e8a9 246S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
247{
248 I32 tmp = PL_savestack_ix;
249
250 PL_savestack_ix = ss;
251 regcppop();
252 PL_savestack_ix = tmp;
942e002e 253 return Nullch;
0f5d15d6
IZ
254}
255
256typedef struct re_cc_state
257{
258 I32 ss;
259 regnode *node;
260 struct re_cc_state *prev;
261 CURCUR *cc;
262 regexp *re;
263} re_cc_state;
264
02db2b7b 265#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 266
29d1e993
HS
267#define TRYPAREN(paren, n, input) { \
268 if (paren) { \
269 if (n) { \
270 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
271 PL_regendp[paren] = input - PL_bostr; \
272 } \
273 else \
274 PL_regendp[paren] = -1; \
275 } \
276 if (regmatch(next)) \
277 sayYES; \
278 if (paren && n) \
279 PL_regendp[paren] = -1; \
280}
281
282
a687059c 283/*
e50aee73 284 * pregexec and friends
a687059c
LW
285 */
286
287/*
c277df42 288 - pregexec - match a regexp against a string
a687059c 289 */
c277df42 290I32
864dbfa3 291Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 292 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
293/* strend: pointer to null at end of string */
294/* strbeg: real beginning of string */
295/* minend: end of match must be >=minend after stringarg. */
296/* nosave: For optimizations. */
297{
298 return
9041c2e3 299 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
300 nosave ? 0 : REXEC_COPY_STR);
301}
0f5d15d6
IZ
302
303STATIC void
cea2e8a9 304S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
305{
306 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
307#ifdef DEBUGGING
308 PL_regprogram = prog->program;
309#endif
310 PL_regnpar = prog->nparens;
9041c2e3
NIS
311 PL_regdata = prog->data;
312 PL_reg_re = prog;
0f5d15d6 313}
22e551b9 314
9041c2e3 315/*
cad2e5aa
JH
316 * Need to implement the following flags for reg_anch:
317 *
318 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
319 * USE_INTUIT_ML
320 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
321 * INTUIT_AUTORITATIVE_ML
322 * INTUIT_ONCE_NOML - Intuit can match in one location only.
323 * INTUIT_ONCE_ML
324 *
325 * Another flag for this function: SECOND_TIME (so that float substrs
326 * with giant delta may be not rechecked).
327 */
328
329/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
330
2c2d71f5 331/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
332 Otherwise, only SvCUR(sv) is used to get strbeg. */
333
334/* XXXX We assume that strpos is strbeg unless sv. */
335
6eb5f6b9
JH
336/* XXXX Some places assume that there is a fixed substring.
337 An update may be needed if optimizer marks as "INTUITable"
338 RExen without fixed substrings. Similarly, it is assumed that
339 lengths of all the strings are no more than minlen, thus they
340 cannot come from lookahead.
341 (Or minlen should take into account lookahead.) */
342
2c2d71f5
JH
343/* A failure to find a constant substring means that there is no need to make
344 an expensive call to REx engine, thus we celebrate a failure. Similarly,
345 finding a substring too deep into the string means that less calls to
30944b6d
IZ
346 regtry() should be needed.
347
348 REx compiler's optimizer found 4 possible hints:
349 a) Anchored substring;
350 b) Fixed substring;
351 c) Whether we are anchored (beginning-of-line or \G);
352 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 353 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
354 string which does not contradict any of them.
355 */
2c2d71f5 356
6eb5f6b9
JH
357/* Most of decisions we do here should have been done at compile time.
358 The nodes of the REx which we used for the search should have been
359 deleted from the finite automaton. */
360
cad2e5aa
JH
361char *
362Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
363 char *strend, U32 flags, re_scream_pos_data *data)
364{
b7953727 365 register I32 start_shift = 0;
cad2e5aa 366 /* Should be nonnegative! */
b7953727 367 register I32 end_shift = 0;
2c2d71f5
JH
368 register char *s;
369 register SV *check;
a1933d95 370 char *strbeg;
cad2e5aa
JH
371 char *t;
372 I32 ml_anch;
6eb5f6b9 373 register char *other_last = Nullch; /* other substr checked before this */
b7953727 374 char *check_at = Nullch; /* check substr found at this pos */
30944b6d
IZ
375#ifdef DEBUGGING
376 char *i_strpos = strpos;
377#endif
cad2e5aa
JH
378
379 DEBUG_r( if (!PL_colorset) reginitcolors() );
380 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 381 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
382 PL_colors[4],PL_colors[5],PL_colors[0],
383 prog->precomp,
384 PL_colors[1],
385 (strlen(prog->precomp) > 60 ? "..." : ""),
386 PL_colors[0],
b900a521 387 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
388 strpos, PL_colors[1],
389 (strend - strpos > 60 ? "..." : ""))
390 );
391
1aa99e6b
IH
392 if (prog->reganch & ROPT_UTF8)
393 PL_reg_flags |= RF_utf8;
394
395 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
2c2d71f5 396 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 397 goto fail;
2c2d71f5 398 }
a1933d95 399 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 400 PL_regeol = strend;
653099ff 401 check = prog->check_substr;
2c2d71f5 402 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
403 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
404 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 405 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 406
7e25d62c
JH
407 if (!ml_anch) {
408 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
409 | ROPT_IMPLICIT)) /* not a real BOL */
410 /* SvCUR is not set on references: SvRV and SvPVX overlap */
411 && sv && !SvROK(sv)
412 && (strpos != strbeg)) {
413 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
414 goto fail;
415 }
416 if (prog->check_offset_min == prog->check_offset_max &&
417 !(prog->reganch & ROPT_SANY_SEEN)) {
2c2d71f5 418 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
419 I32 slen;
420
1aa99e6b 421 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
422 if (SvTAIL(check)) {
423 slen = SvCUR(check); /* >= 1 */
cad2e5aa 424
9041c2e3 425 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
426 || (strend - s == slen && strend[-1] != '\n')) {
427 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
428 goto fail_finish;
cad2e5aa
JH
429 }
430 /* Now should match s[0..slen-2] */
431 slen--;
653099ff 432 if (slen && (*SvPVX(check) != *s
cad2e5aa 433 || (slen > 1
653099ff 434 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
435 report_neq:
436 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
437 goto fail_finish;
438 }
cad2e5aa 439 }
653099ff
GS
440 else if (*SvPVX(check) != *s
441 || ((slen = SvCUR(check)) > 1
442 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
443 goto report_neq;
444 goto success_at_start;
7e25d62c 445 }
cad2e5aa 446 }
2c2d71f5 447 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 448 s = strpos;
2c2d71f5 449 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 450 end_shift = prog->minlen - start_shift -
653099ff 451 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 452 if (!ml_anch) {
653099ff
GS
453 I32 end = prog->check_offset_max + CHR_SVLEN(check)
454 - (SvTAIL(check) != 0);
1aa99e6b 455 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
456
457 if (end_shift < eshift)
458 end_shift = eshift;
459 }
cad2e5aa 460 }
2c2d71f5 461 else { /* Can match at random position */
cad2e5aa
JH
462 ml_anch = 0;
463 s = strpos;
2c2d71f5
JH
464 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
465 /* Should be nonnegative! */
466 end_shift = prog->minlen - start_shift -
653099ff 467 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
468 }
469
2c2d71f5 470#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 471 if (end_shift < 0)
6bbae5e6 472 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
473#endif
474
2c2d71f5
JH
475 restart:
476 /* Find a possible match in the region s..strend by looking for
477 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 478 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
479 I32 p = -1; /* Internal iterator of scream. */
480 I32 *pp = data ? data->scream_pos : &p;
481
2c2d71f5
JH
482 if (PL_screamfirst[BmRARE(check)] >= 0
483 || ( BmRARE(check) == '\n'
484 && (BmPREVIOUS(check) == SvCUR(check) - 1)
485 && SvTAIL(check) ))
9041c2e3 486 s = screaminstr(sv, check,
2c2d71f5 487 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 488 else
2c2d71f5 489 goto fail_finish;
cad2e5aa
JH
490 if (data)
491 *data->scream_olds = s;
492 }
3baa4c62
JH
493 else if (prog->reganch & ROPT_SANY_SEEN)
494 s = fbm_instr((U8*)(s + start_shift),
495 (U8*)(strend - end_shift),
496 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 497 else
1aa99e6b
IH
498 s = fbm_instr(HOP3(s, start_shift, strend),
499 HOP3(strend, -end_shift, strbeg),
2c2d71f5 500 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
501
502 /* Update the count-of-usability, remove useless subpatterns,
503 unshift s. */
2c2d71f5
JH
504
505 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
506 (s ? "Found" : "Did not find"),
507 ((check == prog->anchored_substr) ? "anchored" : "floating"),
508 PL_colors[0],
7b0972df
JH
509 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
510 SvPVX(check),
2c2d71f5
JH
511 PL_colors[1], (SvTAIL(check) ? "$" : ""),
512 (s ? " at offset " : "...\n") ) );
513
514 if (!s)
515 goto fail_finish;
516
6eb5f6b9
JH
517 check_at = s;
518
2c2d71f5 519 /* Finish the diagnostic message */
30944b6d 520 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
521
522 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
523 Start with the other substr.
524 XXXX no SCREAM optimization yet - and a very coarse implementation
525 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
526 *always* match. Probably should be marked during compile...
527 Probably it is right to do no SCREAM here...
528 */
529
530 if (prog->float_substr && prog->anchored_substr) {
30944b6d 531 /* Take into account the "other" substring. */
2c2d71f5
JH
532 /* XXXX May be hopelessly wrong for UTF... */
533 if (!other_last)
6eb5f6b9 534 other_last = strpos;
2c2d71f5 535 if (check == prog->float_substr) {
30944b6d
IZ
536 do_other_anchored:
537 {
1aa99e6b 538 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
539 char *s1 = s;
540
2c2d71f5
JH
541 t = s - prog->check_offset_max;
542 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
543 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 544 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 545 && t > strpos)))
30944b6d 546 /* EMPTY */;
2c2d71f5
JH
547 else
548 t = strpos;
1aa99e6b 549 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
550 if (t < other_last) /* These positions already checked */
551 t = other_last;
1aa99e6b 552 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
553 if (last < last1)
554 last1 = last;
555 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
556 /* On end-of-str: see comment below. */
557 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
558 HOP3(HOP3(last1, prog->anchored_offset, strend)
559 + SvCUR(prog->anchored_substr),
560 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
561 prog->anchored_substr,
562 PL_multiline ? FBMrf_MULTILINE : 0);
563 DEBUG_r(PerlIO_printf(Perl_debug_log,
564 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
565 (s ? "Found" : "Contradicts"),
566 PL_colors[0],
7b0972df
JH
567 (int)(SvCUR(prog->anchored_substr)
568 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
569 SvPVX(prog->anchored_substr),
570 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
571 if (!s) {
572 if (last1 >= last2) {
573 DEBUG_r(PerlIO_printf(Perl_debug_log,
574 ", giving up...\n"));
575 goto fail_finish;
576 }
577 DEBUG_r(PerlIO_printf(Perl_debug_log,
578 ", trying floating at offset %ld...\n",
1aa99e6b
IH
579 (long)(HOP3c(s1, 1, strend) - i_strpos)));
580 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
581 s = HOP3c(last, 1, strend);
2c2d71f5
JH
582 goto restart;
583 }
584 else {
585 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 586 (long)(s - i_strpos)));
1aa99e6b
IH
587 t = HOP3c(s, -prog->anchored_offset, strbeg);
588 other_last = HOP3c(s, 1, strend);
30944b6d 589 s = s1;
2c2d71f5
JH
590 if (t == strpos)
591 goto try_at_start;
2c2d71f5
JH
592 goto try_at_offset;
593 }
30944b6d 594 }
2c2d71f5
JH
595 }
596 else { /* Take into account the floating substring. */
597 char *last, *last1;
598 char *s1 = s;
599
1aa99e6b
IH
600 t = HOP3c(s, -start_shift, strbeg);
601 last1 = last =
602 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
603 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
604 last = HOP3c(t, prog->float_max_offset, strend);
605 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
606 if (s < other_last)
607 s = other_last;
2c2d71f5
JH
608 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
609 /* fbm_instr() takes into account exact value of end-of-str
610 if the check is SvTAIL(ed). Since false positives are OK,
611 and end-of-str is not later than strend we are OK. */
612 s = fbm_instr((unsigned char*)s,
613 (unsigned char*)last + SvCUR(prog->float_substr)
614 - (SvTAIL(prog->float_substr)!=0),
615 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
616 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
617 (s ? "Found" : "Contradicts"),
618 PL_colors[0],
7b0972df
JH
619 (int)(SvCUR(prog->float_substr)
620 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
621 SvPVX(prog->float_substr),
622 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
623 if (!s) {
624 if (last1 == last) {
625 DEBUG_r(PerlIO_printf(Perl_debug_log,
626 ", giving up...\n"));
627 goto fail_finish;
628 }
629 DEBUG_r(PerlIO_printf(Perl_debug_log,
630 ", trying anchored starting at offset %ld...\n",
30944b6d 631 (long)(s1 + 1 - i_strpos)));
803ff556 632 other_last = last;
1aa99e6b 633 s = HOP3c(t, 1, strend);
2c2d71f5
JH
634 goto restart;
635 }
636 else {
637 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 638 (long)(s - i_strpos)));
803ff556 639 other_last = s; /* Fix this later. --Hugo */
30944b6d 640 s = s1;
2c2d71f5
JH
641 if (t == strpos)
642 goto try_at_start;
2c2d71f5
JH
643 goto try_at_offset;
644 }
645 }
cad2e5aa 646 }
2c2d71f5
JH
647
648 t = s - prog->check_offset_max;
2c2d71f5
JH
649 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
650 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
651 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
652 && t > strpos))) {
2c2d71f5
JH
653 /* Fixed substring is found far enough so that the match
654 cannot start at strpos. */
655 try_at_offset:
cad2e5aa 656 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
657 /* Eventually fbm_*() should handle this, but often
658 anchored_offset is not 0, so this check will not be wasted. */
659 /* XXXX In the code below we prefer to look for "^" even in
660 presence of anchored substrings. And we search even
661 beyond the found float position. These pessimizations
662 are historical artefacts only. */
663 find_anchor:
2c2d71f5 664 while (t < strend - prog->minlen) {
cad2e5aa 665 if (*t == '\n') {
4ee3650e 666 if (t < check_at - prog->check_offset_min) {
30944b6d 667 if (prog->anchored_substr) {
4ee3650e
GS
668 /* Since we moved from the found position,
669 we definitely contradict the found anchored
30944b6d
IZ
670 substr. Due to the above check we do not
671 contradict "check" substr.
672 Thus we can arrive here only if check substr
673 is float. Redo checking for "other"=="fixed".
674 */
9041c2e3 675 strpos = t + 1;
30944b6d
IZ
676 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
677 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
678 goto do_other_anchored;
679 }
4ee3650e
GS
680 /* We don't contradict the found floating substring. */
681 /* XXXX Why not check for STCLASS? */
cad2e5aa 682 s = t + 1;
2c2d71f5 683 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 684 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
685 goto set_useful;
686 }
4ee3650e
GS
687 /* Position contradicts check-string */
688 /* XXXX probably better to look for check-string
689 than for "\n", so one should lower the limit for t? */
690 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 691 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 692 other_last = strpos = s = t + 1;
cad2e5aa
JH
693 goto restart;
694 }
695 t++;
696 }
2c2d71f5
JH
697 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
698 PL_colors[0],PL_colors[1]));
699 goto fail_finish;
cad2e5aa 700 }
f5952150
GS
701 else {
702 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
703 PL_colors[0],PL_colors[1]));
704 }
cad2e5aa
JH
705 s = t;
706 set_useful:
2c2d71f5 707 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
708 }
709 else {
f5952150 710 /* The found string does not prohibit matching at strpos,
2c2d71f5 711 - no optimization of calling REx engine can be performed,
f5952150
GS
712 unless it was an MBOL and we are not after MBOL,
713 or a future STCLASS check will fail this. */
2c2d71f5
JH
714 try_at_start:
715 /* Even in this situation we may use MBOL flag if strpos is offset
716 wrt the start of the string. */
05b4157f 717 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 718 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
719 /* May be due to an implicit anchor of m{.*foo} */
720 && !(prog->reganch & ROPT_IMPLICIT))
721 {
cad2e5aa
JH
722 t = strpos;
723 goto find_anchor;
724 }
30944b6d 725 DEBUG_r( if (ml_anch)
f5952150
GS
726 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
727 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 728 );
2c2d71f5 729 success_at_start:
30944b6d 730 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 731 && prog->check_substr /* Could be deleted already */
cad2e5aa 732 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
733 && prog->check_substr == prog->float_substr)
734 {
cad2e5aa 735 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 736 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
737 SvREFCNT_dec(prog->check_substr);
738 prog->check_substr = Nullsv; /* disable */
739 prog->float_substr = Nullsv; /* clear */
5e39e1e5 740 check = Nullsv; /* abort */
cad2e5aa 741 s = strpos;
3cf5c195
IZ
742 /* XXXX This is a remnant of the old implementation. It
743 looks wasteful, since now INTUIT can use many
6eb5f6b9 744 other heuristics. */
cad2e5aa
JH
745 prog->reganch &= ~RE_USE_INTUIT;
746 }
747 else
748 s = strpos;
749 }
750
6eb5f6b9
JH
751 /* Last resort... */
752 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
753 if (prog->regstclass) {
754 /* minlen == 0 is possible if regstclass is \b or \B,
755 and the fixed substr is ''$.
756 Since minlen is already taken into account, s+1 is before strend;
757 accidentally, minlen >= 1 guaranties no false positives at s + 1
758 even for \b or \B. But (minlen? 1 : 0) below assumes that
759 regstclass does not come from lookahead... */
760 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
761 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 762 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 763 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 764 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 765 : 1);
6eb5f6b9 766 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
767 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
768 : (prog->float_substr
769 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
770 cl_l, strend)
771 : strend);
a1933d95 772 char *startpos = strbeg;
6eb5f6b9
JH
773
774 t = s;
76384e4a 775 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 776 PL_regdata = prog->data;
76384e4a
GS
777 PL_bostr = startpos;
778 }
6eb5f6b9
JH
779 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
780 if (!s) {
781#ifdef DEBUGGING
b7953727 782 char *what = 0;
6eb5f6b9
JH
783#endif
784 if (endpos == strend) {
785 DEBUG_r( PerlIO_printf(Perl_debug_log,
786 "Could not match STCLASS...\n") );
787 goto fail;
788 }
66e933ab
GS
789 DEBUG_r( PerlIO_printf(Perl_debug_log,
790 "This position contradicts STCLASS...\n") );
653099ff
GS
791 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
792 goto fail;
6eb5f6b9
JH
793 /* Contradict one of substrings */
794 if (prog->anchored_substr) {
6eb5f6b9
JH
795 if (prog->anchored_substr == check) {
796 DEBUG_r( what = "anchored" );
797 hop_and_restart:
1aa99e6b 798 s = HOP3c(t, 1, strend);
66e933ab
GS
799 if (s + start_shift + end_shift > strend) {
800 /* XXXX Should be taken into account earlier? */
801 DEBUG_r( PerlIO_printf(Perl_debug_log,
802 "Could not match STCLASS...\n") );
803 goto fail;
804 }
5e39e1e5
HS
805 if (!check)
806 goto giveup;
6eb5f6b9 807 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 808 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
809 what, (long)(s + start_shift - i_strpos)) );
810 goto restart;
811 }
66e933ab 812 /* Have both, check_string is floating */
6eb5f6b9
JH
813 if (t + start_shift >= check_at) /* Contradicts floating=check */
814 goto retry_floating_check;
815 /* Recheck anchored substring, but not floating... */
9041c2e3 816 s = check_at;
5e39e1e5
HS
817 if (!check)
818 goto giveup;
6eb5f6b9 819 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 820 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
821 (long)(other_last - i_strpos)) );
822 goto do_other_anchored;
823 }
60e71179
GS
824 /* Another way we could have checked stclass at the
825 current position only: */
826 if (ml_anch) {
827 s = t = t + 1;
5e39e1e5
HS
828 if (!check)
829 goto giveup;
60e71179 830 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
831 "Looking for /%s^%s/m starting at offset %ld...\n",
832 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 833 goto try_at_offset;
66e933ab 834 }
60e71179
GS
835 if (!prog->float_substr) /* Could have been deleted */
836 goto fail;
6eb5f6b9
JH
837 /* Check is floating subtring. */
838 retry_floating_check:
839 t = check_at - start_shift;
840 DEBUG_r( what = "floating" );
841 goto hop_and_restart;
842 }
b7953727
JH
843 if (t != s) {
844 DEBUG_r(PerlIO_printf(Perl_debug_log,
6eb5f6b9 845 "By STCLASS: moving %ld --> %ld\n",
b7953727
JH
846 (long)(t - i_strpos), (long)(s - i_strpos))
847 );
848 }
849 else {
850 DEBUG_r(PerlIO_printf(Perl_debug_log,
851 "Does not contradict STCLASS...\n");
852 );
853 }
6eb5f6b9 854 }
5e39e1e5
HS
855 giveup:
856 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
857 PL_colors[4], (check ? "Guessed" : "Giving up"),
858 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 859 return s;
2c2d71f5
JH
860
861 fail_finish: /* Substring not found */
66e933ab
GS
862 if (prog->check_substr) /* could be removed already */
863 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 864 fail:
2c2d71f5 865 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
866 PL_colors[4],PL_colors[5]));
867 return Nullch;
868}
9661b544 869
6eb5f6b9 870/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
871STATIC char *
872S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 873{
6eb5f6b9
JH
874 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
875 char *m;
d8093b23
G
876 STRLEN ln;
877 unsigned int c1;
878 unsigned int c2;
6eb5f6b9
JH
879 char *e;
880 register I32 tmp = 1; /* Scratch variable? */
ffc61ed2 881 register bool do_utf8 = DO_UTF8(PL_reg_sv);
cad2e5aa 882
6eb5f6b9
JH
883 /* We know what class it must start with. */
884 switch (OP(c)) {
6eb5f6b9 885 case ANYOF:
a0ed51b3 886 while (s < strend) {
ffc61ed2 887 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
888 if (tmp && (norun || regtry(prog, s)))
889 goto got_it;
890 else
891 tmp = doevery;
a0ed51b3 892 }
6eb5f6b9
JH
893 else
894 tmp = 1;
ffc61ed2 895 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 896 }
6eb5f6b9
JH
897 break;
898 case EXACTF:
899 m = STRING(c);
900 ln = STR_LEN(c);
1aa99e6b
IH
901 if (UTF) {
902 c1 = to_utf8_lower((U8*)m);
903 c2 = to_utf8_upper((U8*)m);
904 }
905 else {
906 c1 = *(U8*)m;
907 c2 = PL_fold[c1];
908 }
6eb5f6b9
JH
909 goto do_exactf;
910 case EXACTFL:
911 m = STRING(c);
912 ln = STR_LEN(c);
d8093b23 913 c1 = *(U8*)m;
6eb5f6b9
JH
914 c2 = PL_fold_locale[c1];
915 do_exactf:
916 e = strend - ln;
b3c9acc1 917
6eb5f6b9
JH
918 if (norun && e < s)
919 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
920
921 if (do_utf8) {
922 STRLEN len;
923 if (c1 == c2)
924 while (s <= e) {
9041c2e3 925 if ( utf8_to_uvchr((U8*)s, &len) == c1
1aa99e6b
IH
926 && regtry(prog, s) )
927 goto got_it;
928 s += len;
929 }
930 else
931 while (s <= e) {
9041c2e3 932 UV c = utf8_to_uvchr((U8*)s, &len);
1aa99e6b
IH
933 if ( (c == c1 || c == c2) && regtry(prog, s) )
934 goto got_it;
935 s += len;
936 }
937 }
938 else {
939 if (c1 == c2)
940 while (s <= e) {
941 if ( *(U8*)s == c1
942 && (ln == 1 || !(OP(c) == EXACTF
943 ? ibcmp(s, m, ln)
944 : ibcmp_locale(s, m, ln)))
945 && (norun || regtry(prog, s)) )
946 goto got_it;
947 s++;
948 }
949 else
950 while (s <= e) {
951 if ( (*(U8*)s == c1 || *(U8*)s == c2)
952 && (ln == 1 || !(OP(c) == EXACTF
953 ? ibcmp(s, m, ln)
954 : ibcmp_locale(s, m, ln)))
955 && (norun || regtry(prog, s)) )
956 goto got_it;
957 s++;
958 }
b3c9acc1
IZ
959 }
960 break;
bbce6d69 961 case BOUNDL:
3280af22 962 PL_reg_flags |= RF_tainted;
bbce6d69 963 /* FALL THROUGH */
a0d0e21e 964 case BOUND:
ffc61ed2 965 if (do_utf8) {
12d33761 966 if (s == PL_bostr)
ffc61ed2
JH
967 tmp = '\n';
968 else {
1aa99e6b 969 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 970
0064a8a9
JH
971 if (s > (char*)r)
972 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
973 }
974 tmp = ((OP(c) == BOUND ?
9041c2e3 975 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 976 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
977 while (s < strend) {
978 if (tmp == !(OP(c) == BOUND ?
3568d838 979 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
980 isALNUM_LC_utf8((U8*)s)))
981 {
982 tmp = !tmp;
983 if ((norun || regtry(prog, s)))
984 goto got_it;
985 }
986 s += UTF8SKIP(s);
a687059c 987 }
a0d0e21e 988 }
667bb95a 989 else {
12d33761 990 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
991 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
992 while (s < strend) {
993 if (tmp ==
994 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
995 tmp = !tmp;
996 if ((norun || regtry(prog, s)))
997 goto got_it;
998 }
999 s++;
a0ed51b3 1000 }
a0ed51b3 1001 }
6eb5f6b9 1002 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1003 goto got_it;
1004 break;
bbce6d69 1005 case NBOUNDL:
3280af22 1006 PL_reg_flags |= RF_tainted;
bbce6d69 1007 /* FALL THROUGH */
a0d0e21e 1008 case NBOUND:
ffc61ed2 1009 if (do_utf8) {
12d33761 1010 if (s == PL_bostr)
ffc61ed2
JH
1011 tmp = '\n';
1012 else {
1aa99e6b 1013 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1014
0064a8a9
JH
1015 if (s > (char*)r)
1016 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1017 }
1018 tmp = ((OP(c) == NBOUND ?
9041c2e3 1019 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1020 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1021 while (s < strend) {
1022 if (tmp == !(OP(c) == NBOUND ?
3568d838 1023 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1024 isALNUM_LC_utf8((U8*)s)))
1025 tmp = !tmp;
1026 else if ((norun || regtry(prog, s)))
1027 goto got_it;
1028 s += UTF8SKIP(s);
1029 }
a0d0e21e 1030 }
667bb95a 1031 else {
12d33761 1032 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1033 tmp = ((OP(c) == NBOUND ?
1034 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1035 while (s < strend) {
1036 if (tmp ==
1037 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1038 tmp = !tmp;
1039 else if ((norun || regtry(prog, s)))
1040 goto got_it;
1041 s++;
1042 }
a0ed51b3 1043 }
6eb5f6b9 1044 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1045 goto got_it;
1046 break;
a0d0e21e 1047 case ALNUM:
ffc61ed2 1048 if (do_utf8) {
8269fa76 1049 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1050 while (s < strend) {
3568d838 1051 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1052 if (tmp && (norun || regtry(prog, s)))
1053 goto got_it;
1054 else
1055 tmp = doevery;
1056 }
bbce6d69 1057 else
ffc61ed2
JH
1058 tmp = 1;
1059 s += UTF8SKIP(s);
bbce6d69 1060 }
bbce6d69 1061 }
ffc61ed2
JH
1062 else {
1063 while (s < strend) {
1064 if (isALNUM(*s)) {
1065 if (tmp && (norun || regtry(prog, s)))
1066 goto got_it;
1067 else
1068 tmp = doevery;
1069 }
a0ed51b3 1070 else
ffc61ed2
JH
1071 tmp = 1;
1072 s++;
a0ed51b3 1073 }
a0ed51b3
LW
1074 }
1075 break;
bbce6d69 1076 case ALNUML:
3280af22 1077 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1078 if (do_utf8) {
1079 while (s < strend) {
1080 if (isALNUM_LC_utf8((U8*)s)) {
1081 if (tmp && (norun || regtry(prog, s)))
1082 goto got_it;
1083 else
1084 tmp = doevery;
1085 }
a687059c 1086 else
ffc61ed2
JH
1087 tmp = 1;
1088 s += UTF8SKIP(s);
a0d0e21e 1089 }
a0d0e21e 1090 }
ffc61ed2
JH
1091 else {
1092 while (s < strend) {
1093 if (isALNUM_LC(*s)) {
1094 if (tmp && (norun || regtry(prog, s)))
1095 goto got_it;
1096 else
1097 tmp = doevery;
1098 }
a0ed51b3 1099 else
ffc61ed2
JH
1100 tmp = 1;
1101 s++;
a0ed51b3 1102 }
a0ed51b3
LW
1103 }
1104 break;
a0d0e21e 1105 case NALNUM:
ffc61ed2 1106 if (do_utf8) {
8269fa76 1107 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1108 while (s < strend) {
3568d838 1109 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1110 if (tmp && (norun || regtry(prog, s)))
1111 goto got_it;
1112 else
1113 tmp = doevery;
1114 }
bbce6d69 1115 else
ffc61ed2
JH
1116 tmp = 1;
1117 s += UTF8SKIP(s);
bbce6d69 1118 }
bbce6d69 1119 }
ffc61ed2
JH
1120 else {
1121 while (s < strend) {
1122 if (!isALNUM(*s)) {
1123 if (tmp && (norun || regtry(prog, s)))
1124 goto got_it;
1125 else
1126 tmp = doevery;
1127 }
a0ed51b3 1128 else
ffc61ed2
JH
1129 tmp = 1;
1130 s++;
a0ed51b3 1131 }
a0ed51b3
LW
1132 }
1133 break;
bbce6d69 1134 case NALNUML:
3280af22 1135 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1136 if (do_utf8) {
1137 while (s < strend) {
1138 if (!isALNUM_LC_utf8((U8*)s)) {
1139 if (tmp && (norun || regtry(prog, s)))
1140 goto got_it;
1141 else
1142 tmp = doevery;
1143 }
a687059c 1144 else
ffc61ed2
JH
1145 tmp = 1;
1146 s += UTF8SKIP(s);
a687059c 1147 }
a0d0e21e 1148 }
ffc61ed2
JH
1149 else {
1150 while (s < strend) {
1151 if (!isALNUM_LC(*s)) {
1152 if (tmp && (norun || regtry(prog, s)))
1153 goto got_it;
1154 else
1155 tmp = doevery;
1156 }
a0ed51b3 1157 else
ffc61ed2
JH
1158 tmp = 1;
1159 s++;
a0ed51b3 1160 }
a0ed51b3
LW
1161 }
1162 break;
a0d0e21e 1163 case SPACE:
ffc61ed2 1164 if (do_utf8) {
8269fa76 1165 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1166 while (s < strend) {
3568d838 1167 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1168 if (tmp && (norun || regtry(prog, s)))
1169 goto got_it;
1170 else
1171 tmp = doevery;
1172 }
a0d0e21e 1173 else
ffc61ed2
JH
1174 tmp = 1;
1175 s += UTF8SKIP(s);
2304df62 1176 }
a0d0e21e 1177 }
ffc61ed2
JH
1178 else {
1179 while (s < strend) {
1180 if (isSPACE(*s)) {
1181 if (tmp && (norun || regtry(prog, s)))
1182 goto got_it;
1183 else
1184 tmp = doevery;
1185 }
a0ed51b3 1186 else
ffc61ed2
JH
1187 tmp = 1;
1188 s++;
a0ed51b3 1189 }
a0ed51b3
LW
1190 }
1191 break;
bbce6d69 1192 case SPACEL:
3280af22 1193 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1194 if (do_utf8) {
1195 while (s < strend) {
1196 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1197 if (tmp && (norun || regtry(prog, s)))
1198 goto got_it;
1199 else
1200 tmp = doevery;
1201 }
bbce6d69 1202 else
ffc61ed2
JH
1203 tmp = 1;
1204 s += UTF8SKIP(s);
bbce6d69 1205 }
bbce6d69 1206 }
ffc61ed2
JH
1207 else {
1208 while (s < strend) {
1209 if (isSPACE_LC(*s)) {
1210 if (tmp && (norun || regtry(prog, s)))
1211 goto got_it;
1212 else
1213 tmp = doevery;
1214 }
a0ed51b3 1215 else
ffc61ed2
JH
1216 tmp = 1;
1217 s++;
a0ed51b3 1218 }
a0ed51b3
LW
1219 }
1220 break;
a0d0e21e 1221 case NSPACE:
ffc61ed2 1222 if (do_utf8) {
8269fa76 1223 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1224 while (s < strend) {
3568d838 1225 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1226 if (tmp && (norun || regtry(prog, s)))
1227 goto got_it;
1228 else
1229 tmp = doevery;
1230 }
a0d0e21e 1231 else
ffc61ed2
JH
1232 tmp = 1;
1233 s += UTF8SKIP(s);
a687059c 1234 }
a0d0e21e 1235 }
ffc61ed2
JH
1236 else {
1237 while (s < strend) {
1238 if (!isSPACE(*s)) {
1239 if (tmp && (norun || regtry(prog, s)))
1240 goto got_it;
1241 else
1242 tmp = doevery;
1243 }
a0ed51b3 1244 else
ffc61ed2
JH
1245 tmp = 1;
1246 s++;
a0ed51b3 1247 }
a0ed51b3
LW
1248 }
1249 break;
bbce6d69 1250 case NSPACEL:
3280af22 1251 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1252 if (do_utf8) {
1253 while (s < strend) {
1254 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1255 if (tmp && (norun || regtry(prog, s)))
1256 goto got_it;
1257 else
1258 tmp = doevery;
1259 }
bbce6d69 1260 else
ffc61ed2
JH
1261 tmp = 1;
1262 s += UTF8SKIP(s);
bbce6d69 1263 }
bbce6d69 1264 }
ffc61ed2
JH
1265 else {
1266 while (s < strend) {
1267 if (!isSPACE_LC(*s)) {
1268 if (tmp && (norun || regtry(prog, s)))
1269 goto got_it;
1270 else
1271 tmp = doevery;
1272 }
a0ed51b3 1273 else
ffc61ed2
JH
1274 tmp = 1;
1275 s++;
a0ed51b3 1276 }
a0ed51b3
LW
1277 }
1278 break;
a0d0e21e 1279 case DIGIT:
ffc61ed2 1280 if (do_utf8) {
8269fa76 1281 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1282 while (s < strend) {
3568d838 1283 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1284 if (tmp && (norun || regtry(prog, s)))
1285 goto got_it;
1286 else
1287 tmp = doevery;
1288 }
a0d0e21e 1289 else
ffc61ed2
JH
1290 tmp = 1;
1291 s += UTF8SKIP(s);
2b69d0c2 1292 }
a0d0e21e 1293 }
ffc61ed2
JH
1294 else {
1295 while (s < strend) {
1296 if (isDIGIT(*s)) {
1297 if (tmp && (norun || regtry(prog, s)))
1298 goto got_it;
1299 else
1300 tmp = doevery;
1301 }
a0ed51b3 1302 else
ffc61ed2
JH
1303 tmp = 1;
1304 s++;
a0ed51b3 1305 }
a0ed51b3
LW
1306 }
1307 break;
b8c5462f
JH
1308 case DIGITL:
1309 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1310 if (do_utf8) {
1311 while (s < strend) {
1312 if (isDIGIT_LC_utf8((U8*)s)) {
1313 if (tmp && (norun || regtry(prog, s)))
1314 goto got_it;
1315 else
1316 tmp = doevery;
1317 }
b8c5462f 1318 else
ffc61ed2
JH
1319 tmp = 1;
1320 s += UTF8SKIP(s);
b8c5462f 1321 }
b8c5462f 1322 }
ffc61ed2
JH
1323 else {
1324 while (s < strend) {
1325 if (isDIGIT_LC(*s)) {
1326 if (tmp && (norun || regtry(prog, s)))
1327 goto got_it;
1328 else
1329 tmp = doevery;
1330 }
b8c5462f 1331 else
ffc61ed2
JH
1332 tmp = 1;
1333 s++;
b8c5462f 1334 }
b8c5462f
JH
1335 }
1336 break;
a0d0e21e 1337 case NDIGIT:
ffc61ed2 1338 if (do_utf8) {
8269fa76 1339 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1340 while (s < strend) {
3568d838 1341 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1342 if (tmp && (norun || regtry(prog, s)))
1343 goto got_it;
1344 else
1345 tmp = doevery;
1346 }
a0d0e21e 1347 else
ffc61ed2
JH
1348 tmp = 1;
1349 s += UTF8SKIP(s);
a687059c 1350 }
a0d0e21e 1351 }
ffc61ed2
JH
1352 else {
1353 while (s < strend) {
1354 if (!isDIGIT(*s)) {
1355 if (tmp && (norun || regtry(prog, s)))
1356 goto got_it;
1357 else
1358 tmp = doevery;
1359 }
a0ed51b3 1360 else
ffc61ed2
JH
1361 tmp = 1;
1362 s++;
a0ed51b3 1363 }
a0ed51b3
LW
1364 }
1365 break;
b8c5462f
JH
1366 case NDIGITL:
1367 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1368 if (do_utf8) {
1369 while (s < strend) {
1370 if (!isDIGIT_LC_utf8((U8*)s)) {
1371 if (tmp && (norun || regtry(prog, s)))
1372 goto got_it;
1373 else
1374 tmp = doevery;
1375 }
b8c5462f 1376 else
ffc61ed2
JH
1377 tmp = 1;
1378 s += UTF8SKIP(s);
b8c5462f 1379 }
a0ed51b3 1380 }
ffc61ed2
JH
1381 else {
1382 while (s < strend) {
1383 if (!isDIGIT_LC(*s)) {
1384 if (tmp && (norun || regtry(prog, s)))
1385 goto got_it;
1386 else
1387 tmp = doevery;
1388 }
cf93c79d 1389 else
ffc61ed2
JH
1390 tmp = 1;
1391 s++;
b8c5462f 1392 }
b8c5462f
JH
1393 }
1394 break;
b3c9acc1 1395 default:
3c3eec57
GS
1396 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1397 break;
d6a28714 1398 }
6eb5f6b9
JH
1399 return 0;
1400 got_it:
1401 return s;
1402}
1403
1404/*
1405 - regexec_flags - match a regexp against a string
1406 */
1407I32
1408Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1409 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1410/* strend: pointer to null at end of string */
1411/* strbeg: real beginning of string */
1412/* minend: end of match must be >=minend after stringarg. */
1413/* data: May be used for some additional optimizations. */
1414/* nosave: For optimizations. */
1415{
6eb5f6b9
JH
1416 register char *s;
1417 register regnode *c;
1418 register char *startpos = stringarg;
6eb5f6b9
JH
1419 I32 minlen; /* must match at least this many chars */
1420 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1421 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1422 constant substr. */ /* CC */
1423 I32 end_shift = 0; /* Same for the end. */ /* CC */
1424 I32 scream_pos = -1; /* Internal iterator of scream. */
1425 char *scream_olds;
1426 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1427 bool do_utf8 = DO_UTF8(sv);
6eb5f6b9
JH
1428
1429 PL_regcc = 0;
1430
1431 cache_re(prog);
1432#ifdef DEBUGGING
aea4f609 1433 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1434#endif
1435
1436 /* Be paranoid... */
1437 if (prog == NULL || startpos == NULL) {
1438 Perl_croak(aTHX_ "NULL regexp parameter");
1439 return 0;
1440 }
1441
1442 minlen = prog->minlen;
1aa99e6b 1443 if (do_utf8) {
3baa4c62
JH
1444 if (!(prog->reganch & ROPT_SANY_SEEN))
1445 if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
1aa99e6b
IH
1446 }
1447 else {
1448 if (strend - startpos < minlen) goto phooey;
1449 }
6eb5f6b9 1450
6eb5f6b9
JH
1451 /* Check validity of program. */
1452 if (UCHARAT(prog->program) != REG_MAGIC) {
1453 Perl_croak(aTHX_ "corrupted regexp program");
1454 }
1455
1456 PL_reg_flags = 0;
1457 PL_reg_eval_set = 0;
1458 PL_reg_maxiter = 0;
1459
1460 if (prog->reganch & ROPT_UTF8)
1461 PL_reg_flags |= RF_utf8;
1462
1463 /* Mark beginning of line for ^ and lookbehind. */
1464 PL_regbol = startpos;
1465 PL_bostr = strbeg;
1466 PL_reg_sv = sv;
1467
1468 /* Mark end of line for $ (and such) */
1469 PL_regeol = strend;
1470
1471 /* see how far we have to get to not match where we matched before */
1472 PL_regtill = startpos+minend;
1473
1474 /* We start without call_cc context. */
1475 PL_reg_call_cc = 0;
1476
1477 /* If there is a "must appear" string, look for it. */
1478 s = startpos;
1479
1480 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1481 MAGIC *mg;
1482
1483 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1484 PL_reg_ganch = startpos;
1485 else if (sv && SvTYPE(sv) >= SVt_PVMG
1486 && SvMAGIC(sv)
14befaf4
DM
1487 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1488 && mg->mg_len >= 0) {
6eb5f6b9
JH
1489 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1490 if (prog->reganch & ROPT_ANCH_GPOS) {
1491 if (s > PL_reg_ganch)
1492 goto phooey;
1493 s = PL_reg_ganch;
1494 }
1495 }
1496 else /* pos() not defined */
1497 PL_reg_ganch = strbeg;
1498 }
1499
699c3c34
JH
1500 if (do_utf8 == (UTF!=0) &&
1501 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1502 re_scream_pos_data d;
1503
1504 d.scream_olds = &scream_olds;
1505 d.scream_pos = &scream_pos;
1506 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1507 if (!s)
1508 goto phooey; /* not present */
1509 }
1510
1511 DEBUG_r( if (!PL_colorset) reginitcolors() );
1512 DEBUG_r(PerlIO_printf(Perl_debug_log,
1513 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1514 PL_colors[4],PL_colors[5],PL_colors[0],
1515 prog->precomp,
1516 PL_colors[1],
1517 (strlen(prog->precomp) > 60 ? "..." : ""),
1518 PL_colors[0],
1519 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1520 startpos, PL_colors[1],
1521 (strend - startpos > 60 ? "..." : ""))
1522 );
1523
1524 /* Simplest case: anchored match need be tried only once. */
1525 /* [unless only anchor is BOL and multiline is set] */
1526 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1527 if (s == startpos && regtry(prog, startpos))
1528 goto got_it;
1529 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1530 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1531 {
1532 char *end;
1533
1534 if (minlen)
1535 dontbother = minlen - 1;
1aa99e6b 1536 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1537 /* for multiline we only have to try after newlines */
1538 if (prog->check_substr) {
1539 if (s == startpos)
1540 goto after_try;
1541 while (1) {
1542 if (regtry(prog, s))
1543 goto got_it;
1544 after_try:
1545 if (s >= end)
1546 goto phooey;
1547 if (prog->reganch & RE_USE_INTUIT) {
1548 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1549 if (!s)
1550 goto phooey;
1551 }
1552 else
1553 s++;
1554 }
1555 } else {
1556 if (s > startpos)
1557 s--;
1558 while (s < end) {
1559 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1560 if (regtry(prog, s))
1561 goto got_it;
1562 }
1563 }
1564 }
1565 }
1566 goto phooey;
1567 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1568 if (regtry(prog, PL_reg_ganch))
1569 goto got_it;
1570 goto phooey;
1571 }
1572
1573 /* Messy cases: unanchored match. */
9041c2e3 1574 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1575 /* we have /x+whatever/ */
1576 /* it must be a one character string (XXXX Except UTF?) */
1577 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1578#ifdef DEBUGGING
1579 int did_match = 0;
1580#endif
1581
1aa99e6b 1582 if (do_utf8) {
6eb5f6b9
JH
1583 while (s < strend) {
1584 if (*s == ch) {
bf93d4cc 1585 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1586 if (regtry(prog, s)) goto got_it;
1587 s += UTF8SKIP(s);
1588 while (s < strend && *s == ch)
1589 s += UTF8SKIP(s);
1590 }
1591 s += UTF8SKIP(s);
1592 }
1593 }
1594 else {
1595 while (s < strend) {
1596 if (*s == ch) {
bf93d4cc 1597 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1598 if (regtry(prog, s)) goto got_it;
1599 s++;
1600 while (s < strend && *s == ch)
1601 s++;
1602 }
1603 s++;
1604 }
1605 }
b7953727 1606 DEBUG_r(if (!did_match)
bf93d4cc 1607 PerlIO_printf(Perl_debug_log,
b7953727
JH
1608 "Did not find anchored character...\n")
1609 );
6eb5f6b9
JH
1610 }
1611 /*SUPPRESS 560*/
1aa99e6b
IH
1612 else if (do_utf8 == (UTF!=0) &&
1613 (prog->anchored_substr != Nullsv
9041c2e3 1614 || (prog->float_substr != Nullsv
1aa99e6b 1615 && prog->float_max_offset < strend - s))) {
9041c2e3 1616 SV *must = prog->anchored_substr
6eb5f6b9 1617 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1618 I32 back_max =
6eb5f6b9 1619 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1620 I32 back_min =
6eb5f6b9 1621 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1622 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1623 -(I32)(CHR_SVLEN(must)
1aa99e6b 1624 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1625 char *last1; /* Last position checked before */
bf93d4cc
GS
1626#ifdef DEBUGGING
1627 int did_match = 0;
1628#endif
6eb5f6b9
JH
1629
1630 if (s > PL_bostr)
1631 last1 = HOPc(s, -1);
1632 else
1633 last1 = s - 1; /* bogus */
1634
1635 /* XXXX check_substr already used to find `s', can optimize if
1636 check_substr==must. */
1637 scream_pos = -1;
1638 dontbother = end_shift;
1639 strend = HOPc(strend, -dontbother);
1640 while ( (s <= last) &&
9041c2e3 1641 ((flags & REXEC_SCREAM)
1aa99e6b 1642 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1643 end_shift, &scream_pos, 0))
1aa99e6b 1644 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1645 (unsigned char*)strend, must,
6eb5f6b9 1646 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1647 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1648 if (HOPc(s, -back_max) > last1) {
1649 last1 = HOPc(s, -back_min);
1650 s = HOPc(s, -back_max);
1651 }
1652 else {
1653 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1654
1655 last1 = HOPc(s, -back_min);
1656 s = t;
1657 }
1aa99e6b 1658 if (do_utf8) {
6eb5f6b9
JH
1659 while (s <= last1) {
1660 if (regtry(prog, s))
1661 goto got_it;
1662 s += UTF8SKIP(s);
1663 }
1664 }
1665 else {
1666 while (s <= last1) {
1667 if (regtry(prog, s))
1668 goto got_it;
1669 s++;
1670 }
1671 }
1672 }
b7953727
JH
1673 DEBUG_r(if (!did_match)
1674 PerlIO_printf(Perl_debug_log,
1675 "Did not find %s substr `%s%.*s%s'%s...\n",
bf93d4cc
GS
1676 ((must == prog->anchored_substr)
1677 ? "anchored" : "floating"),
1678 PL_colors[0],
1679 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1680 SvPVX(must),
b7953727
JH
1681 PL_colors[1], (SvTAIL(must) ? "$" : ""))
1682 );
6eb5f6b9
JH
1683 goto phooey;
1684 }
155aba94 1685 else if ((c = prog->regstclass)) {
66e933ab
GS
1686 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1687 /* don't bother with what can't match */
6eb5f6b9 1688 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1689 DEBUG_r({
1690 SV *prop = sv_newmortal();
1691 regprop(prop, c);
1692 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s);
1693 });
6eb5f6b9
JH
1694 if (find_byclass(prog, c, s, strend, startpos, 0))
1695 goto got_it;
bf93d4cc 1696 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1697 }
1698 else {
1699 dontbother = 0;
1700 if (prog->float_substr != Nullsv) { /* Trim the end. */
1701 char *last;
d6a28714
JH
1702
1703 if (flags & REXEC_SCREAM) {
1704 last = screaminstr(sv, prog->float_substr, s - strbeg,
1705 end_shift, &scream_pos, 1); /* last one */
1706 if (!last)
ffc61ed2 1707 last = scream_olds; /* Only one occurrence. */
b8c5462f 1708 }
d6a28714
JH
1709 else {
1710 STRLEN len;
1711 char *little = SvPV(prog->float_substr, len);
1712
1713 if (SvTAIL(prog->float_substr)) {
1714 if (memEQ(strend - len + 1, little, len - 1))
1715 last = strend - len + 1;
1716 else if (!PL_multiline)
9041c2e3 1717 last = memEQ(strend - len, little, len)
d6a28714 1718 ? strend - len : Nullch;
b8c5462f 1719 else
d6a28714
JH
1720 goto find_last;
1721 } else {
1722 find_last:
9041c2e3 1723 if (len)
d6a28714 1724 last = rninstr(s, strend, little, little + len);
b8c5462f 1725 else
d6a28714 1726 last = strend; /* matching `$' */
b8c5462f 1727 }
b8c5462f 1728 }
bf93d4cc
GS
1729 if (last == NULL) {
1730 DEBUG_r(PerlIO_printf(Perl_debug_log,
1731 "%sCan't trim the tail, match fails (should not happen)%s\n",
1732 PL_colors[4],PL_colors[5]));
1733 goto phooey; /* Should not happen! */
1734 }
d6a28714
JH
1735 dontbother = strend - last + prog->float_min_offset;
1736 }
1737 if (minlen && (dontbother < minlen))
1738 dontbother = minlen - 1;
1739 strend -= dontbother; /* this one's always in bytes! */
1740 /* We don't know much -- general case. */
1aa99e6b 1741 if (do_utf8) {
d6a28714
JH
1742 for (;;) {
1743 if (regtry(prog, s))
1744 goto got_it;
1745 if (s >= strend)
1746 break;
b8c5462f 1747 s += UTF8SKIP(s);
d6a28714
JH
1748 };
1749 }
1750 else {
1751 do {
1752 if (regtry(prog, s))
1753 goto got_it;
1754 } while (s++ < strend);
1755 }
1756 }
1757
1758 /* Failure. */
1759 goto phooey;
1760
1761got_it:
1762 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1763
1764 if (PL_reg_eval_set) {
1765 /* Preserve the current value of $^R */
1766 if (oreplsv != GvSV(PL_replgv))
1767 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1768 restored, the value remains
1769 the same. */
1770 restore_pos(aTHXo_ 0);
1771 }
1772
1773 /* make sure $`, $&, $', and $digit will work later */
1774 if ( !(flags & REXEC_NOT_FIRST) ) {
1775 if (RX_MATCH_COPIED(prog)) {
1776 Safefree(prog->subbeg);
1777 RX_MATCH_COPIED_off(prog);
1778 }
1779 if (flags & REXEC_COPY_STR) {
1780 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1781
1782 s = savepvn(strbeg, i);
1783 prog->subbeg = s;
1784 prog->sublen = i;
1785 RX_MATCH_COPIED_on(prog);
1786 }
1787 else {
1788 prog->subbeg = strbeg;
1789 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1790 }
1791 }
9041c2e3 1792
d6a28714
JH
1793 return 1;
1794
1795phooey:
bf93d4cc
GS
1796 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1797 PL_colors[4],PL_colors[5]));
d6a28714
JH
1798 if (PL_reg_eval_set)
1799 restore_pos(aTHXo_ 0);
1800 return 0;
1801}
1802
1803/*
1804 - regtry - try match at specific point
1805 */
1806STATIC I32 /* 0 failure, 1 success */
1807S_regtry(pTHX_ regexp *prog, char *startpos)
1808{
d6a28714
JH
1809 register I32 i;
1810 register I32 *sp;
1811 register I32 *ep;
1812 CHECKPOINT lastcp;
1813
02db2b7b
IZ
1814#ifdef DEBUGGING
1815 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1816#endif
d6a28714
JH
1817 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1818 MAGIC *mg;
1819
1820 PL_reg_eval_set = RS_init;
1821 DEBUG_r(DEBUG_s(
b900a521
JH
1822 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1823 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1824 ));
e8347627 1825 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1826 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1827 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1828 SAVETMPS;
1829 /* Apparently this is not needed, judging by wantarray. */
e8347627 1830 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1831 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1832
1833 if (PL_reg_sv) {
1834 /* Make $_ available to executed code. */
1835 if (PL_reg_sv != DEFSV) {
1836 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1837 SAVESPTR(DEFSV);
1838 DEFSV = PL_reg_sv;
b8c5462f 1839 }
d6a28714 1840
9041c2e3 1841 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1842 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1843 /* prepare for quick setting of pos */
14befaf4
DM
1844 sv_magic(PL_reg_sv, (SV*)0,
1845 PERL_MAGIC_regex_global, Nullch, 0);
1846 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1847 mg->mg_len = -1;
b8c5462f 1848 }
d6a28714
JH
1849 PL_reg_magic = mg;
1850 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1851 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1852 }
1853 if (!PL_reg_curpm)
0f79a09d 1854 Newz(22,PL_reg_curpm, 1, PMOP);
aaa362c4 1855 PM_SETRE(PL_reg_curpm, prog);
d6a28714
JH
1856 PL_reg_oldcurpm = PL_curpm;
1857 PL_curpm = PL_reg_curpm;
1858 if (RX_MATCH_COPIED(prog)) {
1859 /* Here is a serious problem: we cannot rewrite subbeg,
1860 since it may be needed if this match fails. Thus
1861 $` inside (?{}) could fail... */
1862 PL_reg_oldsaved = prog->subbeg;
1863 PL_reg_oldsavedlen = prog->sublen;
1864 RX_MATCH_COPIED_off(prog);
1865 }
1866 else
1867 PL_reg_oldsaved = Nullch;
1868 prog->subbeg = PL_bostr;
1869 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1870 }
1871 prog->startp[0] = startpos - PL_bostr;
1872 PL_reginput = startpos;
1873 PL_regstartp = prog->startp;
1874 PL_regendp = prog->endp;
1875 PL_reglastparen = &prog->lastparen;
a01268b5 1876 PL_reglastcloseparen = &prog->lastcloseparen;
d6a28714
JH
1877 prog->lastparen = 0;
1878 PL_regsize = 0;
1879 DEBUG_r(PL_reg_starttry = startpos);
1880 if (PL_reg_start_tmpl <= prog->nparens) {
1881 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1882 if(PL_reg_start_tmp)
1883 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1884 else
1885 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1886 }
1887
1888 /* XXXX What this code is doing here?!!! There should be no need
1889 to do this again and again, PL_reglastparen should take care of
3dd2943c 1890 this! --ilya*/
dafc8851
JH
1891
1892 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1893 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1894 * PL_reglastparen), is not needed at all by the test suite
1895 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1896 * enough, for building DynaLoader, or otherwise this
1897 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1898 * will happen. Meanwhile, this code *is* needed for the
1899 * above-mentioned test suite tests to succeed. The common theme
1900 * on those tests seems to be returning null fields from matches.
1901 * --jhi */
dafc8851 1902#if 1
d6a28714
JH
1903 sp = prog->startp;
1904 ep = prog->endp;
1905 if (prog->nparens) {
09e8ae3b 1906 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1907 *++sp = -1;
1908 *++ep = -1;
1909 }
1910 }
dafc8851 1911#endif
02db2b7b 1912 REGCP_SET(lastcp);
d6a28714
JH
1913 if (regmatch(prog->program + 1)) {
1914 prog->endp[0] = PL_reginput - PL_bostr;
1915 return 1;
1916 }
02db2b7b 1917 REGCP_UNWIND(lastcp);
d6a28714
JH
1918 return 0;
1919}
1920
02db2b7b
IZ
1921#define RE_UNWIND_BRANCH 1
1922#define RE_UNWIND_BRANCHJ 2
1923
1924union re_unwind_t;
1925
1926typedef struct { /* XX: makes sense to enlarge it... */
1927 I32 type;
1928 I32 prev;
1929 CHECKPOINT lastcp;
1930} re_unwind_generic_t;
1931
1932typedef struct {
1933 I32 type;
1934 I32 prev;
1935 CHECKPOINT lastcp;
1936 I32 lastparen;
1937 regnode *next;
1938 char *locinput;
1939 I32 nextchr;
1940#ifdef DEBUGGING
1941 int regindent;
1942#endif
1943} re_unwind_branch_t;
1944
1945typedef union re_unwind_t {
1946 I32 type;
1947 re_unwind_generic_t generic;
1948 re_unwind_branch_t branch;
1949} re_unwind_t;
1950
d6a28714
JH
1951/*
1952 - regmatch - main matching routine
1953 *
1954 * Conceptually the strategy is simple: check to see whether the current
1955 * node matches, call self recursively to see whether the rest matches,
1956 * and then act accordingly. In practice we make some effort to avoid
1957 * recursion, in particular by going through "ordinary" nodes (that don't
1958 * need to know whether the rest of the match failed) by a loop instead of
1959 * by recursion.
1960 */
1961/* [lwall] I've hoisted the register declarations to the outer block in order to
1962 * maybe save a little bit of pushing and popping on the stack. It also takes
1963 * advantage of machines that use a register save mask on subroutine entry.
1964 */
1965STATIC I32 /* 0 failure, 1 success */
1966S_regmatch(pTHX_ regnode *prog)
1967{
d6a28714
JH
1968 register regnode *scan; /* Current node. */
1969 regnode *next; /* Next node. */
1970 regnode *inner; /* Next node in internal branch. */
1971 register I32 nextchr; /* renamed nextchr - nextchar colides with
1972 function of same name */
1973 register I32 n; /* no or next */
b7953727
JH
1974 register I32 ln = 0; /* len or last */
1975 register char *s = Nullch; /* operand or save */
d6a28714 1976 register char *locinput = PL_reginput;
b7953727 1977 register I32 c1 = 0, c2 = 0, paren; /* case fold search, parenth */
d6a28714 1978 int minmod = 0, sw = 0, logical = 0;
02db2b7b 1979 I32 unwind = 0;
b7953727 1980#if 0
02db2b7b 1981 I32 firstcp = PL_savestack_ix;
b7953727 1982#endif
ffc61ed2 1983 register bool do_utf8 = DO_UTF8(PL_reg_sv);
02db2b7b 1984
d6a28714
JH
1985#ifdef DEBUGGING
1986 PL_regindent++;
1987#endif
1988
1989 /* Note that nextchr is a byte even in UTF */
1990 nextchr = UCHARAT(locinput);
1991 scan = prog;
1992 while (scan != NULL) {
1993#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
02db2b7b 1994#if 1
d6a28714
JH
1995# define sayYES goto yes
1996# define sayNO goto no
7821416a
IZ
1997# define sayYES_FINAL goto yes_final
1998# define sayYES_LOUD goto yes_loud
1999# define sayNO_FINAL goto no_final
2000# define sayNO_SILENT goto do_no
d6a28714
JH
2001# define saySAME(x) if (x) goto yes; else goto no
2002# define REPORT_CODE_OFF 24
2003#else
2004# define sayYES return 1
2005# define sayNO return 0
7821416a
IZ
2006# define sayYES_FINAL return 1
2007# define sayYES_LOUD return 1
2008# define sayNO_FINAL return 0
2009# define sayNO_SILENT return 0
d6a28714
JH
2010# define saySAME(x) return x
2011#endif
2012 DEBUG_r( {
2013 SV *prop = sv_newmortal();
2014 int docolor = *PL_colors[0];
2015 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2016 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2017 /* The part of the string before starttry has one color
2018 (pref0_len chars), between starttry and current
2019 position another one (pref_len - pref0_len chars),
2020 after the current position the third one.
2021 We assume that pref0_len <= pref_len, otherwise we
2022 decrease pref0_len. */
9041c2e3 2023 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2024 ? (5 + taill) - l : locinput - PL_bostr;
2025 int pref0_len;
d6a28714 2026
1aa99e6b
IH
2027 while (UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2028 pref_len++;
2029 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2030 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2031 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2032 ? (5 + taill) - pref_len : PL_regeol - locinput);
1aa99e6b
IH
2033 while (UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2034 l--;
d6a28714
JH
2035 if (pref0_len < 0)
2036 pref0_len = 0;
2037 if (pref0_len > pref_len)
2038 pref0_len = pref_len;
2039 regprop(prop, scan);
9041c2e3 2040 PerlIO_printf(Perl_debug_log,
b900a521 2041 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
9041c2e3
NIS
2042 (IV)(locinput - PL_bostr),
2043 PL_colors[4], pref0_len,
d6a28714 2044 locinput - pref_len, PL_colors[5],
9041c2e3 2045 PL_colors[2], pref_len - pref0_len,
d6a28714
JH
2046 locinput - pref_len + pref0_len, PL_colors[3],
2047 (docolor ? "" : "> <"),
2048 PL_colors[0], l, locinput, PL_colors[1],
2049 15 - l - pref_len + 1,
2050 "",
b900a521 2051 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
2052 SvPVX(prop));
2053 } );
2054
2055 next = scan + NEXT_OFF(scan);
2056 if (next == scan)
2057 next = NULL;
2058
2059 switch (OP(scan)) {
2060 case BOL:
12d33761
HS
2061 if (locinput == PL_bostr || (PL_multiline &&
2062 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2063 {
2064 /* regtill = regbol; */
b8c5462f
JH
2065 break;
2066 }
d6a28714
JH
2067 sayNO;
2068 case MBOL:
12d33761
HS
2069 if (locinput == PL_bostr ||
2070 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2071 {
b8c5462f
JH
2072 break;
2073 }
d6a28714
JH
2074 sayNO;
2075 case SBOL:
c2a73568 2076 if (locinput == PL_bostr)
b8c5462f 2077 break;
d6a28714
JH
2078 sayNO;
2079 case GPOS:
2080 if (locinput == PL_reg_ganch)
2081 break;
2082 sayNO;
2083 case EOL:
2084 if (PL_multiline)
2085 goto meol;
2086 else
2087 goto seol;
2088 case MEOL:
2089 meol:
2090 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2091 sayNO;
b8c5462f 2092 break;
d6a28714
JH
2093 case SEOL:
2094 seol:
2095 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2096 sayNO;
d6a28714 2097 if (PL_regeol - locinput > 1)
b8c5462f 2098 sayNO;
b8c5462f 2099 break;
d6a28714
JH
2100 case EOS:
2101 if (PL_regeol != locinput)
b8c5462f 2102 sayNO;
d6a28714 2103 break;
ffc61ed2 2104 case SANY:
d6a28714 2105 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2106 sayNO;
b8c5462f 2107 nextchr = UCHARAT(++locinput);
a0d0e21e 2108 break;
ffc61ed2 2109 case REG_ANY:
1aa99e6b
IH
2110 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2111 sayNO;
2112 if (do_utf8) {
b8c5462f 2113 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2114 if (locinput > PL_regeol)
2115 sayNO;
a0ed51b3 2116 nextchr = UCHARAT(locinput);
a0ed51b3 2117 }
1aa99e6b
IH
2118 else
2119 nextchr = UCHARAT(++locinput);
a0ed51b3 2120 break;
d6a28714 2121 case EXACT:
cd439c50
IZ
2122 s = STRING(scan);
2123 ln = STR_LEN(scan);
1aa99e6b
IH
2124 if (do_utf8 != (UTF!=0)) {
2125 char *l = locinput;
2126 char *e = s + ln;
2127 STRLEN len;
2128 if (do_utf8)
2129 while (s < e) {
2130 if (l >= PL_regeol)
2131 sayNO;
9041c2e3 2132 if (*((U8*)s) != utf8_to_uvchr((U8*)l, &len))
1aa99e6b
IH
2133 sayNO;
2134 s++;
2135 l += len;
2136 }
2137 else
2138 while (s < e) {
2139 if (l >= PL_regeol)
2140 sayNO;
9041c2e3 2141 if (*((U8*)l) != utf8_to_uvchr((U8*)s, &len))
1aa99e6b
IH
2142 sayNO;
2143 s += len;
2144 l++;
2145 }
2146 locinput = l;
2147 nextchr = UCHARAT(locinput);
2148 break;
2149 }
d6a28714
JH
2150 /* Inline the first character, for speed. */
2151 if (UCHARAT(s) != nextchr)
2152 sayNO;
2153 if (PL_regeol - locinput < ln)
2154 sayNO;
2155 if (ln > 1 && memNE(s, locinput, ln))
2156 sayNO;
2157 locinput += ln;
2158 nextchr = UCHARAT(locinput);
2159 break;
2160 case EXACTFL:
b8c5462f
JH
2161 PL_reg_flags |= RF_tainted;
2162 /* FALL THROUGH */
d6a28714 2163 case EXACTF:
cd439c50
IZ
2164 s = STRING(scan);
2165 ln = STR_LEN(scan);
d6a28714 2166
1aa99e6b 2167 if (do_utf8) {
d6a28714 2168 char *l = locinput;
1aa99e6b
IH
2169 char *e;
2170 e = s + ln;
d6a28714
JH
2171 c1 = OP(scan) == EXACTF;
2172 while (s < e) {
1aa99e6b 2173 if (l >= PL_regeol) {
d6a28714
JH
2174 sayNO;
2175 }
2b9d42f0 2176 if ((UTF ? utf8n_to_uvchr((U8*)s, e - s, 0, 0) : *((U8*)s)) !=
1aa99e6b
IH
2177 (c1 ? toLOWER_utf8((U8*)l) : toLOWER_LC_utf8((U8*)l)))
2178 sayNO;
2179 s += UTF ? UTF8SKIP(s) : 1;
d6a28714 2180 l += UTF8SKIP(l);
b8c5462f 2181 }
d6a28714 2182 locinput = l;
a0ed51b3
LW
2183 nextchr = UCHARAT(locinput);
2184 break;
2185 }
d6a28714
JH
2186
2187 /* Inline the first character, for speed. */
2188 if (UCHARAT(s) != nextchr &&
2189 UCHARAT(s) != ((OP(scan) == EXACTF)
2190 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2191 sayNO;
d6a28714 2192 if (PL_regeol - locinput < ln)
b8c5462f 2193 sayNO;
d6a28714
JH
2194 if (ln > 1 && (OP(scan) == EXACTF
2195 ? ibcmp(s, locinput, ln)
2196 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2197 sayNO;
d6a28714
JH
2198 locinput += ln;
2199 nextchr = UCHARAT(locinput);
a0d0e21e 2200 break;
d6a28714 2201 case ANYOF:
ffc61ed2
JH
2202 if (do_utf8) {
2203 if (!reginclass(scan, (U8*)locinput, do_utf8))
2204 sayNO;
2205 if (locinput >= PL_regeol)
2206 sayNO;
2207 locinput += PL_utf8skip[nextchr];
b8c5462f 2208 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2209 }
2210 else {
2211 if (nextchr < 0)
2212 nextchr = UCHARAT(locinput);
2213 if (!reginclass(scan, (U8*)locinput, do_utf8))
2214 sayNO;
2215 if (!nextchr && locinput >= PL_regeol)
2216 sayNO;
2217 nextchr = UCHARAT(++locinput);
2218 }
b8c5462f 2219 break;
d6a28714 2220 case ALNUML:
b8c5462f
JH
2221 PL_reg_flags |= RF_tainted;
2222 /* FALL THROUGH */
d6a28714 2223 case ALNUM:
b8c5462f 2224 if (!nextchr)
4633a7c4 2225 sayNO;
ffc61ed2 2226 if (do_utf8) {
ad24be35 2227 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2228 if (!(OP(scan) == ALNUM
3568d838 2229 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2230 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2231 {
2232 sayNO;
a0ed51b3 2233 }
b8c5462f 2234 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2235 nextchr = UCHARAT(locinput);
2236 break;
2237 }
ffc61ed2 2238 if (!(OP(scan) == ALNUM
d6a28714 2239 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2240 sayNO;
b8c5462f 2241 nextchr = UCHARAT(++locinput);
a0d0e21e 2242 break;
d6a28714 2243 case NALNUML:
b8c5462f
JH
2244 PL_reg_flags |= RF_tainted;
2245 /* FALL THROUGH */
d6a28714
JH
2246 case NALNUM:
2247 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2248 sayNO;
ffc61ed2 2249 if (do_utf8) {
8269fa76 2250 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2251 if (OP(scan) == NALNUM
3568d838 2252 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2253 : isALNUM_LC_utf8((U8*)locinput))
2254 {
b8c5462f 2255 sayNO;
d6a28714 2256 }
b8c5462f
JH
2257 locinput += PL_utf8skip[nextchr];
2258 nextchr = UCHARAT(locinput);
2259 break;
2260 }
ffc61ed2 2261 if (OP(scan) == NALNUM
d6a28714 2262 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2263 sayNO;
76e3520e 2264 nextchr = UCHARAT(++locinput);
a0d0e21e 2265 break;
d6a28714
JH
2266 case BOUNDL:
2267 case NBOUNDL:
3280af22 2268 PL_reg_flags |= RF_tainted;
bbce6d69 2269 /* FALL THROUGH */
d6a28714
JH
2270 case BOUND:
2271 case NBOUND:
2272 /* was last char in word? */
ffc61ed2 2273 if (do_utf8) {
12d33761
HS
2274 if (locinput == PL_bostr)
2275 ln = '\n';
ffc61ed2
JH
2276 else {
2277 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2278
2b9d42f0 2279 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2280 }
2281 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2282 ln = isALNUM_uni(ln);
8269fa76 2283 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2284 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2285 }
2286 else {
9041c2e3 2287 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2288 n = isALNUM_LC_utf8((U8*)locinput);
2289 }
a0ed51b3 2290 }
d6a28714 2291 else {
12d33761
HS
2292 ln = (locinput != PL_bostr) ?
2293 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2294 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2295 ln = isALNUM(ln);
2296 n = isALNUM(nextchr);
2297 }
2298 else {
2299 ln = isALNUM_LC(ln);
2300 n = isALNUM_LC(nextchr);
2301 }
d6a28714 2302 }
ffc61ed2
JH
2303 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2304 OP(scan) == BOUNDL))
2305 sayNO;
a0ed51b3 2306 break;
d6a28714 2307 case SPACEL:
3280af22 2308 PL_reg_flags |= RF_tainted;
bbce6d69 2309 /* FALL THROUGH */
d6a28714 2310 case SPACE:
9442cb0e 2311 if (!nextchr)
4633a7c4 2312 sayNO;
1aa99e6b 2313 if (do_utf8) {
fd400ab9 2314 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2315 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2316 if (!(OP(scan) == SPACE
3568d838 2317 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2318 : isSPACE_LC_utf8((U8*)locinput)))
2319 {
2320 sayNO;
2321 }
2322 locinput += PL_utf8skip[nextchr];
2323 nextchr = UCHARAT(locinput);
2324 break;
d6a28714 2325 }
ffc61ed2
JH
2326 if (!(OP(scan) == SPACE
2327 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2328 sayNO;
2329 nextchr = UCHARAT(++locinput);
2330 }
2331 else {
2332 if (!(OP(scan) == SPACE
2333 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2334 sayNO;
2335 nextchr = UCHARAT(++locinput);
a0ed51b3 2336 }
a0ed51b3 2337 break;
d6a28714 2338 case NSPACEL:
3280af22 2339 PL_reg_flags |= RF_tainted;
bbce6d69 2340 /* FALL THROUGH */
d6a28714 2341 case NSPACE:
9442cb0e 2342 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2343 sayNO;
1aa99e6b 2344 if (do_utf8) {
8269fa76 2345 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2346 if (OP(scan) == NSPACE
3568d838 2347 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2348 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2349 {
2350 sayNO;
2351 }
2352 locinput += PL_utf8skip[nextchr];
2353 nextchr = UCHARAT(locinput);
2354 break;
a0ed51b3 2355 }
ffc61ed2 2356 if (OP(scan) == NSPACE
d6a28714 2357 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2358 sayNO;
76e3520e 2359 nextchr = UCHARAT(++locinput);
a0d0e21e 2360 break;
d6a28714 2361 case DIGITL:
a0ed51b3
LW
2362 PL_reg_flags |= RF_tainted;
2363 /* FALL THROUGH */
d6a28714 2364 case DIGIT:
9442cb0e 2365 if (!nextchr)
a0ed51b3 2366 sayNO;
1aa99e6b 2367 if (do_utf8) {
8269fa76 2368 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2369 if (!(OP(scan) == DIGIT
3568d838 2370 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2371 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2372 {
a0ed51b3 2373 sayNO;
dfe13c55 2374 }
6f06b55f 2375 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2376 nextchr = UCHARAT(locinput);
2377 break;
2378 }
ffc61ed2 2379 if (!(OP(scan) == DIGIT
9442cb0e 2380 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2381 sayNO;
2382 nextchr = UCHARAT(++locinput);
2383 break;
d6a28714 2384 case NDIGITL:
b8c5462f
JH
2385 PL_reg_flags |= RF_tainted;
2386 /* FALL THROUGH */
d6a28714 2387 case NDIGIT:
9442cb0e 2388 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2389 sayNO;
1aa99e6b 2390 if (do_utf8) {
8269fa76 2391 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2392 if (OP(scan) == NDIGIT
3568d838 2393 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2394 : isDIGIT_LC_utf8((U8*)locinput))
2395 {
a0ed51b3 2396 sayNO;
9442cb0e 2397 }
6f06b55f 2398 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2399 nextchr = UCHARAT(locinput);
2400 break;
2401 }
ffc61ed2 2402 if (OP(scan) == NDIGIT
9442cb0e 2403 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2404 sayNO;
2405 nextchr = UCHARAT(++locinput);
2406 break;
2407 case CLUMP:
8269fa76 2408 LOAD_UTF8_CHARCLASS(mark,"~");
3568d838
JH
2409 if (locinput >= PL_regeol ||
2410 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3 2411 sayNO;
6f06b55f 2412 locinput += PL_utf8skip[nextchr];
3568d838
JH
2413 while (locinput < PL_regeol &&
2414 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3
LW
2415 locinput += UTF8SKIP(locinput);
2416 if (locinput > PL_regeol)
2417 sayNO;
2418 nextchr = UCHARAT(locinput);
2419 break;
c8756f30 2420 case REFFL:
3280af22 2421 PL_reg_flags |= RF_tainted;
c8756f30 2422 /* FALL THROUGH */
c277df42 2423 case REF:
c8756f30 2424 case REFF:
c277df42 2425 n = ARG(scan); /* which paren pair */
cf93c79d 2426 ln = PL_regstartp[n];
2c2d71f5 2427 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2428 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2429 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2430 if (ln == PL_regendp[n])
a0d0e21e 2431 break;
a0ed51b3 2432
cf93c79d 2433 s = PL_bostr + ln;
1aa99e6b 2434 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2435 char *l = locinput;
cf93c79d 2436 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2437 /*
2438 * Note that we can't do the "other character" lookup trick as
2439 * in the 8-bit case (no pun intended) because in Unicode we
2440 * have to map both upper and title case to lower case.
2441 */
2442 if (OP(scan) == REFF) {
2443 while (s < e) {
2444 if (l >= PL_regeol)
2445 sayNO;
dfe13c55 2446 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2447 sayNO;
2448 s += UTF8SKIP(s);
2449 l += UTF8SKIP(l);
2450 }
2451 }
2452 else {
2453 while (s < e) {
2454 if (l >= PL_regeol)
2455 sayNO;
dfe13c55 2456 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2457 sayNO;
2458 s += UTF8SKIP(s);
2459 l += UTF8SKIP(l);
2460 }
2461 }
2462 locinput = l;
2463 nextchr = UCHARAT(locinput);
2464 break;
2465 }
2466
a0d0e21e 2467 /* Inline the first character, for speed. */
76e3520e 2468 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2469 (OP(scan) == REF ||
2470 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2471 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2472 sayNO;
cf93c79d 2473 ln = PL_regendp[n] - ln;
3280af22 2474 if (locinput + ln > PL_regeol)
4633a7c4 2475 sayNO;
c8756f30
AK
2476 if (ln > 1 && (OP(scan) == REF
2477 ? memNE(s, locinput, ln)
2478 : (OP(scan) == REFF
2479 ? ibcmp(s, locinput, ln)
2480 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2481 sayNO;
a0d0e21e 2482 locinput += ln;
76e3520e 2483 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2484 break;
2485
2486 case NOTHING:
c277df42 2487 case TAIL:
a0d0e21e
LW
2488 break;
2489 case BACK:
2490 break;
c277df42
IZ
2491 case EVAL:
2492 {
2493 dSP;
533c011a 2494 OP_4tree *oop = PL_op;
3280af22
NIS
2495 COP *ocurcop = PL_curcop;
2496 SV **ocurpad = PL_curpad;
c277df42 2497 SV *ret;
9041c2e3 2498
c277df42 2499 n = ARG(scan);
533c011a 2500 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2501 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2502 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2503 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2504
cea2e8a9 2505 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2506 SPAGAIN;
2507 ret = POPs;
2508 PUTBACK;
9041c2e3 2509
0f5d15d6
IZ
2510 PL_op = oop;
2511 PL_curpad = ocurpad;
2512 PL_curcop = ocurcop;
c277df42 2513 if (logical) {
0f5d15d6
IZ
2514 if (logical == 2) { /* Postponed subexpression. */
2515 regexp *re;
22c35a8c 2516 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2517 re_cc_state state;
0f5d15d6
IZ
2518 CHECKPOINT cp, lastcp;
2519
2520 if(SvROK(ret) || SvRMAGICAL(ret)) {
2521 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2522
2523 if(SvMAGICAL(sv))
14befaf4 2524 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2525 }
2526 if (mg) {
2527 re = (regexp *)mg->mg_obj;
df0003d4 2528 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2529 }
2530 else {
2531 STRLEN len;
2532 char *t = SvPV(ret, len);
2533 PMOP pm;
2534 char *oprecomp = PL_regprecomp;
2535 I32 osize = PL_regsize;
2536 I32 onpar = PL_regnpar;
2537
5fcd1c1b 2538 Zero(&pm, 1, PMOP);
cea2e8a9 2539 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2540 if (!(SvFLAGS(ret)
0f5d15d6 2541 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2542 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2543 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2544 PL_regprecomp = oprecomp;
2545 PL_regsize = osize;
2546 PL_regnpar = onpar;
2547 }
2548 DEBUG_r(
9041c2e3 2549 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2550 "Entering embedded `%s%.60s%s%s'\n",
2551 PL_colors[0],
2552 re->precomp,
2553 PL_colors[1],
2554 (strlen(re->precomp) > 60 ? "..." : ""))
2555 );
2556 state.node = next;
2557 state.prev = PL_reg_call_cc;
2558 state.cc = PL_regcc;
2559 state.re = PL_reg_re;
2560
2ab05381 2561 PL_regcc = 0;
9041c2e3 2562
0f5d15d6 2563 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2564 REGCP_SET(lastcp);
0f5d15d6
IZ
2565 cache_re(re);
2566 state.ss = PL_savestack_ix;
2567 *PL_reglastparen = 0;
a01268b5 2568 *PL_reglastcloseparen = 0;
0f5d15d6
IZ
2569 PL_reg_call_cc = &state;
2570 PL_reginput = locinput;
2c2d71f5
JH
2571
2572 /* XXXX This is too dramatic a measure... */
2573 PL_reg_maxiter = 0;
2574
0f5d15d6 2575 if (regmatch(re->program + 1)) {
2c914db6
IZ
2576 /* Even though we succeeded, we need to restore
2577 global variables, since we may be wrapped inside
2578 SUSPEND, thus the match may be not finished yet. */
2579
2580 /* XXXX Do this only if SUSPENDed? */
2581 PL_reg_call_cc = state.prev;
2582 PL_regcc = state.cc;
2583 PL_reg_re = state.re;
2584 cache_re(PL_reg_re);
2585
2586 /* XXXX This is too dramatic a measure... */
2587 PL_reg_maxiter = 0;
2588
2589 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2590 ReREFCNT_dec(re);
2591 regcpblow(cp);
2592 sayYES;
2593 }
0f5d15d6 2594 ReREFCNT_dec(re);
02db2b7b 2595 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2596 regcppop();
2597 PL_reg_call_cc = state.prev;
2598 PL_regcc = state.cc;
2599 PL_reg_re = state.re;
d3790889 2600 cache_re(PL_reg_re);
2c2d71f5
JH
2601
2602 /* XXXX This is too dramatic a measure... */
2603 PL_reg_maxiter = 0;
2604
0f5d15d6
IZ
2605 sayNO;
2606 }
c277df42 2607 sw = SvTRUE(ret);
0f5d15d6 2608 logical = 0;
a0ed51b3
LW
2609 }
2610 else
3280af22 2611 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2612 break;
2613 }
a0d0e21e 2614 case OPEN:
c277df42 2615 n = ARG(scan); /* which paren pair */
3280af22
NIS
2616 PL_reg_start_tmp[n] = locinput;
2617 if (n > PL_regsize)
2618 PL_regsize = n;
a0d0e21e
LW
2619 break;
2620 case CLOSE:
c277df42 2621 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2622 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2623 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2624 if (n > *PL_reglastparen)
2625 *PL_reglastparen = n;
a01268b5 2626 *PL_reglastcloseparen = n;
a0d0e21e 2627 break;
c277df42
IZ
2628 case GROUPP:
2629 n = ARG(scan); /* which paren pair */
cf93c79d 2630 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2631 break;
2632 case IFTHEN:
2c2d71f5 2633 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2634 if (sw)
2635 next = NEXTOPER(NEXTOPER(scan));
2636 else {
2637 next = scan + ARG(scan);
2638 if (OP(next) == IFTHEN) /* Fake one. */
2639 next = NEXTOPER(NEXTOPER(next));
2640 }
2641 break;
2642 case LOGICAL:
0f5d15d6 2643 logical = scan->flags;
c277df42 2644 break;
2ab05381
IZ
2645/*******************************************************************
2646 PL_regcc contains infoblock about the innermost (...)* loop, and
2647 a pointer to the next outer infoblock.
2648
2649 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2650
2651 1) After matching X, regnode for CURLYX is processed;
2652
9041c2e3 2653 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2654 regmatch() recursively with the starting point at WHILEM node;
2655
2656 3) Each hit of WHILEM node tries to match A and Z (in the order
2657 depending on the current iteration, min/max of {min,max} and
2658 greediness). The information about where are nodes for "A"
2659 and "Z" is read from the infoblock, as is info on how many times "A"
2660 was already matched, and greediness.
2661
2662 4) After A matches, the same WHILEM node is hit again.
2663
2664 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2665 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2666 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2667 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2668 of the external loop.
2669
2670 Currently present infoblocks form a tree with a stem formed by PL_curcc
2671 and whatever it mentions via ->next, and additional attached trees
2672 corresponding to temporarily unset infoblocks as in "5" above.
2673
9041c2e3 2674 In the following picture infoblocks for outer loop of
2ab05381
IZ
2675 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2676 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2677 infoblocks are drawn below the "reset" infoblock.
2678
2679 In fact in the picture below we do not show failed matches for Z and T
2680 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2681 more obvious *why* one needs to *temporary* unset infoblocks.]
2682
2683 Matched REx position InfoBlocks Comment
2684 (Y(A)*?Z)*?T x
2685 Y(A)*?Z)*?T x <- O
2686 Y (A)*?Z)*?T x <- O
2687 Y A)*?Z)*?T x <- O <- I
2688 YA )*?Z)*?T x <- O <- I
2689 YA A)*?Z)*?T x <- O <- I
2690 YAA )*?Z)*?T x <- O <- I
2691 YAA Z)*?T x <- O # Temporary unset I
2692 I
2693
2694 YAAZ Y(A)*?Z)*?T x <- O
2695 I
2696
2697 YAAZY (A)*?Z)*?T x <- O
2698 I
2699
2700 YAAZY A)*?Z)*?T x <- O <- I
2701 I
2702
2703 YAAZYA )*?Z)*?T x <- O <- I
2704 I
2705
2706 YAAZYA Z)*?T x <- O # Temporary unset I
2707 I,I
2708
2709 YAAZYAZ )*?T x <- O
2710 I,I
2711
2712 YAAZYAZ T x # Temporary unset O
2713 O
2714 I,I
2715
2716 YAAZYAZT x
2717 O
2718 I,I
2719 *******************************************************************/
a0d0e21e
LW
2720 case CURLYX: {
2721 CURCUR cc;
3280af22 2722 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2723 /* No need to save/restore up to this paren */
2724 I32 parenfloor = scan->flags;
c277df42
IZ
2725
2726 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2727 next += ARG(next);
3280af22
NIS
2728 cc.oldcc = PL_regcc;
2729 PL_regcc = &cc;
cb434fcc
IZ
2730 /* XXXX Probably it is better to teach regpush to support
2731 parenfloor > PL_regsize... */
2732 if (parenfloor > *PL_reglastparen)
2733 parenfloor = *PL_reglastparen; /* Pessimization... */
2734 cc.parenfloor = parenfloor;
a0d0e21e
LW
2735 cc.cur = -1;
2736 cc.min = ARG1(scan);
2737 cc.max = ARG2(scan);
c277df42 2738 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2739 cc.next = next;
2740 cc.minmod = minmod;
2741 cc.lastloc = 0;
3280af22 2742 PL_reginput = locinput;
a0d0e21e
LW
2743 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2744 regcpblow(cp);
3280af22 2745 PL_regcc = cc.oldcc;
4633a7c4 2746 saySAME(n);
a0d0e21e
LW
2747 }
2748 /* NOT REACHED */
2749 case WHILEM: {
2750 /*
2751 * This is really hard to understand, because after we match
2752 * what we're trying to match, we must make sure the rest of
2c2d71f5 2753 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2754 * to go back UP the parse tree by recursing ever deeper. And
2755 * if it fails, we have to reset our parent's current state
2756 * that we can try again after backing off.
2757 */
2758
c277df42 2759 CHECKPOINT cp, lastcp;
3280af22 2760 CURCUR* cc = PL_regcc;
c277df42
IZ
2761 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2762
4633a7c4 2763 n = cc->cur + 1; /* how many we know we matched */
3280af22 2764 PL_reginput = locinput;
a0d0e21e 2765
c277df42 2766 DEBUG_r(
9041c2e3
NIS
2767 PerlIO_printf(Perl_debug_log,
2768 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2769 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2770 (long)n, (long)cc->min,
c277df42
IZ
2771 (long)cc->max, (long)cc)
2772 );
4633a7c4 2773
a0d0e21e
LW
2774 /* If degenerate scan matches "", assume scan done. */
2775
579cf2c3 2776 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2777 PL_regcc = cc->oldcc;
2ab05381
IZ
2778 if (PL_regcc)
2779 ln = PL_regcc->cur;
c277df42 2780 DEBUG_r(
c3464db5
DD
2781 PerlIO_printf(Perl_debug_log,
2782 "%*s empty match detected, try continuation...\n",
3280af22 2783 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2784 );
a0d0e21e 2785 if (regmatch(cc->next))
4633a7c4 2786 sayYES;
2ab05381
IZ
2787 if (PL_regcc)
2788 PL_regcc->cur = ln;
3280af22 2789 PL_regcc = cc;
4633a7c4 2790 sayNO;
a0d0e21e
LW
2791 }
2792
2793 /* First just match a string of min scans. */
2794
2795 if (n < cc->min) {
2796 cc->cur = n;
2797 cc->lastloc = locinput;
4633a7c4
LW
2798 if (regmatch(cc->scan))
2799 sayYES;
2800 cc->cur = n - 1;
c277df42 2801 cc->lastloc = lastloc;
4633a7c4 2802 sayNO;
a0d0e21e
LW
2803 }
2804
2c2d71f5
JH
2805 if (scan->flags) {
2806 /* Check whether we already were at this position.
2807 Postpone detection until we know the match is not
2808 *that* much linear. */
2809 if (!PL_reg_maxiter) {
2810 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2811 PL_reg_leftiter = PL_reg_maxiter;
2812 }
2813 if (PL_reg_leftiter-- == 0) {
2814 I32 size = (PL_reg_maxiter + 7)/8;
2815 if (PL_reg_poscache) {
2816 if (PL_reg_poscache_size < size) {
2817 Renew(PL_reg_poscache, size, char);
2818 PL_reg_poscache_size = size;
2819 }
2820 Zero(PL_reg_poscache, size, char);
2821 }
2822 else {
2823 PL_reg_poscache_size = size;
2824 Newz(29, PL_reg_poscache, size, char);
2825 }
2826 DEBUG_r(
2827 PerlIO_printf(Perl_debug_log,
2828 "%sDetected a super-linear match, switching on caching%s...\n",
2829 PL_colors[4], PL_colors[5])
2830 );
2831 }
2832 if (PL_reg_leftiter < 0) {
2833 I32 o = locinput - PL_bostr, b;
2834
2835 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2836 b = o % 8;
2837 o /= 8;
2838 if (PL_reg_poscache[o] & (1<<b)) {
2839 DEBUG_r(
2840 PerlIO_printf(Perl_debug_log,
2841 "%*s already tried at this position...\n",
2842 REPORT_CODE_OFF+PL_regindent*2, "")
2843 );
7821416a 2844 sayNO_SILENT;
2c2d71f5
JH
2845 }
2846 PL_reg_poscache[o] |= (1<<b);
2847 }
2848 }
2849
a0d0e21e
LW
2850 /* Prefer next over scan for minimal matching. */
2851
2852 if (cc->minmod) {
3280af22 2853 PL_regcc = cc->oldcc;
2ab05381
IZ
2854 if (PL_regcc)
2855 ln = PL_regcc->cur;
5f05dabc 2856 cp = regcppush(cc->parenfloor);
02db2b7b 2857 REGCP_SET(lastcp);
5f05dabc 2858 if (regmatch(cc->next)) {
c277df42 2859 regcpblow(cp);
4633a7c4 2860 sayYES; /* All done. */
5f05dabc 2861 }
02db2b7b 2862 REGCP_UNWIND(lastcp);
5f05dabc 2863 regcppop();
2ab05381
IZ
2864 if (PL_regcc)
2865 PL_regcc->cur = ln;
3280af22 2866 PL_regcc = cc;
a0d0e21e 2867
c277df42 2868 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 2869 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2870 && !(PL_reg_flags & RF_warned)) {
2871 PL_reg_flags |= RF_warned;
e476b1b5 2872 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2873 "Complex regular subexpression recursion",
2874 REG_INFTY - 1);
c277df42 2875 }
4633a7c4 2876 sayNO;
c277df42 2877 }
a687059c 2878
c277df42 2879 DEBUG_r(
c3464db5
DD
2880 PerlIO_printf(Perl_debug_log,
2881 "%*s trying longer...\n",
3280af22 2882 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2883 );
a0d0e21e 2884 /* Try scanning more and see if it helps. */
3280af22 2885 PL_reginput = locinput;
a0d0e21e
LW
2886 cc->cur = n;
2887 cc->lastloc = locinput;
5f05dabc 2888 cp = regcppush(cc->parenfloor);
02db2b7b 2889 REGCP_SET(lastcp);
5f05dabc 2890 if (regmatch(cc->scan)) {
c277df42 2891 regcpblow(cp);
4633a7c4 2892 sayYES;
5f05dabc 2893 }
02db2b7b 2894 REGCP_UNWIND(lastcp);
5f05dabc 2895 regcppop();
4633a7c4 2896 cc->cur = n - 1;
c277df42 2897 cc->lastloc = lastloc;
4633a7c4 2898 sayNO;
a0d0e21e
LW
2899 }
2900
2901 /* Prefer scan over next for maximal matching. */
2902
2903 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2904 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2905 cc->cur = n;
2906 cc->lastloc = locinput;
02db2b7b 2907 REGCP_SET(lastcp);
5f05dabc 2908 if (regmatch(cc->scan)) {
c277df42 2909 regcpblow(cp);
4633a7c4 2910 sayYES;
5f05dabc 2911 }
02db2b7b 2912 REGCP_UNWIND(lastcp);
a0d0e21e 2913 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2914 PL_reginput = locinput;
c277df42 2915 DEBUG_r(
c3464db5
DD
2916 PerlIO_printf(Perl_debug_log,
2917 "%*s failed, try continuation...\n",
3280af22 2918 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2919 );
2920 }
9041c2e3 2921 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2922 && !(PL_reg_flags & RF_warned)) {
3280af22 2923 PL_reg_flags |= RF_warned;
e476b1b5 2924 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2925 "Complex regular subexpression recursion",
2926 REG_INFTY - 1);
a0d0e21e
LW
2927 }
2928
2929 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2930 PL_regcc = cc->oldcc;
2ab05381
IZ
2931 if (PL_regcc)
2932 ln = PL_regcc->cur;
a0d0e21e 2933 if (regmatch(cc->next))
4633a7c4 2934 sayYES;
2ab05381
IZ
2935 if (PL_regcc)
2936 PL_regcc->cur = ln;
3280af22 2937 PL_regcc = cc;
4633a7c4 2938 cc->cur = n - 1;
c277df42 2939 cc->lastloc = lastloc;
4633a7c4 2940 sayNO;
a0d0e21e
LW
2941 }
2942 /* NOT REACHED */
9041c2e3 2943 case BRANCHJ:
c277df42
IZ
2944 next = scan + ARG(scan);
2945 if (next == scan)
2946 next = NULL;
2947 inner = NEXTOPER(NEXTOPER(scan));
2948 goto do_branch;
9041c2e3 2949 case BRANCH:
c277df42
IZ
2950 inner = NEXTOPER(scan);
2951 do_branch:
2952 {
c277df42
IZ
2953 c1 = OP(scan);
2954 if (OP(next) != c1) /* No choice. */
2955 next = inner; /* Avoid recursion. */
a0d0e21e 2956 else {
02db2b7b
IZ
2957 I32 lastparen = *PL_reglastparen;
2958 I32 unwind1;
2959 re_unwind_branch_t *uw;
2960
2961 /* Put unwinding data on stack */
2962 unwind1 = SSNEWt(1,re_unwind_branch_t);
2963 uw = SSPTRt(unwind1,re_unwind_branch_t);
2964 uw->prev = unwind;
2965 unwind = unwind1;
2966 uw->type = ((c1 == BRANCH)
2967 ? RE_UNWIND_BRANCH
2968 : RE_UNWIND_BRANCHJ);
2969 uw->lastparen = lastparen;
2970 uw->next = next;
2971 uw->locinput = locinput;
2972 uw->nextchr = nextchr;
2973#ifdef DEBUGGING
2974 uw->regindent = ++PL_regindent;
2975#endif
c277df42 2976
02db2b7b
IZ
2977 REGCP_SET(uw->lastcp);
2978
2979 /* Now go into the first branch */
2980 next = inner;
a687059c 2981 }
a0d0e21e
LW
2982 }
2983 break;
2984 case MINMOD:
2985 minmod = 1;
2986 break;
c277df42
IZ
2987 case CURLYM:
2988 {
00db4c45 2989 I32 l = 0;
c277df42 2990 CHECKPOINT lastcp;
9041c2e3 2991
c277df42
IZ
2992 /* We suppose that the next guy does not need
2993 backtracking: in particular, it is of constant length,
2994 and has no parenths to influence future backrefs. */
2995 ln = ARG1(scan); /* min to match */
2996 n = ARG2(scan); /* max to match */
c277df42
IZ
2997 paren = scan->flags;
2998 if (paren) {
3280af22
NIS
2999 if (paren > PL_regsize)
3000 PL_regsize = paren;
3001 if (paren > *PL_reglastparen)
3002 *PL_reglastparen = paren;
c277df42 3003 }
dc45a647 3004 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
3005 if (paren)
3006 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 3007 PL_reginput = locinput;
c277df42
IZ
3008 if (minmod) {
3009 minmod = 0;
3010 if (ln && regrepeat_hard(scan, ln, &l) < ln)
3011 sayNO;
5f4b28b2 3012 if (ln && l == 0 && n >= ln
c277df42
IZ
3013 /* In fact, this is tricky. If paren, then the
3014 fact that we did/didnot match may influence
3015 future execution. */
3016 && !(paren && ln == 0))
3017 ln = n;
3280af22 3018 locinput = PL_reginput;
22c35a8c 3019 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3020 c1 = (U8)*STRING(next);
c277df42 3021 if (OP(next) == EXACTF)
22c35a8c 3022 c2 = PL_fold[c1];
c277df42 3023 else if (OP(next) == EXACTFL)
22c35a8c 3024 c2 = PL_fold_locale[c1];
c277df42
IZ
3025 else
3026 c2 = c1;
a0ed51b3
LW
3027 }
3028 else
c277df42 3029 c1 = c2 = -1000;
02db2b7b 3030 REGCP_SET(lastcp);
5f4b28b2 3031 /* This may be improved if l == 0. */
c277df42
IZ
3032 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3033 /* If it could work, try it. */
3034 if (c1 == -1000 ||
3280af22
NIS
3035 UCHARAT(PL_reginput) == c1 ||
3036 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3037 {
3038 if (paren) {
3039 if (n) {
cf93c79d
IZ
3040 PL_regstartp[paren] =
3041 HOPc(PL_reginput, -l) - PL_bostr;
3042 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3043 }
3044 else
cf93c79d 3045 PL_regendp[paren] = -1;
c277df42
IZ
3046 }
3047 if (regmatch(next))
3048 sayYES;
02db2b7b 3049 REGCP_UNWIND(lastcp);
c277df42
IZ
3050 }
3051 /* Couldn't or didn't -- move forward. */
3280af22 3052 PL_reginput = locinput;
c277df42
IZ
3053 if (regrepeat_hard(scan, 1, &l)) {
3054 ln++;
3280af22 3055 locinput = PL_reginput;
c277df42
IZ
3056 }
3057 else
3058 sayNO;
3059 }
a0ed51b3
LW
3060 }
3061 else {
c277df42
IZ
3062 n = regrepeat_hard(scan, n, &l);
3063 if (n != 0 && l == 0
3064 /* In fact, this is tricky. If paren, then the
3065 fact that we did/didnot match may influence
3066 future execution. */
3067 && !(paren && ln == 0))
3068 ln = n;
3280af22 3069 locinput = PL_reginput;
c277df42 3070 DEBUG_r(
5c0ca799 3071 PerlIO_printf(Perl_debug_log,
faccc32b 3072 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3073 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3074 (IV) n, (IV)l)
c277df42
IZ
3075 );
3076 if (n >= ln) {
22c35a8c 3077 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3078 c1 = (U8)*STRING(next);
c277df42 3079 if (OP(next) == EXACTF)
22c35a8c 3080 c2 = PL_fold[c1];
c277df42 3081 else if (OP(next) == EXACTFL)
22c35a8c 3082 c2 = PL_fold_locale[c1];
c277df42
IZ
3083 else
3084 c2 = c1;
a0ed51b3
LW
3085 }
3086 else
c277df42
IZ
3087 c1 = c2 = -1000;
3088 }
02db2b7b 3089 REGCP_SET(lastcp);
c277df42
IZ
3090 while (n >= ln) {
3091 /* If it could work, try it. */
3092 if (c1 == -1000 ||
3280af22
NIS
3093 UCHARAT(PL_reginput) == c1 ||
3094 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3095 {
3096 DEBUG_r(
c3464db5 3097 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3098 "%*s trying tail with n=%"IVdf"...\n",
3099 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3100 );
3101 if (paren) {
3102 if (n) {
cf93c79d
IZ
3103 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3104 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3105 }
a0ed51b3 3106 else
cf93c79d 3107 PL_regendp[paren] = -1;
c277df42 3108 }
a0ed51b3
LW
3109 if (regmatch(next))
3110 sayYES;
02db2b7b 3111 REGCP_UNWIND(lastcp);
a0ed51b3 3112 }
c277df42
IZ
3113 /* Couldn't or didn't -- back up. */
3114 n--;
dfe13c55 3115 locinput = HOPc(locinput, -l);
3280af22 3116 PL_reginput = locinput;
c277df42
IZ
3117 }
3118 }
3119 sayNO;
3120 break;
3121 }
3122 case CURLYN:
3123 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3124 if (paren > PL_regsize)
3125 PL_regsize = paren;
3126 if (paren > *PL_reglastparen)
3127 *PL_reglastparen = paren;
c277df42
IZ
3128 ln = ARG1(scan); /* min to match */
3129 n = ARG2(scan); /* max to match */
dc45a647 3130 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3131 goto repeat;
a0d0e21e 3132 case CURLY:
c277df42 3133 paren = 0;
a0d0e21e
LW
3134 ln = ARG1(scan); /* min to match */
3135 n = ARG2(scan); /* max to match */
dc45a647 3136 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3137 goto repeat;
3138 case STAR:
3139 ln = 0;
c277df42 3140 n = REG_INFTY;
a0d0e21e 3141 scan = NEXTOPER(scan);
c277df42 3142 paren = 0;
a0d0e21e
LW
3143 goto repeat;
3144 case PLUS:
c277df42
IZ
3145 ln = 1;
3146 n = REG_INFTY;
3147 scan = NEXTOPER(scan);
3148 paren = 0;
3149 repeat:
a0d0e21e
LW
3150 /*
3151 * Lookahead to avoid useless match attempts
3152 * when we know what character comes next.
3153 */
22c35a8c 3154 if (PL_regkind[(U8)OP(next)] == EXACT) {
1aa99e6b
IH
3155 U8 *s = (U8*)STRING(next);
3156 if (!UTF) {
3157 c2 = c1 = *s;
3158 if (OP(next) == EXACTF)
3159 c2 = PL_fold[c1];
3160 else if (OP(next) == EXACTFL)
3161 c2 = PL_fold_locale[c1];
3162 }
3163 else { /* UTF */
3164 if (OP(next) == EXACTF) {
3165 c1 = to_utf8_lower(s);
3166 c2 = to_utf8_upper(s);
3167 }
3168 else {
9041c2e3 3169 c2 = c1 = utf8_to_uvchr(s, NULL);
1aa99e6b
IH
3170 }
3171 }
bbce6d69 3172 }
a0d0e21e 3173 else
bbce6d69 3174 c1 = c2 = -1000;
3280af22 3175 PL_reginput = locinput;
a0d0e21e 3176 if (minmod) {
c277df42 3177 CHECKPOINT lastcp;
a0d0e21e
LW
3178 minmod = 0;
3179 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3180 sayNO;
a0ed51b3 3181 locinput = PL_reginput;
02db2b7b 3182 REGCP_SET(lastcp);
0fe9bf95 3183 if (c1 != -1000) {
1aa99e6b 3184 char *e; /* Should not check after this */
0fe9bf95
IZ
3185 char *old = locinput;
3186
1aa99e6b 3187 if (n == REG_INFTY) {
0fe9bf95 3188 e = PL_regeol - 1;
1aa99e6b
IH
3189 if (do_utf8)
3190 while (UTF8_IS_CONTINUATION(*(U8*)e))
3191 e--;
3192 }
3193 else if (do_utf8) {
3194 int m = n - ln;
3195 for (e = locinput;
3196 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3197 e += UTF8SKIP(e);
3198 }
3199 else {
3200 e = locinput + n - ln;
3201 if (e >= PL_regeol)
3202 e = PL_regeol - 1;
3203 }
0fe9bf95 3204 while (1) {
1aa99e6b 3205 int count;
0fe9bf95 3206 /* Find place 'next' could work */
1aa99e6b
IH
3207 if (!do_utf8) {
3208 if (c1 == c2) {
3209 while (locinput <= e && *locinput != c1)
3210 locinput++;
3211 } else {
9041c2e3 3212 while (locinput <= e
1aa99e6b
IH
3213 && *locinput != c1
3214 && *locinput != c2)
3215 locinput++;
3216 }
3217 count = locinput - old;
3218 }
3219 else {
3220 STRLEN len;
3221 if (c1 == c2) {
3222 for (count = 0;
3223 locinput <= e &&
9041c2e3 3224 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3225 count++)
3226 locinput += len;
3227
3228 } else {
3229 for (count = 0; locinput <= e; count++) {
9041c2e3 3230 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3231 if (c == c1 || c == c2)
3232 break;
9041c2e3 3233 locinput += len;
1aa99e6b
IH
3234 }
3235 }
0fe9bf95 3236 }
9041c2e3 3237 if (locinput > e)
0fe9bf95
IZ
3238 sayNO;
3239 /* PL_reginput == old now */
3240 if (locinput != old) {
3241 ln = 1; /* Did some */
1aa99e6b 3242 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3243 sayNO;
3244 }
3245 /* PL_reginput == locinput now */
29d1e993 3246 TRYPAREN(paren, ln, locinput);
0fe9bf95 3247 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3248 REGCP_UNWIND(lastcp);
0fe9bf95 3249 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3250 old = locinput;
3251 if (do_utf8)
3252 locinput += UTF8SKIP(locinput);
3253 else
3254 locinput++;
0fe9bf95
IZ
3255 }
3256 }
3257 else
c277df42 3258 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3259 UV c;
3260 if (c1 != -1000) {
3261 if (do_utf8)
9041c2e3 3262 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3263 else
9041c2e3 3264 c = UCHARAT(PL_reginput);
2390ecbc
PP
3265 /* If it could work, try it. */
3266 if (c == c1 || c == c2)
3267 {
3268 TRYPAREN(paren, n, PL_reginput);
3269 REGCP_UNWIND(lastcp);
3270 }
1aa99e6b 3271 }
a0d0e21e 3272 /* If it could work, try it. */
2390ecbc 3273 else if (c1 == -1000)
bbce6d69 3274 {
29d1e993 3275 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3276 REGCP_UNWIND(lastcp);
bbce6d69 3277 }
c277df42 3278 /* Couldn't or didn't -- move forward. */
a0ed51b3 3279 PL_reginput = locinput;
a0d0e21e
LW
3280 if (regrepeat(scan, 1)) {
3281 ln++;
a0ed51b3
LW
3282 locinput = PL_reginput;
3283 }
3284 else
4633a7c4 3285 sayNO;
a0d0e21e
LW
3286 }
3287 }
3288 else {
c277df42 3289 CHECKPOINT lastcp;
a0d0e21e 3290 n = regrepeat(scan, n);
a0ed51b3 3291 locinput = PL_reginput;
22c35a8c 3292 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3293 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3294 ln = n; /* why back off? */
1aeab75a
GS
3295 /* ...because $ and \Z can match before *and* after
3296 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3297 We should back off by one in this case. */
3298 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3299 ln--;
3300 }
02db2b7b 3301 REGCP_SET(lastcp);
c277df42 3302 if (paren) {
8fa7f367 3303 UV c = 0;
c277df42 3304 while (n >= ln) {
1aa99e6b
IH
3305 if (c1 != -1000) {
3306 if (do_utf8)
9041c2e3 3307 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3308 else
9041c2e3 3309 c = UCHARAT(PL_reginput);
1aa99e6b 3310 }
c277df42 3311 /* If it could work, try it. */
1aa99e6b 3312 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3313 {
29d1e993 3314 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3315 REGCP_UNWIND(lastcp);
c277df42
IZ
3316 }
3317 /* Couldn't or didn't -- back up. */
3318 n--;
dfe13c55 3319 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3320 }
a0ed51b3
LW
3321 }
3322 else {
8fa7f367 3323 UV c = 0;
c277df42 3324 while (n >= ln) {
1aa99e6b
IH
3325 if (c1 != -1000) {
3326 if (do_utf8)
9041c2e3 3327 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3328 else
9041c2e3 3329 c = UCHARAT(PL_reginput);
1aa99e6b 3330 }
c277df42 3331 /* If it could work, try it. */
1aa99e6b 3332 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3333 {
29d1e993 3334 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3335 REGCP_UNWIND(lastcp);
c277df42
IZ
3336 }
3337 /* Couldn't or didn't -- back up. */
3338 n--;
dfe13c55 3339 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3340 }
a0d0e21e
LW
3341 }
3342 }
4633a7c4 3343 sayNO;
c277df42 3344 break;
a0d0e21e 3345 case END:
0f5d15d6
IZ
3346 if (PL_reg_call_cc) {
3347 re_cc_state *cur_call_cc = PL_reg_call_cc;
3348 CURCUR *cctmp = PL_regcc;
3349 regexp *re = PL_reg_re;
3350 CHECKPOINT cp, lastcp;
3351
3352 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3353 REGCP_SET(lastcp);
0f5d15d6
IZ
3354 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3355 the caller. */
3356 PL_reginput = locinput; /* Make position available to
3357 the callcc. */
3358 cache_re(PL_reg_call_cc->re);
3359 PL_regcc = PL_reg_call_cc->cc;
3360 PL_reg_call_cc = PL_reg_call_cc->prev;
3361 if (regmatch(cur_call_cc->node)) {
3362 PL_reg_call_cc = cur_call_cc;
3363 regcpblow(cp);
3364 sayYES;
3365 }
02db2b7b 3366 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3367 regcppop();
3368 PL_reg_call_cc = cur_call_cc;
3369 PL_regcc = cctmp;
3370 PL_reg_re = re;
3371 cache_re(re);
3372
3373 DEBUG_r(
3374 PerlIO_printf(Perl_debug_log,
3375 "%*s continuation failed...\n",
3376 REPORT_CODE_OFF+PL_regindent*2, "")
3377 );
7821416a 3378 sayNO_SILENT;
0f5d15d6 3379 }
7821416a
IZ
3380 if (locinput < PL_regtill) {
3381 DEBUG_r(PerlIO_printf(Perl_debug_log,
3382 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3383 PL_colors[4],
3384 (long)(locinput - PL_reg_starttry),
3385 (long)(PL_regtill - PL_reg_starttry),
3386 PL_colors[5]));
3387 sayNO_FINAL; /* Cannot match: too short. */
3388 }
3389 PL_reginput = locinput; /* put where regtry can find it */
3390 sayYES_FINAL; /* Success! */
7e5428c5 3391 case SUCCEED:
3280af22 3392 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3393 sayYES_LOUD; /* Success! */
c277df42
IZ
3394 case SUSPEND:
3395 n = 1;
9fe1d20c 3396 PL_reginput = locinput;
9041c2e3 3397 goto do_ifmatch;
a0d0e21e 3398 case UNLESSM:
c277df42 3399 n = 0;
a0ed51b3 3400 if (scan->flags) {
efb30f32
HS
3401 s = HOPBACKc(locinput, scan->flags);
3402 if (!s)
3403 goto say_yes;
3404 PL_reginput = s;
a0ed51b3
LW
3405 }
3406 else
3407 PL_reginput = locinput;
c277df42
IZ
3408 goto do_ifmatch;
3409 case IFMATCH:
3410 n = 1;
a0ed51b3 3411 if (scan->flags) {
efb30f32
HS
3412 s = HOPBACKc(locinput, scan->flags);
3413 if (!s)
3414 goto say_no;
3415 PL_reginput = s;
a0ed51b3
LW
3416 }
3417 else
3418 PL_reginput = locinput;
3419
c277df42 3420 do_ifmatch:
c277df42
IZ
3421 inner = NEXTOPER(NEXTOPER(scan));
3422 if (regmatch(inner) != n) {
3423 say_no:
3424 if (logical) {
3425 logical = 0;
3426 sw = 0;
3427 goto do_longjump;
a0ed51b3
LW
3428 }
3429 else
c277df42
IZ
3430 sayNO;
3431 }
3432 say_yes:
3433 if (logical) {
3434 logical = 0;
3435 sw = 1;
3436 }
fe44a5e8 3437 if (OP(scan) == SUSPEND) {
3280af22 3438 locinput = PL_reginput;
565764a8 3439 nextchr = UCHARAT(locinput);
fe44a5e8 3440 }
c277df42
IZ
3441 /* FALL THROUGH. */
3442 case LONGJMP:
3443 do_longjump:
3444 next = scan + ARG(scan);
3445 if (next == scan)
3446 next = NULL;
a0d0e21e
LW
3447 break;
3448 default:
b900a521 3449 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3450 PTR2UV(scan), OP(scan));
cea2e8a9 3451 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3452 }
02db2b7b 3453 reenter:
a0d0e21e
LW
3454 scan = next;
3455 }
a687059c 3456
a0d0e21e
LW
3457 /*
3458 * We get here only if there's trouble -- normally "case END" is
3459 * the terminating point.
3460 */
cea2e8a9 3461 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3462 /*NOTREACHED*/
4633a7c4
LW
3463 sayNO;
3464
7821416a
IZ
3465yes_loud:
3466 DEBUG_r(
3467 PerlIO_printf(Perl_debug_log,
3468 "%*s %scould match...%s\n",
3469 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3470 );
3471 goto yes;
3472yes_final:
3473 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3474 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3475yes:
3476#ifdef DEBUGGING
3280af22 3477 PL_regindent--;
4633a7c4 3478#endif
02db2b7b
IZ
3479
3480#if 0 /* Breaks $^R */
3481 if (unwind)
3482 regcpblow(firstcp);
3483#endif
4633a7c4
LW
3484 return 1;
3485
3486no:
7821416a
IZ
3487 DEBUG_r(
3488 PerlIO_printf(Perl_debug_log,
3489 "%*s %sfailed...%s\n",
3490 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3491 );
3492 goto do_no;
3493no_final:
3494do_no:
02db2b7b
IZ
3495 if (unwind) {
3496 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3497
3498 switch (uw->type) {
3499 case RE_UNWIND_BRANCH:
3500 case RE_UNWIND_BRANCHJ:
3501 {
3502 re_unwind_branch_t *uwb = &(uw->branch);
3503 I32 lastparen = uwb->lastparen;
9041c2e3 3504
02db2b7b
IZ
3505 REGCP_UNWIND(uwb->lastcp);
3506 for (n = *PL_reglastparen; n > lastparen; n--)
3507 PL_regendp[n] = -1;
3508 *PL_reglastparen = n;
3509 scan = next = uwb->next;
9041c2e3
NIS
3510 if ( !scan ||
3511 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
3512 ? BRANCH : BRANCHJ) ) { /* Failure */
3513 unwind = uwb->prev;
3514#ifdef DEBUGGING
3515 PL_regindent--;
3516#endif
3517 goto do_no;
3518 }
3519 /* Have more choice yet. Reuse the same uwb. */
3520 /*SUPPRESS 560*/
3521 if ((n = (uwb->type == RE_UNWIND_BRANCH
3522 ? NEXT_OFF(next) : ARG(next))))
3523 next += n;
3524 else
3525 next = NULL; /* XXXX Needn't unwinding in this case... */
3526 uwb->next = next;
3527 next = NEXTOPER(scan);
3528 if (uwb->type == RE_UNWIND_BRANCHJ)
3529 next = NEXTOPER(next);
3530 locinput = uwb->locinput;
3531 nextchr = uwb->nextchr;
3532#ifdef DEBUGGING
3533 PL_regindent = uwb->regindent;
3534#endif
3535
3536 goto reenter;
3537 }
3538 /* NOT REACHED */
3539 default:
3540 Perl_croak(aTHX_ "regexp unwind memory corruption");
3541 }
3542 /* NOT REACHED */
3543 }
4633a7c4 3544#ifdef DEBUGGING
3280af22 3545 PL_regindent--;
4633a7c4 3546#endif
a0d0e21e 3547 return 0;
a687059c
LW
3548}
3549
3550/*
3551 - regrepeat - repeatedly match something simple, report how many
3552 */
3553/*
3554 * [This routine now assumes that it will only match on things of length 1.
3555 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3556 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3557 */
76e3520e 3558STATIC I32
cea2e8a9 3559S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3560{
a0d0e21e 3561 register char *scan;
a0d0e21e 3562 register I32 c;
3280af22 3563 register char *loceol = PL_regeol;
a0ed51b3 3564 register I32 hardcount = 0;
ffc61ed2 3565 register bool do_utf8 = DO_UTF8(PL_reg_sv);
a0d0e21e 3566
3280af22 3567 scan = PL_reginput;
c277df42 3568 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3569 loceol = scan + max;
a0d0e21e 3570 switch (OP(p)) {
22c35a8c 3571 case REG_ANY:
1aa99e6b 3572 if (do_utf8) {
ffc61ed2 3573 loceol = PL_regeol;
1aa99e6b 3574 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3575 scan += UTF8SKIP(scan);
3576 hardcount++;
3577 }
3578 } else {
3579 while (scan < loceol && *scan != '\n')
3580 scan++;
a0ed51b3
LW
3581 }
3582 break;
ffc61ed2 3583 case SANY:
3baa4c62 3584 scan = loceol;
a0ed51b3 3585 break;
bbce6d69 3586 case EXACT: /* length of string is 1 */
cd439c50 3587 c = (U8)*STRING(p);
bbce6d69 3588 while (scan < loceol && UCHARAT(scan) == c)
3589 scan++;
3590 break;
3591 case EXACTF: /* length of string is 1 */
cd439c50 3592 c = (U8)*STRING(p);
bbce6d69 3593 while (scan < loceol &&
22c35a8c 3594 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3595 scan++;
3596 break;
3597 case EXACTFL: /* length of string is 1 */
3280af22 3598 PL_reg_flags |= RF_tainted;
cd439c50 3599 c = (U8)*STRING(p);
bbce6d69 3600 while (scan < loceol &&
22c35a8c 3601 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3602 scan++;
3603 break;
3604 case ANYOF:
ffc61ed2
JH
3605 if (do_utf8) {
3606 loceol = PL_regeol;
1aa99e6b
IH
3607 while (hardcount < max && scan < loceol &&
3608 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3609 scan += UTF8SKIP(scan);
3610 hardcount++;
3611 }
3612 } else {
3613 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3614 scan++;
3615 }
a0d0e21e
LW
3616 break;
3617 case ALNUM:
1aa99e6b 3618 if (do_utf8) {
ffc61ed2 3619 loceol = PL_regeol;
8269fa76 3620 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3621 while (hardcount < max && scan < loceol &&
3568d838 3622 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3623 scan += UTF8SKIP(scan);
3624 hardcount++;
3625 }
3626 } else {
3627 while (scan < loceol && isALNUM(*scan))
3628 scan++;
a0ed51b3
LW
3629 }
3630 break;
bbce6d69 3631 case ALNUML:
3280af22 3632 PL_reg_flags |= RF_tainted;
1aa99e6b 3633 if (do_utf8) {
ffc61ed2 3634 loceol = PL_regeol;
1aa99e6b
IH
3635 while (hardcount < max && scan < loceol &&
3636 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3637 scan += UTF8SKIP(scan);
3638 hardcount++;
3639 }
3640 } else {
3641 while (scan < loceol && isALNUM_LC(*scan))
3642 scan++;
a0ed51b3
LW
3643 }
3644 break;
a0d0e21e 3645 case NALNUM:
1aa99e6b 3646 if (do_utf8) {
ffc61ed2 3647 loceol = PL_regeol;
8269fa76 3648 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3649 while (hardcount < max && scan < loceol &&
3568d838 3650 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3651 scan += UTF8SKIP(scan);
3652 hardcount++;
3653 }
3654 } else {
3655 while (scan < loceol && !isALNUM(*scan))
3656 scan++;
a0ed51b3
LW
3657 }
3658 break;
bbce6d69 3659 case NALNUML:
3280af22 3660 PL_reg_flags |= RF_tainted;
1aa99e6b 3661 if (do_utf8) {
ffc61ed2 3662 loceol = PL_regeol;
1aa99e6b
IH
3663 while (hardcount < max && scan < loceol &&
3664 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3665 scan += UTF8SKIP(scan);
3666 hardcount++;
3667 }
3668 } else {
3669 while (scan < loceol && !isALNUM_LC(*scan))
3670 scan++;
a0ed51b3
LW
3671 }
3672 break;
a0d0e21e 3673 case SPACE:
1aa99e6b 3674 if (do_utf8) {
ffc61ed2 3675 loceol = PL_regeol;
8269fa76 3676 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3677 while (hardcount < max && scan < loceol &&
3568d838
JH
3678 (*scan == ' ' ||
3679 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3680 scan += UTF8SKIP(scan);
3681 hardcount++;
3682 }
3683 } else {
3684 while (scan < loceol && isSPACE(*scan))
3685 scan++;
a0ed51b3
LW
3686 }
3687 break;
bbce6d69 3688 case SPACEL:
3280af22 3689 PL_reg_flags |= RF_tainted;
1aa99e6b 3690 if (do_utf8) {
ffc61ed2 3691 loceol = PL_regeol;
1aa99e6b 3692 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3693 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3694 scan += UTF8SKIP(scan);
3695 hardcount++;
3696 }
3697 } else {
3698 while (scan < loceol && isSPACE_LC(*scan))
3699 scan++;
a0ed51b3
LW
3700 }
3701 break;
a0d0e21e 3702 case NSPACE:
1aa99e6b 3703 if (do_utf8) {
ffc61ed2 3704 loceol = PL_regeol;
8269fa76 3705 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3706 while (hardcount < max && scan < loceol &&
3568d838
JH
3707 !(*scan == ' ' ||
3708 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3709 scan += UTF8SKIP(scan);
3710 hardcount++;
3711 }
3712 } else {
3713 while (scan < loceol && !isSPACE(*scan))
3714 scan++;
3715 break;
a0ed51b3 3716 }
bbce6d69 3717 case NSPACEL:
3280af22 3718 PL_reg_flags |= RF_tainted;
1aa99e6b 3719 if (do_utf8) {
ffc61ed2 3720 loceol = PL_regeol;
1aa99e6b 3721 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3722 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3723 scan += UTF8SKIP(scan);
3724 hardcount++;
3725 }
3726 } else {
3727 while (scan < loceol && !isSPACE_LC(*scan))
3728 scan++;
a0ed51b3
LW
3729 }
3730 break;
a0d0e21e 3731 case DIGIT:
1aa99e6b 3732 if (do_utf8) {
ffc61ed2 3733 loceol = PL_regeol;
8269fa76 3734 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3735 while (hardcount < max && scan < loceol &&
3568d838 3736 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3737 scan += UTF8SKIP(scan);
3738 hardcount++;
3739 }
3740 } else {
3741 while (scan < loceol && isDIGIT(*scan))
3742 scan++;
a0ed51b3
LW
3743 }
3744 break;
a0d0e21e 3745 case NDIGIT:
1aa99e6b 3746 if (do_utf8) {
ffc61ed2 3747 loceol = PL_regeol;
8269fa76 3748 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3749 while (hardcount < max && scan < loceol &&
3568d838 3750 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3751 scan += UTF8SKIP(scan);
3752 hardcount++;
3753 }
3754 } else {
3755 while (scan < loceol && !isDIGIT(*scan))
3756 scan++;
a0ed51b3
LW
3757 }
3758 break;
a0d0e21e
LW
3759 default: /* Called on something of 0 width. */
3760 break; /* So match right here or not at all. */
3761 }
a687059c 3762
a0ed51b3
LW
3763 if (hardcount)
3764 c = hardcount;
3765 else
3766 c = scan - PL_reginput;
3280af22 3767 PL_reginput = scan;
a687059c 3768
9041c2e3 3769 DEBUG_r(
c277df42
IZ
3770 {
3771 SV *prop = sv_newmortal();
3772
3773 regprop(prop, p);
9041c2e3
NIS
3774 PerlIO_printf(Perl_debug_log,
3775 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
7b0972df 3776 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42 3777 });
9041c2e3 3778
a0d0e21e 3779 return(c);
a687059c
LW
3780}
3781
3782/*
c277df42 3783 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 3784 *
c277df42
IZ
3785 * The repeater is supposed to have constant length.
3786 */
3787
76e3520e 3788STATIC I32
cea2e8a9 3789S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42 3790{
b7953727 3791 register char *scan = Nullch;
c277df42 3792 register char *start;
3280af22 3793 register char *loceol = PL_regeol;
a0ed51b3 3794 I32 l = 0;
708e3b05 3795 I32 count = 0, res = 1;
a0ed51b3
LW
3796
3797 if (!max)
3798 return 0;
c277df42 3799
3280af22 3800 start = PL_reginput;
1aa99e6b 3801 if (DO_UTF8(PL_reg_sv)) {
708e3b05 3802 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3803 if (!count++) {
3804 l = 0;
3805 while (start < PL_reginput) {
3806 l++;
3807 start += UTF8SKIP(start);
3808 }
3809 *lp = l;
3810 if (l == 0)
3811 return max;
3812 }
3813 if (count == max)
3814 return count;
3815 }
3816 }
3817 else {
708e3b05 3818 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3819 if (!count++) {
3820 *lp = l = PL_reginput - start;
3821 if (max != REG_INFTY && l*max < loceol - scan)
3822 loceol = scan + l*max;
3823 if (l == 0)
3824 return max;
c277df42
IZ
3825 }
3826 }
3827 }
708e3b05 3828 if (!res)
3280af22 3829 PL_reginput = scan;
9041c2e3 3830
a0ed51b3 3831 return count;
c277df42
IZ
3832}
3833
3834/*
ffc61ed2
JH
3835- regclass_swash - prepare the utf8 swash
3836*/
3837
3838SV *
3839Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
3840{
3841 SV *sw = NULL;
3842 SV *si = NULL;
3843
3844 if (PL_regdata && PL_regdata->count) {
3845 U32 n = ARG(node);
3846
3847 if (PL_regdata->what[n] == 's') {
3848 SV *rv = (SV*)PL_regdata->data[n];
3849 AV *av = (AV*)SvRV((SV*)rv);
3850 SV **a;
9041c2e3 3851
ffc61ed2
JH
3852 si = *av_fetch(av, 0, FALSE);
3853 a = av_fetch(av, 1, FALSE);
9041c2e3 3854
ffc61ed2
JH
3855 if (a)
3856 sw = *a;
3857 else if (si && doinit) {
3858 sw = swash_init("utf8", "", si, 1, 0);
3859 (void)av_store(av, 1, sw);
3860 }
3861 }
3862 }
3863
3864 if (initsvp)
3865 *initsvp = si;
3866
3867 return sw;
3868}
3869
3870/*
cb8d8820 3871 - reginclass - determine if a character falls into a character class
bbce6d69 3872 */
3873
76e3520e 3874STATIC bool
ffc61ed2 3875S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
bbce6d69 3876{
ffc61ed2 3877 char flags = ANYOF_FLAGS(n);
bbce6d69 3878 bool match = FALSE;
1aa99e6b 3879 UV c;
3568d838 3880 STRLEN len = 0;
1aa99e6b 3881
3568d838 3882 c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
bbce6d69 3883
ffc61ed2
JH
3884 if (do_utf8 || (flags & ANYOF_UNICODE)) {
3885 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
3886 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
3887 match = TRUE;
bbce6d69 3888 }
3568d838 3889 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 3890 match = TRUE;
ffc61ed2
JH
3891 if (!match) {
3892 SV *sw = regclass_swash(n, TRUE, 0);
3893
3894 if (sw) {
3568d838 3895 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
3896 match = TRUE;
3897 else if (flags & ANYOF_FOLD) {
3898 U8 tmpbuf[UTF8_MAXLEN+1];
9041c2e3 3899
ffc61ed2
JH
3900 if (flags & ANYOF_LOCALE) {
3901 PL_reg_flags |= RF_tainted;
9041c2e3 3902 uvchr_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
ffc61ed2 3903 }
1aa99e6b 3904 else
9041c2e3 3905 uvchr_to_utf8(tmpbuf, toLOWER_utf8(p));
3568d838 3906 if (swash_fetch(sw, tmpbuf, do_utf8))
ffc61ed2
JH
3907 match = TRUE;
3908 }
3909 }
bbce6d69 3910 }
3911 }
1aa99e6b 3912 if (!match && c < 256) {
ffc61ed2
JH
3913 if (ANYOF_BITMAP_TEST(n, c))
3914 match = TRUE;
3915 else if (flags & ANYOF_FOLD) {
3568d838 3916 I32 f;
a0ed51b3 3917
ffc61ed2
JH
3918 if (flags & ANYOF_LOCALE) {
3919 PL_reg_flags |= RF_tainted;
3920 f = PL_fold_locale[c];
3921 }
3922 else
3923 f = PL_fold[c];
3924 if (f != c && ANYOF_BITMAP_TEST(n, f))
3925 match = TRUE;
3926 }
3927
3928 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 3929 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
3930 if (
3931 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3932 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3933 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
3934 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3935 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3936 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3937 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3938 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3939 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3940 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3941 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
3942 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
3943 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3944 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3945 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3946 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3947 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
3948 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3949 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
3950 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3951 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3952 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3953 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
3954 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3955 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3956 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3957 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3958 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3959 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
3960 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
3961 ) /* How's that for a conditional? */
3962 {
3963 match = TRUE;
3964 }
a0ed51b3 3965 }
a0ed51b3
LW
3966 }
3967
a0ed51b3
LW
3968 return (flags & ANYOF_INVERT) ? !match : match;
3969}
161b471a 3970
dfe13c55 3971STATIC U8 *
cea2e8a9 3972S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 3973{
1aa99e6b
IH
3974 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
3975}
3976
3977STATIC U8 *
3978S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 3979{
a0ed51b3 3980 if (off >= 0) {
1aa99e6b 3981 while (off-- && s < lim) {
ffc61ed2 3982 /* XXX could check well-formedness here */
a0ed51b3 3983 s += UTF8SKIP(s);
ffc61ed2 3984 }
a0ed51b3
LW
3985 }
3986 else {
3987 while (off++) {
1aa99e6b 3988 if (s > lim) {
a0ed51b3 3989 s--;
ffc61ed2 3990 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 3991 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 3992 s--;
ffc61ed2
JH
3993 }
3994 /* XXX could check well-formedness here */
a0ed51b3
LW
3995 }
3996 }
3997 }
3998 return s;
3999}
161b471a 4000
dfe13c55 4001STATIC U8 *
1aa99e6b 4002S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 4003{
1aa99e6b
IH
4004 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4005}
4006
4007STATIC U8 *
4008S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
4009{
4010 if (off >= 0) {
1aa99e6b 4011 while (off-- && s < lim) {
ffc61ed2 4012 /* XXX could check well-formedness here */
a0ed51b3 4013 s += UTF8SKIP(s);
ffc61ed2 4014 }
a0ed51b3
LW
4015 if (off >= 0)
4016 return 0;
4017 }
4018 else {
4019 while (off++) {
1aa99e6b 4020 if (s > lim) {
a0ed51b3 4021 s--;
ffc61ed2 4022 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4023 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4024 s--;
ffc61ed2
JH
4025 }
4026 /* XXX could check well-formedness here */
a0ed51b3
LW
4027 }
4028 else
4029 break;
4030 }
4031 if (off <= 0)
4032 return 0;
4033 }
4034 return s;
4035}
51371543
GS
4036
4037#ifdef PERL_OBJECT
51371543
GS
4038#include "XSUB.h"
4039#endif
4040
4041static void
4042restore_pos(pTHXo_ void *arg)
4043{
51371543
GS
4044 if (PL_reg_eval_set) {
4045 if (PL_reg_oldsaved) {
4046 PL_reg_re->subbeg = PL_reg_oldsaved;
4047 PL_reg_re->sublen = PL_reg_oldsavedlen;
4048 RX_MATCH_COPIED_on(PL_reg_re);
4049 }
4050 PL_reg_magic->mg_len = PL_reg_oldpos;
4051 PL_reg_eval_set = 0;
4052 PL_curpm = PL_reg_oldcurpm;
4053 }
4054}