This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[PATCH perl.c t/run/runenv.t] (was Re: [[ID 20010514.042] Perl v5.6.1 mangles PERL5OPT])
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
c5be433b
GS
83#ifdef PERL_IN_XSUB_RE
84# if defined(PERL_CAPI) || defined(PERL_OBJECT)
85# include "XSUB.h"
86# endif
87#endif
88
a687059c
LW
89#include "regcomp.h"
90
c277df42
IZ
91#define RF_tainted 1 /* tainted information used? */
92#define RF_warned 2 /* warned about big count? */
ce862d02 93#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
94#define RF_utf8 8 /* String contains multibyte chars? */
95
96#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
97
98#define RS_init 1 /* eval environment created */
99#define RS_set 2 /* replsv value is set */
c277df42 100
a687059c
LW
101#ifndef STATIC
102#define STATIC static
103#endif
104
c277df42
IZ
105/*
106 * Forwards.
107 */
108
a0ed51b3 109#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
1aa99e6b 110#define CHR_DIST(a,b) (DO_UTF8(PL_reg_sv) ? utf8_distance(a,b) : a - b)
a0ed51b3 111
dfe13c55
GS
112#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
113#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
1aa99e6b
IH
114#define HOP(pos,off) (DO_UTF8(PL_reg_sv) ? reghop((U8*)pos, off) : (U8*)(pos + off))
115#define HOPMAYBE(pos,off) (DO_UTF8(PL_reg_sv) ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
116#define HOPc(pos,off) ((char*)HOP(pos,off))
117#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 118
efb30f32
HS
119#define HOPBACK(pos, off) ( \
120 (UTF && DO_UTF8(PL_reg_sv)) \
121 ? reghopmaybe((U8*)pos, -off) \
122 : (pos - off >= PL_bostr) \
123 ? (U8*)(pos - off) \
124 : (U8*)NULL \
125)
126#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
127
1aa99e6b
IH
128#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
129#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
130#define HOP3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
131#define HOPMAYBE3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
132#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
133#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
134
8269fa76 135#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 136
8269fa76 137static void restore_pos(pTHXo_ void *arg);
51371543 138
76e3520e 139STATIC CHECKPOINT
cea2e8a9 140S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 141{
3280af22 142 int retval = PL_savestack_ix;
b1ce53c5
JH
143#define REGCP_PAREN_ELEMS 4
144 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
145 int p;
146
e49a9654
IH
147 if (paren_elems_to_push < 0)
148 Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
149
b1ce53c5
JH
150#define REGCP_OTHER_ELEMS 5
151 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 152 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 153/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
154 SSPUSHINT(PL_regendp[p]);
155 SSPUSHINT(PL_regstartp[p]);
3280af22 156 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
157 SSPUSHINT(p);
158 }
b1ce53c5 159/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
160 SSPUSHINT(PL_regsize);
161 SSPUSHINT(*PL_reglastparen);
162 SSPUSHPTR(PL_reginput);
41123dfd
JH
163#define REGCP_FRAME_ELEMS 2
164/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
165 * are needed for the regexp context stack bookkeeping. */
166 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 167 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 168
a0d0e21e
LW
169 return retval;
170}
171
c277df42 172/* These are needed since we do not localize EVAL nodes: */
02db2b7b 173# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 174 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 175 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 176
02db2b7b 177# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 178 PerlIO_printf(Perl_debug_log, \
faccc32b 179 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 180 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 181
76e3520e 182STATIC char *
cea2e8a9 183S_regcppop(pTHX)
a0d0e21e 184{
b1ce53c5 185 I32 i;
a0d0e21e
LW
186 U32 paren = 0;
187 char *input;
cf93c79d 188 I32 tmps;
b1ce53c5
JH
189
190 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 191 i = SSPOPINT;
b1ce53c5
JH
192 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
193 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 194 input = (char *) SSPOPPTR;
3280af22
NIS
195 *PL_reglastparen = SSPOPINT;
196 PL_regsize = SSPOPINT;
b1ce53c5
JH
197
198 /* Now restore the parentheses context. */
41123dfd
JH
199 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
200 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 201 paren = (U32)SSPOPINT;
3280af22 202 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
203 PL_regstartp[paren] = SSPOPINT;
204 tmps = SSPOPINT;
3280af22
NIS
205 if (paren <= *PL_reglastparen)
206 PL_regendp[paren] = tmps;
c277df42 207 DEBUG_r(
c3464db5 208 PerlIO_printf(Perl_debug_log,
b900a521 209 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 210 (UV)paren, (IV)PL_regstartp[paren],
b900a521 211 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 212 (IV)PL_regendp[paren],
3280af22 213 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 214 );
a0d0e21e 215 }
c277df42 216 DEBUG_r(
3280af22 217 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 218 PerlIO_printf(Perl_debug_log,
faccc32b
JH
219 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
220 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
221 }
222 );
daf18116 223#if 1
dafc8851
JH
224 /* It would seem that the similar code in regtry()
225 * already takes care of this, and in fact it is in
226 * a better location to since this code can #if 0-ed out
227 * but the code in regtry() is needed or otherwise tests
228 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
229 * (as of patchlevel 7877) will fail. Then again,
230 * this code seems to be necessary or otherwise
231 * building DynaLoader will fail:
232 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
233 * --jhi */
3280af22
NIS
234 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
235 if (paren > PL_regsize)
cf93c79d
IZ
236 PL_regstartp[paren] = -1;
237 PL_regendp[paren] = -1;
a0d0e21e 238 }
dafc8851 239#endif
a0d0e21e
LW
240 return input;
241}
242
0f5d15d6 243STATIC char *
cea2e8a9 244S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
245{
246 I32 tmp = PL_savestack_ix;
247
248 PL_savestack_ix = ss;
249 regcppop();
250 PL_savestack_ix = tmp;
942e002e 251 return Nullch;
0f5d15d6
IZ
252}
253
254typedef struct re_cc_state
255{
256 I32 ss;
257 regnode *node;
258 struct re_cc_state *prev;
259 CURCUR *cc;
260 regexp *re;
261} re_cc_state;
262
02db2b7b 263#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 264
29d1e993
HS
265#define TRYPAREN(paren, n, input) { \
266 if (paren) { \
267 if (n) { \
268 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
269 PL_regendp[paren] = input - PL_bostr; \
270 } \
271 else \
272 PL_regendp[paren] = -1; \
273 } \
274 if (regmatch(next)) \
275 sayYES; \
276 if (paren && n) \
277 PL_regendp[paren] = -1; \
278}
279
280
a687059c 281/*
e50aee73 282 * pregexec and friends
a687059c
LW
283 */
284
285/*
c277df42 286 - pregexec - match a regexp against a string
a687059c 287 */
c277df42 288I32
864dbfa3 289Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 290 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
291/* strend: pointer to null at end of string */
292/* strbeg: real beginning of string */
293/* minend: end of match must be >=minend after stringarg. */
294/* nosave: For optimizations. */
295{
296 return
9041c2e3 297 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
298 nosave ? 0 : REXEC_COPY_STR);
299}
0f5d15d6
IZ
300
301STATIC void
cea2e8a9 302S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
303{
304 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
305#ifdef DEBUGGING
306 PL_regprogram = prog->program;
307#endif
308 PL_regnpar = prog->nparens;
9041c2e3
NIS
309 PL_regdata = prog->data;
310 PL_reg_re = prog;
0f5d15d6 311}
22e551b9 312
9041c2e3 313/*
cad2e5aa
JH
314 * Need to implement the following flags for reg_anch:
315 *
316 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
317 * USE_INTUIT_ML
318 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
319 * INTUIT_AUTORITATIVE_ML
320 * INTUIT_ONCE_NOML - Intuit can match in one location only.
321 * INTUIT_ONCE_ML
322 *
323 * Another flag for this function: SECOND_TIME (so that float substrs
324 * with giant delta may be not rechecked).
325 */
326
327/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
328
2c2d71f5 329/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
330 Otherwise, only SvCUR(sv) is used to get strbeg. */
331
332/* XXXX We assume that strpos is strbeg unless sv. */
333
6eb5f6b9
JH
334/* XXXX Some places assume that there is a fixed substring.
335 An update may be needed if optimizer marks as "INTUITable"
336 RExen without fixed substrings. Similarly, it is assumed that
337 lengths of all the strings are no more than minlen, thus they
338 cannot come from lookahead.
339 (Or minlen should take into account lookahead.) */
340
2c2d71f5
JH
341/* A failure to find a constant substring means that there is no need to make
342 an expensive call to REx engine, thus we celebrate a failure. Similarly,
343 finding a substring too deep into the string means that less calls to
30944b6d
IZ
344 regtry() should be needed.
345
346 REx compiler's optimizer found 4 possible hints:
347 a) Anchored substring;
348 b) Fixed substring;
349 c) Whether we are anchored (beginning-of-line or \G);
350 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 351 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
352 string which does not contradict any of them.
353 */
2c2d71f5 354
6eb5f6b9
JH
355/* Most of decisions we do here should have been done at compile time.
356 The nodes of the REx which we used for the search should have been
357 deleted from the finite automaton. */
358
cad2e5aa
JH
359char *
360Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
361 char *strend, U32 flags, re_scream_pos_data *data)
362{
2c2d71f5 363 register I32 start_shift;
cad2e5aa 364 /* Should be nonnegative! */
2c2d71f5
JH
365 register I32 end_shift;
366 register char *s;
367 register SV *check;
a1933d95 368 char *strbeg;
cad2e5aa
JH
369 char *t;
370 I32 ml_anch;
2c2d71f5 371 char *tmp;
6eb5f6b9
JH
372 register char *other_last = Nullch; /* other substr checked before this */
373 char *check_at; /* check substr found at this pos */
30944b6d
IZ
374#ifdef DEBUGGING
375 char *i_strpos = strpos;
376#endif
cad2e5aa
JH
377
378 DEBUG_r( if (!PL_colorset) reginitcolors() );
379 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 380 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
381 PL_colors[4],PL_colors[5],PL_colors[0],
382 prog->precomp,
383 PL_colors[1],
384 (strlen(prog->precomp) > 60 ? "..." : ""),
385 PL_colors[0],
b900a521 386 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
387 strpos, PL_colors[1],
388 (strend - strpos > 60 ? "..." : ""))
389 );
390
1aa99e6b
IH
391 if (prog->reganch & ROPT_UTF8)
392 PL_reg_flags |= RF_utf8;
393
394 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
2c2d71f5 395 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 396 goto fail;
2c2d71f5 397 }
a1933d95 398 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 399 PL_regeol = strend;
653099ff 400 check = prog->check_substr;
2c2d71f5 401 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
402 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
403 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 404 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 405
7e25d62c
JH
406 if (!ml_anch) {
407 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
408 | ROPT_IMPLICIT)) /* not a real BOL */
409 /* SvCUR is not set on references: SvRV and SvPVX overlap */
410 && sv && !SvROK(sv)
411 && (strpos != strbeg)) {
412 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
413 goto fail;
414 }
415 if (prog->check_offset_min == prog->check_offset_max &&
416 !(prog->reganch & ROPT_SANY_SEEN)) {
2c2d71f5 417 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
418 I32 slen;
419
1aa99e6b 420 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
421 if (SvTAIL(check)) {
422 slen = SvCUR(check); /* >= 1 */
cad2e5aa 423
9041c2e3 424 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
425 || (strend - s == slen && strend[-1] != '\n')) {
426 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
427 goto fail_finish;
cad2e5aa
JH
428 }
429 /* Now should match s[0..slen-2] */
430 slen--;
653099ff 431 if (slen && (*SvPVX(check) != *s
cad2e5aa 432 || (slen > 1
653099ff 433 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
434 report_neq:
435 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
436 goto fail_finish;
437 }
cad2e5aa 438 }
653099ff
GS
439 else if (*SvPVX(check) != *s
440 || ((slen = SvCUR(check)) > 1
441 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
442 goto report_neq;
443 goto success_at_start;
7e25d62c 444 }
cad2e5aa 445 }
2c2d71f5 446 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 447 s = strpos;
2c2d71f5 448 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 449 end_shift = prog->minlen - start_shift -
653099ff 450 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 451 if (!ml_anch) {
653099ff
GS
452 I32 end = prog->check_offset_max + CHR_SVLEN(check)
453 - (SvTAIL(check) != 0);
1aa99e6b 454 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
455
456 if (end_shift < eshift)
457 end_shift = eshift;
458 }
cad2e5aa 459 }
2c2d71f5 460 else { /* Can match at random position */
cad2e5aa
JH
461 ml_anch = 0;
462 s = strpos;
2c2d71f5
JH
463 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
464 /* Should be nonnegative! */
465 end_shift = prog->minlen - start_shift -
653099ff 466 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
467 }
468
2c2d71f5 469#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 470 if (end_shift < 0)
6bbae5e6 471 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
472#endif
473
2c2d71f5
JH
474 restart:
475 /* Find a possible match in the region s..strend by looking for
476 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 477 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
478 I32 p = -1; /* Internal iterator of scream. */
479 I32 *pp = data ? data->scream_pos : &p;
480
2c2d71f5
JH
481 if (PL_screamfirst[BmRARE(check)] >= 0
482 || ( BmRARE(check) == '\n'
483 && (BmPREVIOUS(check) == SvCUR(check) - 1)
484 && SvTAIL(check) ))
9041c2e3 485 s = screaminstr(sv, check,
2c2d71f5 486 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 487 else
2c2d71f5 488 goto fail_finish;
cad2e5aa
JH
489 if (data)
490 *data->scream_olds = s;
491 }
3baa4c62
JH
492 else if (prog->reganch & ROPT_SANY_SEEN)
493 s = fbm_instr((U8*)(s + start_shift),
494 (U8*)(strend - end_shift),
495 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 496 else
1aa99e6b
IH
497 s = fbm_instr(HOP3(s, start_shift, strend),
498 HOP3(strend, -end_shift, strbeg),
2c2d71f5 499 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
500
501 /* Update the count-of-usability, remove useless subpatterns,
502 unshift s. */
2c2d71f5
JH
503
504 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
505 (s ? "Found" : "Did not find"),
506 ((check == prog->anchored_substr) ? "anchored" : "floating"),
507 PL_colors[0],
7b0972df
JH
508 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
509 SvPVX(check),
2c2d71f5
JH
510 PL_colors[1], (SvTAIL(check) ? "$" : ""),
511 (s ? " at offset " : "...\n") ) );
512
513 if (!s)
514 goto fail_finish;
515
6eb5f6b9
JH
516 check_at = s;
517
2c2d71f5 518 /* Finish the diagnostic message */
30944b6d 519 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
520
521 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
522 Start with the other substr.
523 XXXX no SCREAM optimization yet - and a very coarse implementation
524 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
525 *always* match. Probably should be marked during compile...
526 Probably it is right to do no SCREAM here...
527 */
528
529 if (prog->float_substr && prog->anchored_substr) {
30944b6d 530 /* Take into account the "other" substring. */
2c2d71f5
JH
531 /* XXXX May be hopelessly wrong for UTF... */
532 if (!other_last)
6eb5f6b9 533 other_last = strpos;
2c2d71f5 534 if (check == prog->float_substr) {
30944b6d
IZ
535 do_other_anchored:
536 {
1aa99e6b 537 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
538 char *s1 = s;
539
2c2d71f5
JH
540 t = s - prog->check_offset_max;
541 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
542 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 543 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 544 && t > strpos)))
30944b6d 545 /* EMPTY */;
2c2d71f5
JH
546 else
547 t = strpos;
1aa99e6b 548 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
549 if (t < other_last) /* These positions already checked */
550 t = other_last;
1aa99e6b 551 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
552 if (last < last1)
553 last1 = last;
554 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
555 /* On end-of-str: see comment below. */
556 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
557 HOP3(HOP3(last1, prog->anchored_offset, strend)
558 + SvCUR(prog->anchored_substr),
559 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
560 prog->anchored_substr,
561 PL_multiline ? FBMrf_MULTILINE : 0);
562 DEBUG_r(PerlIO_printf(Perl_debug_log,
563 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
564 (s ? "Found" : "Contradicts"),
565 PL_colors[0],
7b0972df
JH
566 (int)(SvCUR(prog->anchored_substr)
567 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
568 SvPVX(prog->anchored_substr),
569 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
570 if (!s) {
571 if (last1 >= last2) {
572 DEBUG_r(PerlIO_printf(Perl_debug_log,
573 ", giving up...\n"));
574 goto fail_finish;
575 }
576 DEBUG_r(PerlIO_printf(Perl_debug_log,
577 ", trying floating at offset %ld...\n",
1aa99e6b
IH
578 (long)(HOP3c(s1, 1, strend) - i_strpos)));
579 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
580 s = HOP3c(last, 1, strend);
2c2d71f5
JH
581 goto restart;
582 }
583 else {
584 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 585 (long)(s - i_strpos)));
1aa99e6b
IH
586 t = HOP3c(s, -prog->anchored_offset, strbeg);
587 other_last = HOP3c(s, 1, strend);
30944b6d 588 s = s1;
2c2d71f5
JH
589 if (t == strpos)
590 goto try_at_start;
2c2d71f5
JH
591 goto try_at_offset;
592 }
30944b6d 593 }
2c2d71f5
JH
594 }
595 else { /* Take into account the floating substring. */
596 char *last, *last1;
597 char *s1 = s;
598
1aa99e6b
IH
599 t = HOP3c(s, -start_shift, strbeg);
600 last1 = last =
601 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
602 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
603 last = HOP3c(t, prog->float_max_offset, strend);
604 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
605 if (s < other_last)
606 s = other_last;
2c2d71f5
JH
607 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
608 /* fbm_instr() takes into account exact value of end-of-str
609 if the check is SvTAIL(ed). Since false positives are OK,
610 and end-of-str is not later than strend we are OK. */
611 s = fbm_instr((unsigned char*)s,
612 (unsigned char*)last + SvCUR(prog->float_substr)
613 - (SvTAIL(prog->float_substr)!=0),
614 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
615 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
616 (s ? "Found" : "Contradicts"),
617 PL_colors[0],
7b0972df
JH
618 (int)(SvCUR(prog->float_substr)
619 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
620 SvPVX(prog->float_substr),
621 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
622 if (!s) {
623 if (last1 == last) {
624 DEBUG_r(PerlIO_printf(Perl_debug_log,
625 ", giving up...\n"));
626 goto fail_finish;
627 }
628 DEBUG_r(PerlIO_printf(Perl_debug_log,
629 ", trying anchored starting at offset %ld...\n",
30944b6d 630 (long)(s1 + 1 - i_strpos)));
803ff556 631 other_last = last;
1aa99e6b 632 s = HOP3c(t, 1, strend);
2c2d71f5
JH
633 goto restart;
634 }
635 else {
636 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 637 (long)(s - i_strpos)));
803ff556 638 other_last = s; /* Fix this later. --Hugo */
30944b6d 639 s = s1;
2c2d71f5
JH
640 if (t == strpos)
641 goto try_at_start;
2c2d71f5
JH
642 goto try_at_offset;
643 }
644 }
cad2e5aa 645 }
2c2d71f5
JH
646
647 t = s - prog->check_offset_max;
2c2d71f5
JH
648 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
649 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
650 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
651 && t > strpos))) {
2c2d71f5
JH
652 /* Fixed substring is found far enough so that the match
653 cannot start at strpos. */
654 try_at_offset:
cad2e5aa 655 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
656 /* Eventually fbm_*() should handle this, but often
657 anchored_offset is not 0, so this check will not be wasted. */
658 /* XXXX In the code below we prefer to look for "^" even in
659 presence of anchored substrings. And we search even
660 beyond the found float position. These pessimizations
661 are historical artefacts only. */
662 find_anchor:
2c2d71f5 663 while (t < strend - prog->minlen) {
cad2e5aa 664 if (*t == '\n') {
4ee3650e 665 if (t < check_at - prog->check_offset_min) {
30944b6d 666 if (prog->anchored_substr) {
4ee3650e
GS
667 /* Since we moved from the found position,
668 we definitely contradict the found anchored
30944b6d
IZ
669 substr. Due to the above check we do not
670 contradict "check" substr.
671 Thus we can arrive here only if check substr
672 is float. Redo checking for "other"=="fixed".
673 */
9041c2e3 674 strpos = t + 1;
30944b6d
IZ
675 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
676 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
677 goto do_other_anchored;
678 }
4ee3650e
GS
679 /* We don't contradict the found floating substring. */
680 /* XXXX Why not check for STCLASS? */
cad2e5aa 681 s = t + 1;
2c2d71f5 682 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 683 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
684 goto set_useful;
685 }
4ee3650e
GS
686 /* Position contradicts check-string */
687 /* XXXX probably better to look for check-string
688 than for "\n", so one should lower the limit for t? */
689 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 690 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 691 other_last = strpos = s = t + 1;
cad2e5aa
JH
692 goto restart;
693 }
694 t++;
695 }
2c2d71f5
JH
696 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
697 PL_colors[0],PL_colors[1]));
698 goto fail_finish;
cad2e5aa 699 }
f5952150
GS
700 else {
701 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
702 PL_colors[0],PL_colors[1]));
703 }
cad2e5aa
JH
704 s = t;
705 set_useful:
2c2d71f5 706 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
707 }
708 else {
f5952150 709 /* The found string does not prohibit matching at strpos,
2c2d71f5 710 - no optimization of calling REx engine can be performed,
f5952150
GS
711 unless it was an MBOL and we are not after MBOL,
712 or a future STCLASS check will fail this. */
2c2d71f5
JH
713 try_at_start:
714 /* Even in this situation we may use MBOL flag if strpos is offset
715 wrt the start of the string. */
05b4157f 716 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 717 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
718 /* May be due to an implicit anchor of m{.*foo} */
719 && !(prog->reganch & ROPT_IMPLICIT))
720 {
cad2e5aa
JH
721 t = strpos;
722 goto find_anchor;
723 }
30944b6d 724 DEBUG_r( if (ml_anch)
f5952150
GS
725 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
726 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 727 );
2c2d71f5 728 success_at_start:
30944b6d 729 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 730 && prog->check_substr /* Could be deleted already */
cad2e5aa 731 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
732 && prog->check_substr == prog->float_substr)
733 {
cad2e5aa 734 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 735 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
736 SvREFCNT_dec(prog->check_substr);
737 prog->check_substr = Nullsv; /* disable */
738 prog->float_substr = Nullsv; /* clear */
5e39e1e5 739 check = Nullsv; /* abort */
cad2e5aa 740 s = strpos;
3cf5c195
IZ
741 /* XXXX This is a remnant of the old implementation. It
742 looks wasteful, since now INTUIT can use many
6eb5f6b9 743 other heuristics. */
cad2e5aa
JH
744 prog->reganch &= ~RE_USE_INTUIT;
745 }
746 else
747 s = strpos;
748 }
749
6eb5f6b9
JH
750 /* Last resort... */
751 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
752 if (prog->regstclass) {
753 /* minlen == 0 is possible if regstclass is \b or \B,
754 and the fixed substr is ''$.
755 Since minlen is already taken into account, s+1 is before strend;
756 accidentally, minlen >= 1 guaranties no false positives at s + 1
757 even for \b or \B. But (minlen? 1 : 0) below assumes that
758 regstclass does not come from lookahead... */
759 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
760 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 761 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 762 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 763 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 764 : 1);
6eb5f6b9 765 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
766 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
767 : (prog->float_substr
768 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
769 cl_l, strend)
770 : strend);
a1933d95 771 char *startpos = strbeg;
6eb5f6b9
JH
772
773 t = s;
76384e4a 774 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 775 PL_regdata = prog->data;
76384e4a
GS
776 PL_bostr = startpos;
777 }
6eb5f6b9
JH
778 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
779 if (!s) {
780#ifdef DEBUGGING
781 char *what;
782#endif
783 if (endpos == strend) {
784 DEBUG_r( PerlIO_printf(Perl_debug_log,
785 "Could not match STCLASS...\n") );
786 goto fail;
787 }
66e933ab
GS
788 DEBUG_r( PerlIO_printf(Perl_debug_log,
789 "This position contradicts STCLASS...\n") );
653099ff
GS
790 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
791 goto fail;
6eb5f6b9
JH
792 /* Contradict one of substrings */
793 if (prog->anchored_substr) {
6eb5f6b9
JH
794 if (prog->anchored_substr == check) {
795 DEBUG_r( what = "anchored" );
796 hop_and_restart:
1aa99e6b 797 s = HOP3c(t, 1, strend);
66e933ab
GS
798 if (s + start_shift + end_shift > strend) {
799 /* XXXX Should be taken into account earlier? */
800 DEBUG_r( PerlIO_printf(Perl_debug_log,
801 "Could not match STCLASS...\n") );
802 goto fail;
803 }
5e39e1e5
HS
804 if (!check)
805 goto giveup;
6eb5f6b9 806 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 807 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
808 what, (long)(s + start_shift - i_strpos)) );
809 goto restart;
810 }
66e933ab 811 /* Have both, check_string is floating */
6eb5f6b9
JH
812 if (t + start_shift >= check_at) /* Contradicts floating=check */
813 goto retry_floating_check;
814 /* Recheck anchored substring, but not floating... */
9041c2e3 815 s = check_at;
5e39e1e5
HS
816 if (!check)
817 goto giveup;
6eb5f6b9 818 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 819 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
820 (long)(other_last - i_strpos)) );
821 goto do_other_anchored;
822 }
60e71179
GS
823 /* Another way we could have checked stclass at the
824 current position only: */
825 if (ml_anch) {
826 s = t = t + 1;
5e39e1e5
HS
827 if (!check)
828 goto giveup;
60e71179 829 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
830 "Looking for /%s^%s/m starting at offset %ld...\n",
831 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 832 goto try_at_offset;
66e933ab 833 }
60e71179
GS
834 if (!prog->float_substr) /* Could have been deleted */
835 goto fail;
6eb5f6b9
JH
836 /* Check is floating subtring. */
837 retry_floating_check:
838 t = check_at - start_shift;
839 DEBUG_r( what = "floating" );
840 goto hop_and_restart;
841 }
842 DEBUG_r( if (t != s)
9041c2e3 843 PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
844 "By STCLASS: moving %ld --> %ld\n",
845 (long)(t - i_strpos), (long)(s - i_strpos));
846 else
9041c2e3 847 PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
848 "Does not contradict STCLASS...\n") );
849 }
5e39e1e5
HS
850 giveup:
851 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
852 PL_colors[4], (check ? "Guessed" : "Giving up"),
853 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 854 return s;
2c2d71f5
JH
855
856 fail_finish: /* Substring not found */
66e933ab
GS
857 if (prog->check_substr) /* could be removed already */
858 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 859 fail:
2c2d71f5 860 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
861 PL_colors[4],PL_colors[5]));
862 return Nullch;
863}
9661b544 864
6eb5f6b9 865/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
866STATIC char *
867S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 868{
6eb5f6b9
JH
869 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
870 char *m;
d8093b23
G
871 STRLEN ln;
872 unsigned int c1;
873 unsigned int c2;
6eb5f6b9
JH
874 char *e;
875 register I32 tmp = 1; /* Scratch variable? */
ffc61ed2 876 register bool do_utf8 = DO_UTF8(PL_reg_sv);
cad2e5aa 877
6eb5f6b9
JH
878 /* We know what class it must start with. */
879 switch (OP(c)) {
6eb5f6b9 880 case ANYOF:
a0ed51b3 881 while (s < strend) {
ffc61ed2 882 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
883 if (tmp && (norun || regtry(prog, s)))
884 goto got_it;
885 else
886 tmp = doevery;
a0ed51b3 887 }
6eb5f6b9
JH
888 else
889 tmp = 1;
ffc61ed2 890 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 891 }
6eb5f6b9
JH
892 break;
893 case EXACTF:
894 m = STRING(c);
895 ln = STR_LEN(c);
1aa99e6b
IH
896 if (UTF) {
897 c1 = to_utf8_lower((U8*)m);
898 c2 = to_utf8_upper((U8*)m);
899 }
900 else {
901 c1 = *(U8*)m;
902 c2 = PL_fold[c1];
903 }
6eb5f6b9
JH
904 goto do_exactf;
905 case EXACTFL:
906 m = STRING(c);
907 ln = STR_LEN(c);
d8093b23 908 c1 = *(U8*)m;
6eb5f6b9
JH
909 c2 = PL_fold_locale[c1];
910 do_exactf:
911 e = strend - ln;
b3c9acc1 912
6eb5f6b9
JH
913 if (norun && e < s)
914 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
915
916 if (do_utf8) {
917 STRLEN len;
918 if (c1 == c2)
919 while (s <= e) {
9041c2e3 920 if ( utf8_to_uvchr((U8*)s, &len) == c1
1aa99e6b
IH
921 && regtry(prog, s) )
922 goto got_it;
923 s += len;
924 }
925 else
926 while (s <= e) {
9041c2e3 927 UV c = utf8_to_uvchr((U8*)s, &len);
1aa99e6b
IH
928 if ( (c == c1 || c == c2) && regtry(prog, s) )
929 goto got_it;
930 s += len;
931 }
932 }
933 else {
934 if (c1 == c2)
935 while (s <= e) {
936 if ( *(U8*)s == c1
937 && (ln == 1 || !(OP(c) == EXACTF
938 ? ibcmp(s, m, ln)
939 : ibcmp_locale(s, m, ln)))
940 && (norun || regtry(prog, s)) )
941 goto got_it;
942 s++;
943 }
944 else
945 while (s <= e) {
946 if ( (*(U8*)s == c1 || *(U8*)s == c2)
947 && (ln == 1 || !(OP(c) == EXACTF
948 ? ibcmp(s, m, ln)
949 : ibcmp_locale(s, m, ln)))
950 && (norun || regtry(prog, s)) )
951 goto got_it;
952 s++;
953 }
b3c9acc1
IZ
954 }
955 break;
bbce6d69 956 case BOUNDL:
3280af22 957 PL_reg_flags |= RF_tainted;
bbce6d69 958 /* FALL THROUGH */
a0d0e21e 959 case BOUND:
ffc61ed2 960 if (do_utf8) {
12d33761 961 if (s == PL_bostr)
ffc61ed2
JH
962 tmp = '\n';
963 else {
1aa99e6b 964 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 965
2b9d42f0 966 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
967 }
968 tmp = ((OP(c) == BOUND ?
9041c2e3 969 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 970 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
971 while (s < strend) {
972 if (tmp == !(OP(c) == BOUND ?
3568d838 973 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
974 isALNUM_LC_utf8((U8*)s)))
975 {
976 tmp = !tmp;
977 if ((norun || regtry(prog, s)))
978 goto got_it;
979 }
980 s += UTF8SKIP(s);
a687059c 981 }
a0d0e21e 982 }
667bb95a 983 else {
12d33761 984 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
985 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
986 while (s < strend) {
987 if (tmp ==
988 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
989 tmp = !tmp;
990 if ((norun || regtry(prog, s)))
991 goto got_it;
992 }
993 s++;
a0ed51b3 994 }
a0ed51b3 995 }
6eb5f6b9 996 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
997 goto got_it;
998 break;
bbce6d69 999 case NBOUNDL:
3280af22 1000 PL_reg_flags |= RF_tainted;
bbce6d69 1001 /* FALL THROUGH */
a0d0e21e 1002 case NBOUND:
ffc61ed2 1003 if (do_utf8) {
12d33761 1004 if (s == PL_bostr)
ffc61ed2
JH
1005 tmp = '\n';
1006 else {
1aa99e6b 1007 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1008
2b9d42f0 1009 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1010 }
1011 tmp = ((OP(c) == NBOUND ?
9041c2e3 1012 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1013 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1014 while (s < strend) {
1015 if (tmp == !(OP(c) == NBOUND ?
3568d838 1016 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1017 isALNUM_LC_utf8((U8*)s)))
1018 tmp = !tmp;
1019 else if ((norun || regtry(prog, s)))
1020 goto got_it;
1021 s += UTF8SKIP(s);
1022 }
a0d0e21e 1023 }
667bb95a 1024 else {
12d33761 1025 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1026 tmp = ((OP(c) == NBOUND ?
1027 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1028 while (s < strend) {
1029 if (tmp ==
1030 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1031 tmp = !tmp;
1032 else if ((norun || regtry(prog, s)))
1033 goto got_it;
1034 s++;
1035 }
a0ed51b3 1036 }
6eb5f6b9 1037 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1038 goto got_it;
1039 break;
a0d0e21e 1040 case ALNUM:
ffc61ed2 1041 if (do_utf8) {
8269fa76 1042 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1043 while (s < strend) {
3568d838 1044 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1045 if (tmp && (norun || regtry(prog, s)))
1046 goto got_it;
1047 else
1048 tmp = doevery;
1049 }
bbce6d69 1050 else
ffc61ed2
JH
1051 tmp = 1;
1052 s += UTF8SKIP(s);
bbce6d69 1053 }
bbce6d69 1054 }
ffc61ed2
JH
1055 else {
1056 while (s < strend) {
1057 if (isALNUM(*s)) {
1058 if (tmp && (norun || regtry(prog, s)))
1059 goto got_it;
1060 else
1061 tmp = doevery;
1062 }
a0ed51b3 1063 else
ffc61ed2
JH
1064 tmp = 1;
1065 s++;
a0ed51b3 1066 }
a0ed51b3
LW
1067 }
1068 break;
bbce6d69 1069 case ALNUML:
3280af22 1070 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1071 if (do_utf8) {
1072 while (s < strend) {
1073 if (isALNUM_LC_utf8((U8*)s)) {
1074 if (tmp && (norun || regtry(prog, s)))
1075 goto got_it;
1076 else
1077 tmp = doevery;
1078 }
a687059c 1079 else
ffc61ed2
JH
1080 tmp = 1;
1081 s += UTF8SKIP(s);
a0d0e21e 1082 }
a0d0e21e 1083 }
ffc61ed2
JH
1084 else {
1085 while (s < strend) {
1086 if (isALNUM_LC(*s)) {
1087 if (tmp && (norun || regtry(prog, s)))
1088 goto got_it;
1089 else
1090 tmp = doevery;
1091 }
a0ed51b3 1092 else
ffc61ed2
JH
1093 tmp = 1;
1094 s++;
a0ed51b3 1095 }
a0ed51b3
LW
1096 }
1097 break;
a0d0e21e 1098 case NALNUM:
ffc61ed2 1099 if (do_utf8) {
8269fa76 1100 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1101 while (s < strend) {
3568d838 1102 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1103 if (tmp && (norun || regtry(prog, s)))
1104 goto got_it;
1105 else
1106 tmp = doevery;
1107 }
bbce6d69 1108 else
ffc61ed2
JH
1109 tmp = 1;
1110 s += UTF8SKIP(s);
bbce6d69 1111 }
bbce6d69 1112 }
ffc61ed2
JH
1113 else {
1114 while (s < strend) {
1115 if (!isALNUM(*s)) {
1116 if (tmp && (norun || regtry(prog, s)))
1117 goto got_it;
1118 else
1119 tmp = doevery;
1120 }
a0ed51b3 1121 else
ffc61ed2
JH
1122 tmp = 1;
1123 s++;
a0ed51b3 1124 }
a0ed51b3
LW
1125 }
1126 break;
bbce6d69 1127 case NALNUML:
3280af22 1128 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1129 if (do_utf8) {
1130 while (s < strend) {
1131 if (!isALNUM_LC_utf8((U8*)s)) {
1132 if (tmp && (norun || regtry(prog, s)))
1133 goto got_it;
1134 else
1135 tmp = doevery;
1136 }
a687059c 1137 else
ffc61ed2
JH
1138 tmp = 1;
1139 s += UTF8SKIP(s);
a687059c 1140 }
a0d0e21e 1141 }
ffc61ed2
JH
1142 else {
1143 while (s < strend) {
1144 if (!isALNUM_LC(*s)) {
1145 if (tmp && (norun || regtry(prog, s)))
1146 goto got_it;
1147 else
1148 tmp = doevery;
1149 }
a0ed51b3 1150 else
ffc61ed2
JH
1151 tmp = 1;
1152 s++;
a0ed51b3 1153 }
a0ed51b3
LW
1154 }
1155 break;
a0d0e21e 1156 case SPACE:
ffc61ed2 1157 if (do_utf8) {
8269fa76 1158 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1159 while (s < strend) {
3568d838 1160 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1161 if (tmp && (norun || regtry(prog, s)))
1162 goto got_it;
1163 else
1164 tmp = doevery;
1165 }
a0d0e21e 1166 else
ffc61ed2
JH
1167 tmp = 1;
1168 s += UTF8SKIP(s);
2304df62 1169 }
a0d0e21e 1170 }
ffc61ed2
JH
1171 else {
1172 while (s < strend) {
1173 if (isSPACE(*s)) {
1174 if (tmp && (norun || regtry(prog, s)))
1175 goto got_it;
1176 else
1177 tmp = doevery;
1178 }
a0ed51b3 1179 else
ffc61ed2
JH
1180 tmp = 1;
1181 s++;
a0ed51b3 1182 }
a0ed51b3
LW
1183 }
1184 break;
bbce6d69 1185 case SPACEL:
3280af22 1186 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1187 if (do_utf8) {
1188 while (s < strend) {
1189 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1190 if (tmp && (norun || regtry(prog, s)))
1191 goto got_it;
1192 else
1193 tmp = doevery;
1194 }
bbce6d69 1195 else
ffc61ed2
JH
1196 tmp = 1;
1197 s += UTF8SKIP(s);
bbce6d69 1198 }
bbce6d69 1199 }
ffc61ed2
JH
1200 else {
1201 while (s < strend) {
1202 if (isSPACE_LC(*s)) {
1203 if (tmp && (norun || regtry(prog, s)))
1204 goto got_it;
1205 else
1206 tmp = doevery;
1207 }
a0ed51b3 1208 else
ffc61ed2
JH
1209 tmp = 1;
1210 s++;
a0ed51b3 1211 }
a0ed51b3
LW
1212 }
1213 break;
a0d0e21e 1214 case NSPACE:
ffc61ed2 1215 if (do_utf8) {
8269fa76 1216 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1217 while (s < strend) {
3568d838 1218 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1219 if (tmp && (norun || regtry(prog, s)))
1220 goto got_it;
1221 else
1222 tmp = doevery;
1223 }
a0d0e21e 1224 else
ffc61ed2
JH
1225 tmp = 1;
1226 s += UTF8SKIP(s);
a687059c 1227 }
a0d0e21e 1228 }
ffc61ed2
JH
1229 else {
1230 while (s < strend) {
1231 if (!isSPACE(*s)) {
1232 if (tmp && (norun || regtry(prog, s)))
1233 goto got_it;
1234 else
1235 tmp = doevery;
1236 }
a0ed51b3 1237 else
ffc61ed2
JH
1238 tmp = 1;
1239 s++;
a0ed51b3 1240 }
a0ed51b3
LW
1241 }
1242 break;
bbce6d69 1243 case NSPACEL:
3280af22 1244 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1245 if (do_utf8) {
1246 while (s < strend) {
1247 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1248 if (tmp && (norun || regtry(prog, s)))
1249 goto got_it;
1250 else
1251 tmp = doevery;
1252 }
bbce6d69 1253 else
ffc61ed2
JH
1254 tmp = 1;
1255 s += UTF8SKIP(s);
bbce6d69 1256 }
bbce6d69 1257 }
ffc61ed2
JH
1258 else {
1259 while (s < strend) {
1260 if (!isSPACE_LC(*s)) {
1261 if (tmp && (norun || regtry(prog, s)))
1262 goto got_it;
1263 else
1264 tmp = doevery;
1265 }
a0ed51b3 1266 else
ffc61ed2
JH
1267 tmp = 1;
1268 s++;
a0ed51b3 1269 }
a0ed51b3
LW
1270 }
1271 break;
a0d0e21e 1272 case DIGIT:
ffc61ed2 1273 if (do_utf8) {
8269fa76 1274 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1275 while (s < strend) {
3568d838 1276 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1277 if (tmp && (norun || regtry(prog, s)))
1278 goto got_it;
1279 else
1280 tmp = doevery;
1281 }
a0d0e21e 1282 else
ffc61ed2
JH
1283 tmp = 1;
1284 s += UTF8SKIP(s);
2b69d0c2 1285 }
a0d0e21e 1286 }
ffc61ed2
JH
1287 else {
1288 while (s < strend) {
1289 if (isDIGIT(*s)) {
1290 if (tmp && (norun || regtry(prog, s)))
1291 goto got_it;
1292 else
1293 tmp = doevery;
1294 }
a0ed51b3 1295 else
ffc61ed2
JH
1296 tmp = 1;
1297 s++;
a0ed51b3 1298 }
a0ed51b3
LW
1299 }
1300 break;
b8c5462f
JH
1301 case DIGITL:
1302 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1303 if (do_utf8) {
1304 while (s < strend) {
1305 if (isDIGIT_LC_utf8((U8*)s)) {
1306 if (tmp && (norun || regtry(prog, s)))
1307 goto got_it;
1308 else
1309 tmp = doevery;
1310 }
b8c5462f 1311 else
ffc61ed2
JH
1312 tmp = 1;
1313 s += UTF8SKIP(s);
b8c5462f 1314 }
b8c5462f 1315 }
ffc61ed2
JH
1316 else {
1317 while (s < strend) {
1318 if (isDIGIT_LC(*s)) {
1319 if (tmp && (norun || regtry(prog, s)))
1320 goto got_it;
1321 else
1322 tmp = doevery;
1323 }
b8c5462f 1324 else
ffc61ed2
JH
1325 tmp = 1;
1326 s++;
b8c5462f 1327 }
b8c5462f
JH
1328 }
1329 break;
a0d0e21e 1330 case NDIGIT:
ffc61ed2 1331 if (do_utf8) {
8269fa76 1332 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1333 while (s < strend) {
3568d838 1334 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1335 if (tmp && (norun || regtry(prog, s)))
1336 goto got_it;
1337 else
1338 tmp = doevery;
1339 }
a0d0e21e 1340 else
ffc61ed2
JH
1341 tmp = 1;
1342 s += UTF8SKIP(s);
a687059c 1343 }
a0d0e21e 1344 }
ffc61ed2
JH
1345 else {
1346 while (s < strend) {
1347 if (!isDIGIT(*s)) {
1348 if (tmp && (norun || regtry(prog, s)))
1349 goto got_it;
1350 else
1351 tmp = doevery;
1352 }
a0ed51b3 1353 else
ffc61ed2
JH
1354 tmp = 1;
1355 s++;
a0ed51b3 1356 }
a0ed51b3
LW
1357 }
1358 break;
b8c5462f
JH
1359 case NDIGITL:
1360 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1361 if (do_utf8) {
1362 while (s < strend) {
1363 if (!isDIGIT_LC_utf8((U8*)s)) {
1364 if (tmp && (norun || regtry(prog, s)))
1365 goto got_it;
1366 else
1367 tmp = doevery;
1368 }
b8c5462f 1369 else
ffc61ed2
JH
1370 tmp = 1;
1371 s += UTF8SKIP(s);
b8c5462f 1372 }
a0ed51b3 1373 }
ffc61ed2
JH
1374 else {
1375 while (s < strend) {
1376 if (!isDIGIT_LC(*s)) {
1377 if (tmp && (norun || regtry(prog, s)))
1378 goto got_it;
1379 else
1380 tmp = doevery;
1381 }
cf93c79d 1382 else
ffc61ed2
JH
1383 tmp = 1;
1384 s++;
b8c5462f 1385 }
b8c5462f
JH
1386 }
1387 break;
b3c9acc1 1388 default:
3c3eec57
GS
1389 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1390 break;
d6a28714 1391 }
6eb5f6b9
JH
1392 return 0;
1393 got_it:
1394 return s;
1395}
1396
1397/*
1398 - regexec_flags - match a regexp against a string
1399 */
1400I32
1401Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1402 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1403/* strend: pointer to null at end of string */
1404/* strbeg: real beginning of string */
1405/* minend: end of match must be >=minend after stringarg. */
1406/* data: May be used for some additional optimizations. */
1407/* nosave: For optimizations. */
1408{
6eb5f6b9
JH
1409 register char *s;
1410 register regnode *c;
1411 register char *startpos = stringarg;
6eb5f6b9
JH
1412 I32 minlen; /* must match at least this many chars */
1413 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1414 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1415 constant substr. */ /* CC */
1416 I32 end_shift = 0; /* Same for the end. */ /* CC */
1417 I32 scream_pos = -1; /* Internal iterator of scream. */
1418 char *scream_olds;
1419 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1420 bool do_utf8 = DO_UTF8(sv);
6eb5f6b9
JH
1421
1422 PL_regcc = 0;
1423
1424 cache_re(prog);
1425#ifdef DEBUGGING
aea4f609 1426 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1427#endif
1428
1429 /* Be paranoid... */
1430 if (prog == NULL || startpos == NULL) {
1431 Perl_croak(aTHX_ "NULL regexp parameter");
1432 return 0;
1433 }
1434
1435 minlen = prog->minlen;
1aa99e6b 1436 if (do_utf8) {
3baa4c62
JH
1437 if (!(prog->reganch & ROPT_SANY_SEEN))
1438 if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
1aa99e6b
IH
1439 }
1440 else {
1441 if (strend - startpos < minlen) goto phooey;
1442 }
6eb5f6b9 1443
6eb5f6b9
JH
1444 /* Check validity of program. */
1445 if (UCHARAT(prog->program) != REG_MAGIC) {
1446 Perl_croak(aTHX_ "corrupted regexp program");
1447 }
1448
1449 PL_reg_flags = 0;
1450 PL_reg_eval_set = 0;
1451 PL_reg_maxiter = 0;
1452
1453 if (prog->reganch & ROPT_UTF8)
1454 PL_reg_flags |= RF_utf8;
1455
1456 /* Mark beginning of line for ^ and lookbehind. */
1457 PL_regbol = startpos;
1458 PL_bostr = strbeg;
1459 PL_reg_sv = sv;
1460
1461 /* Mark end of line for $ (and such) */
1462 PL_regeol = strend;
1463
1464 /* see how far we have to get to not match where we matched before */
1465 PL_regtill = startpos+minend;
1466
1467 /* We start without call_cc context. */
1468 PL_reg_call_cc = 0;
1469
1470 /* If there is a "must appear" string, look for it. */
1471 s = startpos;
1472
1473 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1474 MAGIC *mg;
1475
1476 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1477 PL_reg_ganch = startpos;
1478 else if (sv && SvTYPE(sv) >= SVt_PVMG
1479 && SvMAGIC(sv)
14befaf4
DM
1480 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1481 && mg->mg_len >= 0) {
6eb5f6b9
JH
1482 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1483 if (prog->reganch & ROPT_ANCH_GPOS) {
1484 if (s > PL_reg_ganch)
1485 goto phooey;
1486 s = PL_reg_ganch;
1487 }
1488 }
1489 else /* pos() not defined */
1490 PL_reg_ganch = strbeg;
1491 }
1492
699c3c34
JH
1493 if (do_utf8 == (UTF!=0) &&
1494 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1495 re_scream_pos_data d;
1496
1497 d.scream_olds = &scream_olds;
1498 d.scream_pos = &scream_pos;
1499 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1500 if (!s)
1501 goto phooey; /* not present */
1502 }
1503
1504 DEBUG_r( if (!PL_colorset) reginitcolors() );
1505 DEBUG_r(PerlIO_printf(Perl_debug_log,
1506 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1507 PL_colors[4],PL_colors[5],PL_colors[0],
1508 prog->precomp,
1509 PL_colors[1],
1510 (strlen(prog->precomp) > 60 ? "..." : ""),
1511 PL_colors[0],
1512 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1513 startpos, PL_colors[1],
1514 (strend - startpos > 60 ? "..." : ""))
1515 );
1516
1517 /* Simplest case: anchored match need be tried only once. */
1518 /* [unless only anchor is BOL and multiline is set] */
1519 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1520 if (s == startpos && regtry(prog, startpos))
1521 goto got_it;
1522 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1523 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1524 {
1525 char *end;
1526
1527 if (minlen)
1528 dontbother = minlen - 1;
1aa99e6b 1529 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1530 /* for multiline we only have to try after newlines */
1531 if (prog->check_substr) {
1532 if (s == startpos)
1533 goto after_try;
1534 while (1) {
1535 if (regtry(prog, s))
1536 goto got_it;
1537 after_try:
1538 if (s >= end)
1539 goto phooey;
1540 if (prog->reganch & RE_USE_INTUIT) {
1541 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1542 if (!s)
1543 goto phooey;
1544 }
1545 else
1546 s++;
1547 }
1548 } else {
1549 if (s > startpos)
1550 s--;
1551 while (s < end) {
1552 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1553 if (regtry(prog, s))
1554 goto got_it;
1555 }
1556 }
1557 }
1558 }
1559 goto phooey;
1560 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1561 if (regtry(prog, PL_reg_ganch))
1562 goto got_it;
1563 goto phooey;
1564 }
1565
1566 /* Messy cases: unanchored match. */
9041c2e3 1567 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1568 /* we have /x+whatever/ */
1569 /* it must be a one character string (XXXX Except UTF?) */
1570 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1571#ifdef DEBUGGING
1572 int did_match = 0;
1573#endif
1574
1aa99e6b 1575 if (do_utf8) {
6eb5f6b9
JH
1576 while (s < strend) {
1577 if (*s == ch) {
bf93d4cc 1578 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1579 if (regtry(prog, s)) goto got_it;
1580 s += UTF8SKIP(s);
1581 while (s < strend && *s == ch)
1582 s += UTF8SKIP(s);
1583 }
1584 s += UTF8SKIP(s);
1585 }
1586 }
1587 else {
1588 while (s < strend) {
1589 if (*s == ch) {
bf93d4cc 1590 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1591 if (regtry(prog, s)) goto got_it;
1592 s++;
1593 while (s < strend && *s == ch)
1594 s++;
1595 }
1596 s++;
1597 }
1598 }
bf93d4cc
GS
1599 DEBUG_r(did_match ||
1600 PerlIO_printf(Perl_debug_log,
1601 "Did not find anchored character...\n"));
6eb5f6b9
JH
1602 }
1603 /*SUPPRESS 560*/
1aa99e6b
IH
1604 else if (do_utf8 == (UTF!=0) &&
1605 (prog->anchored_substr != Nullsv
9041c2e3 1606 || (prog->float_substr != Nullsv
1aa99e6b 1607 && prog->float_max_offset < strend - s))) {
9041c2e3 1608 SV *must = prog->anchored_substr
6eb5f6b9 1609 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1610 I32 back_max =
6eb5f6b9 1611 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1612 I32 back_min =
6eb5f6b9 1613 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1614 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1615 -(I32)(CHR_SVLEN(must)
1aa99e6b 1616 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1617 char *last1; /* Last position checked before */
bf93d4cc
GS
1618#ifdef DEBUGGING
1619 int did_match = 0;
1620#endif
6eb5f6b9
JH
1621
1622 if (s > PL_bostr)
1623 last1 = HOPc(s, -1);
1624 else
1625 last1 = s - 1; /* bogus */
1626
1627 /* XXXX check_substr already used to find `s', can optimize if
1628 check_substr==must. */
1629 scream_pos = -1;
1630 dontbother = end_shift;
1631 strend = HOPc(strend, -dontbother);
1632 while ( (s <= last) &&
9041c2e3 1633 ((flags & REXEC_SCREAM)
1aa99e6b 1634 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1635 end_shift, &scream_pos, 0))
1aa99e6b 1636 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1637 (unsigned char*)strend, must,
6eb5f6b9 1638 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1639 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1640 if (HOPc(s, -back_max) > last1) {
1641 last1 = HOPc(s, -back_min);
1642 s = HOPc(s, -back_max);
1643 }
1644 else {
1645 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1646
1647 last1 = HOPc(s, -back_min);
1648 s = t;
1649 }
1aa99e6b 1650 if (do_utf8) {
6eb5f6b9
JH
1651 while (s <= last1) {
1652 if (regtry(prog, s))
1653 goto got_it;
1654 s += UTF8SKIP(s);
1655 }
1656 }
1657 else {
1658 while (s <= last1) {
1659 if (regtry(prog, s))
1660 goto got_it;
1661 s++;
1662 }
1663 }
1664 }
bf93d4cc
GS
1665 DEBUG_r(did_match ||
1666 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1667 ((must == prog->anchored_substr)
1668 ? "anchored" : "floating"),
1669 PL_colors[0],
1670 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1671 SvPVX(must),
1672 PL_colors[1], (SvTAIL(must) ? "$" : "")));
6eb5f6b9
JH
1673 goto phooey;
1674 }
155aba94 1675 else if ((c = prog->regstclass)) {
66e933ab
GS
1676 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1677 /* don't bother with what can't match */
6eb5f6b9 1678 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1679 DEBUG_r({
1680 SV *prop = sv_newmortal();
1681 regprop(prop, c);
1682 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s);
1683 });
6eb5f6b9
JH
1684 if (find_byclass(prog, c, s, strend, startpos, 0))
1685 goto got_it;
bf93d4cc 1686 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1687 }
1688 else {
1689 dontbother = 0;
1690 if (prog->float_substr != Nullsv) { /* Trim the end. */
1691 char *last;
d6a28714
JH
1692
1693 if (flags & REXEC_SCREAM) {
1694 last = screaminstr(sv, prog->float_substr, s - strbeg,
1695 end_shift, &scream_pos, 1); /* last one */
1696 if (!last)
ffc61ed2 1697 last = scream_olds; /* Only one occurrence. */
b8c5462f 1698 }
d6a28714
JH
1699 else {
1700 STRLEN len;
1701 char *little = SvPV(prog->float_substr, len);
1702
1703 if (SvTAIL(prog->float_substr)) {
1704 if (memEQ(strend - len + 1, little, len - 1))
1705 last = strend - len + 1;
1706 else if (!PL_multiline)
9041c2e3 1707 last = memEQ(strend - len, little, len)
d6a28714 1708 ? strend - len : Nullch;
b8c5462f 1709 else
d6a28714
JH
1710 goto find_last;
1711 } else {
1712 find_last:
9041c2e3 1713 if (len)
d6a28714 1714 last = rninstr(s, strend, little, little + len);
b8c5462f 1715 else
d6a28714 1716 last = strend; /* matching `$' */
b8c5462f 1717 }
b8c5462f 1718 }
bf93d4cc
GS
1719 if (last == NULL) {
1720 DEBUG_r(PerlIO_printf(Perl_debug_log,
1721 "%sCan't trim the tail, match fails (should not happen)%s\n",
1722 PL_colors[4],PL_colors[5]));
1723 goto phooey; /* Should not happen! */
1724 }
d6a28714
JH
1725 dontbother = strend - last + prog->float_min_offset;
1726 }
1727 if (minlen && (dontbother < minlen))
1728 dontbother = minlen - 1;
1729 strend -= dontbother; /* this one's always in bytes! */
1730 /* We don't know much -- general case. */
1aa99e6b 1731 if (do_utf8) {
d6a28714
JH
1732 for (;;) {
1733 if (regtry(prog, s))
1734 goto got_it;
1735 if (s >= strend)
1736 break;
b8c5462f 1737 s += UTF8SKIP(s);
d6a28714
JH
1738 };
1739 }
1740 else {
1741 do {
1742 if (regtry(prog, s))
1743 goto got_it;
1744 } while (s++ < strend);
1745 }
1746 }
1747
1748 /* Failure. */
1749 goto phooey;
1750
1751got_it:
1752 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1753
1754 if (PL_reg_eval_set) {
1755 /* Preserve the current value of $^R */
1756 if (oreplsv != GvSV(PL_replgv))
1757 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1758 restored, the value remains
1759 the same. */
1760 restore_pos(aTHXo_ 0);
1761 }
1762
1763 /* make sure $`, $&, $', and $digit will work later */
1764 if ( !(flags & REXEC_NOT_FIRST) ) {
1765 if (RX_MATCH_COPIED(prog)) {
1766 Safefree(prog->subbeg);
1767 RX_MATCH_COPIED_off(prog);
1768 }
1769 if (flags & REXEC_COPY_STR) {
1770 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1771
1772 s = savepvn(strbeg, i);
1773 prog->subbeg = s;
1774 prog->sublen = i;
1775 RX_MATCH_COPIED_on(prog);
1776 }
1777 else {
1778 prog->subbeg = strbeg;
1779 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1780 }
1781 }
9041c2e3 1782
d6a28714
JH
1783 return 1;
1784
1785phooey:
bf93d4cc
GS
1786 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1787 PL_colors[4],PL_colors[5]));
d6a28714
JH
1788 if (PL_reg_eval_set)
1789 restore_pos(aTHXo_ 0);
1790 return 0;
1791}
1792
1793/*
1794 - regtry - try match at specific point
1795 */
1796STATIC I32 /* 0 failure, 1 success */
1797S_regtry(pTHX_ regexp *prog, char *startpos)
1798{
d6a28714
JH
1799 register I32 i;
1800 register I32 *sp;
1801 register I32 *ep;
1802 CHECKPOINT lastcp;
1803
02db2b7b
IZ
1804#ifdef DEBUGGING
1805 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1806#endif
d6a28714
JH
1807 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1808 MAGIC *mg;
1809
1810 PL_reg_eval_set = RS_init;
1811 DEBUG_r(DEBUG_s(
b900a521
JH
1812 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1813 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1814 ));
e8347627 1815 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1816 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1817 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1818 SAVETMPS;
1819 /* Apparently this is not needed, judging by wantarray. */
e8347627 1820 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1821 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1822
1823 if (PL_reg_sv) {
1824 /* Make $_ available to executed code. */
1825 if (PL_reg_sv != DEFSV) {
1826 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1827 SAVESPTR(DEFSV);
1828 DEFSV = PL_reg_sv;
b8c5462f 1829 }
d6a28714 1830
9041c2e3 1831 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1832 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1833 /* prepare for quick setting of pos */
14befaf4
DM
1834 sv_magic(PL_reg_sv, (SV*)0,
1835 PERL_MAGIC_regex_global, Nullch, 0);
1836 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1837 mg->mg_len = -1;
b8c5462f 1838 }
d6a28714
JH
1839 PL_reg_magic = mg;
1840 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1841 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1842 }
1843 if (!PL_reg_curpm)
0f79a09d 1844 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1845 PL_reg_curpm->op_pmregexp = prog;
1846 PL_reg_oldcurpm = PL_curpm;
1847 PL_curpm = PL_reg_curpm;
1848 if (RX_MATCH_COPIED(prog)) {
1849 /* Here is a serious problem: we cannot rewrite subbeg,
1850 since it may be needed if this match fails. Thus
1851 $` inside (?{}) could fail... */
1852 PL_reg_oldsaved = prog->subbeg;
1853 PL_reg_oldsavedlen = prog->sublen;
1854 RX_MATCH_COPIED_off(prog);
1855 }
1856 else
1857 PL_reg_oldsaved = Nullch;
1858 prog->subbeg = PL_bostr;
1859 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1860 }
1861 prog->startp[0] = startpos - PL_bostr;
1862 PL_reginput = startpos;
1863 PL_regstartp = prog->startp;
1864 PL_regendp = prog->endp;
1865 PL_reglastparen = &prog->lastparen;
1866 prog->lastparen = 0;
1867 PL_regsize = 0;
1868 DEBUG_r(PL_reg_starttry = startpos);
1869 if (PL_reg_start_tmpl <= prog->nparens) {
1870 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1871 if(PL_reg_start_tmp)
1872 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1873 else
1874 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1875 }
1876
1877 /* XXXX What this code is doing here?!!! There should be no need
1878 to do this again and again, PL_reglastparen should take care of
3dd2943c 1879 this! --ilya*/
dafc8851
JH
1880
1881 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1882 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1883 * PL_reglastparen), is not needed at all by the test suite
1884 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1885 * enough, for building DynaLoader, or otherwise this
1886 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1887 * will happen. Meanwhile, this code *is* needed for the
1888 * above-mentioned test suite tests to succeed. The common theme
1889 * on those tests seems to be returning null fields from matches.
1890 * --jhi */
dafc8851 1891#if 1
d6a28714
JH
1892 sp = prog->startp;
1893 ep = prog->endp;
1894 if (prog->nparens) {
09e8ae3b 1895 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1896 *++sp = -1;
1897 *++ep = -1;
1898 }
1899 }
dafc8851 1900#endif
02db2b7b 1901 REGCP_SET(lastcp);
d6a28714
JH
1902 if (regmatch(prog->program + 1)) {
1903 prog->endp[0] = PL_reginput - PL_bostr;
1904 return 1;
1905 }
02db2b7b 1906 REGCP_UNWIND(lastcp);
d6a28714
JH
1907 return 0;
1908}
1909
02db2b7b
IZ
1910#define RE_UNWIND_BRANCH 1
1911#define RE_UNWIND_BRANCHJ 2
1912
1913union re_unwind_t;
1914
1915typedef struct { /* XX: makes sense to enlarge it... */
1916 I32 type;
1917 I32 prev;
1918 CHECKPOINT lastcp;
1919} re_unwind_generic_t;
1920
1921typedef struct {
1922 I32 type;
1923 I32 prev;
1924 CHECKPOINT lastcp;
1925 I32 lastparen;
1926 regnode *next;
1927 char *locinput;
1928 I32 nextchr;
1929#ifdef DEBUGGING
1930 int regindent;
1931#endif
1932} re_unwind_branch_t;
1933
1934typedef union re_unwind_t {
1935 I32 type;
1936 re_unwind_generic_t generic;
1937 re_unwind_branch_t branch;
1938} re_unwind_t;
1939
d6a28714
JH
1940/*
1941 - regmatch - main matching routine
1942 *
1943 * Conceptually the strategy is simple: check to see whether the current
1944 * node matches, call self recursively to see whether the rest matches,
1945 * and then act accordingly. In practice we make some effort to avoid
1946 * recursion, in particular by going through "ordinary" nodes (that don't
1947 * need to know whether the rest of the match failed) by a loop instead of
1948 * by recursion.
1949 */
1950/* [lwall] I've hoisted the register declarations to the outer block in order to
1951 * maybe save a little bit of pushing and popping on the stack. It also takes
1952 * advantage of machines that use a register save mask on subroutine entry.
1953 */
1954STATIC I32 /* 0 failure, 1 success */
1955S_regmatch(pTHX_ regnode *prog)
1956{
d6a28714
JH
1957 register regnode *scan; /* Current node. */
1958 regnode *next; /* Next node. */
1959 regnode *inner; /* Next node in internal branch. */
1960 register I32 nextchr; /* renamed nextchr - nextchar colides with
1961 function of same name */
1962 register I32 n; /* no or next */
1963 register I32 ln; /* len or last */
1964 register char *s; /* operand or save */
1965 register char *locinput = PL_reginput;
1966 register I32 c1, c2, paren; /* case fold search, parenth */
1967 int minmod = 0, sw = 0, logical = 0;
02db2b7b
IZ
1968 I32 unwind = 0;
1969 I32 firstcp = PL_savestack_ix;
ffc61ed2 1970 register bool do_utf8 = DO_UTF8(PL_reg_sv);
02db2b7b 1971
d6a28714
JH
1972#ifdef DEBUGGING
1973 PL_regindent++;
1974#endif
1975
1976 /* Note that nextchr is a byte even in UTF */
1977 nextchr = UCHARAT(locinput);
1978 scan = prog;
1979 while (scan != NULL) {
1980#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
02db2b7b 1981#if 1
d6a28714
JH
1982# define sayYES goto yes
1983# define sayNO goto no
7821416a
IZ
1984# define sayYES_FINAL goto yes_final
1985# define sayYES_LOUD goto yes_loud
1986# define sayNO_FINAL goto no_final
1987# define sayNO_SILENT goto do_no
d6a28714
JH
1988# define saySAME(x) if (x) goto yes; else goto no
1989# define REPORT_CODE_OFF 24
1990#else
1991# define sayYES return 1
1992# define sayNO return 0
7821416a
IZ
1993# define sayYES_FINAL return 1
1994# define sayYES_LOUD return 1
1995# define sayNO_FINAL return 0
1996# define sayNO_SILENT return 0
d6a28714
JH
1997# define saySAME(x) return x
1998#endif
1999 DEBUG_r( {
2000 SV *prop = sv_newmortal();
2001 int docolor = *PL_colors[0];
2002 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2003 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2004 /* The part of the string before starttry has one color
2005 (pref0_len chars), between starttry and current
2006 position another one (pref_len - pref0_len chars),
2007 after the current position the third one.
2008 We assume that pref0_len <= pref_len, otherwise we
2009 decrease pref0_len. */
9041c2e3 2010 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2011 ? (5 + taill) - l : locinput - PL_bostr;
2012 int pref0_len;
d6a28714 2013
1aa99e6b
IH
2014 while (UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2015 pref_len++;
2016 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2017 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2018 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2019 ? (5 + taill) - pref_len : PL_regeol - locinput);
1aa99e6b
IH
2020 while (UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2021 l--;
d6a28714
JH
2022 if (pref0_len < 0)
2023 pref0_len = 0;
2024 if (pref0_len > pref_len)
2025 pref0_len = pref_len;
2026 regprop(prop, scan);
9041c2e3 2027 PerlIO_printf(Perl_debug_log,
b900a521 2028 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
9041c2e3
NIS
2029 (IV)(locinput - PL_bostr),
2030 PL_colors[4], pref0_len,
d6a28714 2031 locinput - pref_len, PL_colors[5],
9041c2e3 2032 PL_colors[2], pref_len - pref0_len,
d6a28714
JH
2033 locinput - pref_len + pref0_len, PL_colors[3],
2034 (docolor ? "" : "> <"),
2035 PL_colors[0], l, locinput, PL_colors[1],
2036 15 - l - pref_len + 1,
2037 "",
b900a521 2038 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
2039 SvPVX(prop));
2040 } );
2041
2042 next = scan + NEXT_OFF(scan);
2043 if (next == scan)
2044 next = NULL;
2045
2046 switch (OP(scan)) {
2047 case BOL:
12d33761
HS
2048 if (locinput == PL_bostr || (PL_multiline &&
2049 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2050 {
2051 /* regtill = regbol; */
b8c5462f
JH
2052 break;
2053 }
d6a28714
JH
2054 sayNO;
2055 case MBOL:
12d33761
HS
2056 if (locinput == PL_bostr ||
2057 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2058 {
b8c5462f
JH
2059 break;
2060 }
d6a28714
JH
2061 sayNO;
2062 case SBOL:
c2a73568 2063 if (locinput == PL_bostr)
b8c5462f 2064 break;
d6a28714
JH
2065 sayNO;
2066 case GPOS:
2067 if (locinput == PL_reg_ganch)
2068 break;
2069 sayNO;
2070 case EOL:
2071 if (PL_multiline)
2072 goto meol;
2073 else
2074 goto seol;
2075 case MEOL:
2076 meol:
2077 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2078 sayNO;
b8c5462f 2079 break;
d6a28714
JH
2080 case SEOL:
2081 seol:
2082 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2083 sayNO;
d6a28714 2084 if (PL_regeol - locinput > 1)
b8c5462f 2085 sayNO;
b8c5462f 2086 break;
d6a28714
JH
2087 case EOS:
2088 if (PL_regeol != locinput)
b8c5462f 2089 sayNO;
d6a28714 2090 break;
ffc61ed2 2091 case SANY:
d6a28714 2092 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2093 sayNO;
b8c5462f 2094 nextchr = UCHARAT(++locinput);
a0d0e21e 2095 break;
ffc61ed2 2096 case REG_ANY:
1aa99e6b
IH
2097 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2098 sayNO;
2099 if (do_utf8) {
b8c5462f 2100 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2101 if (locinput > PL_regeol)
2102 sayNO;
a0ed51b3 2103 nextchr = UCHARAT(locinput);
a0ed51b3 2104 }
1aa99e6b
IH
2105 else
2106 nextchr = UCHARAT(++locinput);
a0ed51b3 2107 break;
d6a28714 2108 case EXACT:
cd439c50
IZ
2109 s = STRING(scan);
2110 ln = STR_LEN(scan);
1aa99e6b
IH
2111 if (do_utf8 != (UTF!=0)) {
2112 char *l = locinput;
2113 char *e = s + ln;
2114 STRLEN len;
2115 if (do_utf8)
2116 while (s < e) {
2117 if (l >= PL_regeol)
2118 sayNO;
9041c2e3 2119 if (*((U8*)s) != utf8_to_uvchr((U8*)l, &len))
1aa99e6b
IH
2120 sayNO;
2121 s++;
2122 l += len;
2123 }
2124 else
2125 while (s < e) {
2126 if (l >= PL_regeol)
2127 sayNO;
9041c2e3 2128 if (*((U8*)l) != utf8_to_uvchr((U8*)s, &len))
1aa99e6b
IH
2129 sayNO;
2130 s += len;
2131 l++;
2132 }
2133 locinput = l;
2134 nextchr = UCHARAT(locinput);
2135 break;
2136 }
d6a28714
JH
2137 /* Inline the first character, for speed. */
2138 if (UCHARAT(s) != nextchr)
2139 sayNO;
2140 if (PL_regeol - locinput < ln)
2141 sayNO;
2142 if (ln > 1 && memNE(s, locinput, ln))
2143 sayNO;
2144 locinput += ln;
2145 nextchr = UCHARAT(locinput);
2146 break;
2147 case EXACTFL:
b8c5462f
JH
2148 PL_reg_flags |= RF_tainted;
2149 /* FALL THROUGH */
d6a28714 2150 case EXACTF:
cd439c50
IZ
2151 s = STRING(scan);
2152 ln = STR_LEN(scan);
d6a28714 2153
1aa99e6b 2154 if (do_utf8) {
d6a28714 2155 char *l = locinput;
1aa99e6b
IH
2156 char *e;
2157 e = s + ln;
d6a28714
JH
2158 c1 = OP(scan) == EXACTF;
2159 while (s < e) {
1aa99e6b 2160 if (l >= PL_regeol) {
d6a28714
JH
2161 sayNO;
2162 }
2b9d42f0 2163 if ((UTF ? utf8n_to_uvchr((U8*)s, e - s, 0, 0) : *((U8*)s)) !=
1aa99e6b
IH
2164 (c1 ? toLOWER_utf8((U8*)l) : toLOWER_LC_utf8((U8*)l)))
2165 sayNO;
2166 s += UTF ? UTF8SKIP(s) : 1;
d6a28714 2167 l += UTF8SKIP(l);
b8c5462f 2168 }
d6a28714 2169 locinput = l;
a0ed51b3
LW
2170 nextchr = UCHARAT(locinput);
2171 break;
2172 }
d6a28714
JH
2173
2174 /* Inline the first character, for speed. */
2175 if (UCHARAT(s) != nextchr &&
2176 UCHARAT(s) != ((OP(scan) == EXACTF)
2177 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2178 sayNO;
d6a28714 2179 if (PL_regeol - locinput < ln)
b8c5462f 2180 sayNO;
d6a28714
JH
2181 if (ln > 1 && (OP(scan) == EXACTF
2182 ? ibcmp(s, locinput, ln)
2183 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2184 sayNO;
d6a28714
JH
2185 locinput += ln;
2186 nextchr = UCHARAT(locinput);
a0d0e21e 2187 break;
d6a28714 2188 case ANYOF:
ffc61ed2
JH
2189 if (do_utf8) {
2190 if (!reginclass(scan, (U8*)locinput, do_utf8))
2191 sayNO;
2192 if (locinput >= PL_regeol)
2193 sayNO;
2194 locinput += PL_utf8skip[nextchr];
b8c5462f 2195 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2196 }
2197 else {
2198 if (nextchr < 0)
2199 nextchr = UCHARAT(locinput);
2200 if (!reginclass(scan, (U8*)locinput, do_utf8))
2201 sayNO;
2202 if (!nextchr && locinput >= PL_regeol)
2203 sayNO;
2204 nextchr = UCHARAT(++locinput);
2205 }
b8c5462f 2206 break;
d6a28714 2207 case ALNUML:
b8c5462f
JH
2208 PL_reg_flags |= RF_tainted;
2209 /* FALL THROUGH */
d6a28714 2210 case ALNUM:
b8c5462f 2211 if (!nextchr)
4633a7c4 2212 sayNO;
ffc61ed2 2213 if (do_utf8) {
ad24be35 2214 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2215 if (!(OP(scan) == ALNUM
3568d838 2216 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2217 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2218 {
2219 sayNO;
a0ed51b3 2220 }
b8c5462f 2221 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2222 nextchr = UCHARAT(locinput);
2223 break;
2224 }
ffc61ed2 2225 if (!(OP(scan) == ALNUM
d6a28714 2226 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2227 sayNO;
b8c5462f 2228 nextchr = UCHARAT(++locinput);
a0d0e21e 2229 break;
d6a28714 2230 case NALNUML:
b8c5462f
JH
2231 PL_reg_flags |= RF_tainted;
2232 /* FALL THROUGH */
d6a28714
JH
2233 case NALNUM:
2234 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2235 sayNO;
ffc61ed2 2236 if (do_utf8) {
8269fa76 2237 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2238 if (OP(scan) == NALNUM
3568d838 2239 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2240 : isALNUM_LC_utf8((U8*)locinput))
2241 {
b8c5462f 2242 sayNO;
d6a28714 2243 }
b8c5462f
JH
2244 locinput += PL_utf8skip[nextchr];
2245 nextchr = UCHARAT(locinput);
2246 break;
2247 }
ffc61ed2 2248 if (OP(scan) == NALNUM
d6a28714 2249 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2250 sayNO;
76e3520e 2251 nextchr = UCHARAT(++locinput);
a0d0e21e 2252 break;
d6a28714
JH
2253 case BOUNDL:
2254 case NBOUNDL:
3280af22 2255 PL_reg_flags |= RF_tainted;
bbce6d69 2256 /* FALL THROUGH */
d6a28714
JH
2257 case BOUND:
2258 case NBOUND:
2259 /* was last char in word? */
ffc61ed2 2260 if (do_utf8) {
12d33761
HS
2261 if (locinput == PL_bostr)
2262 ln = '\n';
ffc61ed2
JH
2263 else {
2264 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2265
2b9d42f0 2266 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2267 }
2268 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2269 ln = isALNUM_uni(ln);
8269fa76 2270 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2271 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2272 }
2273 else {
9041c2e3 2274 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2275 n = isALNUM_LC_utf8((U8*)locinput);
2276 }
a0ed51b3 2277 }
d6a28714 2278 else {
12d33761
HS
2279 ln = (locinput != PL_bostr) ?
2280 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2281 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2282 ln = isALNUM(ln);
2283 n = isALNUM(nextchr);
2284 }
2285 else {
2286 ln = isALNUM_LC(ln);
2287 n = isALNUM_LC(nextchr);
2288 }
d6a28714 2289 }
ffc61ed2
JH
2290 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2291 OP(scan) == BOUNDL))
2292 sayNO;
a0ed51b3 2293 break;
d6a28714 2294 case SPACEL:
3280af22 2295 PL_reg_flags |= RF_tainted;
bbce6d69 2296 /* FALL THROUGH */
d6a28714 2297 case SPACE:
9442cb0e 2298 if (!nextchr)
4633a7c4 2299 sayNO;
1aa99e6b 2300 if (do_utf8) {
fd400ab9 2301 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2302 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2303 if (!(OP(scan) == SPACE
3568d838 2304 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2305 : isSPACE_LC_utf8((U8*)locinput)))
2306 {
2307 sayNO;
2308 }
2309 locinput += PL_utf8skip[nextchr];
2310 nextchr = UCHARAT(locinput);
2311 break;
d6a28714 2312 }
ffc61ed2
JH
2313 if (!(OP(scan) == SPACE
2314 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2315 sayNO;
2316 nextchr = UCHARAT(++locinput);
2317 }
2318 else {
2319 if (!(OP(scan) == SPACE
2320 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2321 sayNO;
2322 nextchr = UCHARAT(++locinput);
a0ed51b3 2323 }
a0ed51b3 2324 break;
d6a28714 2325 case NSPACEL:
3280af22 2326 PL_reg_flags |= RF_tainted;
bbce6d69 2327 /* FALL THROUGH */
d6a28714 2328 case NSPACE:
9442cb0e 2329 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2330 sayNO;
1aa99e6b 2331 if (do_utf8) {
8269fa76 2332 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2333 if (OP(scan) == NSPACE
3568d838 2334 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2335 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2336 {
2337 sayNO;
2338 }
2339 locinput += PL_utf8skip[nextchr];
2340 nextchr = UCHARAT(locinput);
2341 break;
a0ed51b3 2342 }
ffc61ed2 2343 if (OP(scan) == NSPACE
d6a28714 2344 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2345 sayNO;
76e3520e 2346 nextchr = UCHARAT(++locinput);
a0d0e21e 2347 break;
d6a28714 2348 case DIGITL:
a0ed51b3
LW
2349 PL_reg_flags |= RF_tainted;
2350 /* FALL THROUGH */
d6a28714 2351 case DIGIT:
9442cb0e 2352 if (!nextchr)
a0ed51b3 2353 sayNO;
1aa99e6b 2354 if (do_utf8) {
8269fa76 2355 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2356 if (!(OP(scan) == DIGIT
3568d838 2357 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2358 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2359 {
a0ed51b3 2360 sayNO;
dfe13c55 2361 }
6f06b55f 2362 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2363 nextchr = UCHARAT(locinput);
2364 break;
2365 }
ffc61ed2 2366 if (!(OP(scan) == DIGIT
9442cb0e 2367 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2368 sayNO;
2369 nextchr = UCHARAT(++locinput);
2370 break;
d6a28714 2371 case NDIGITL:
b8c5462f
JH
2372 PL_reg_flags |= RF_tainted;
2373 /* FALL THROUGH */
d6a28714 2374 case NDIGIT:
9442cb0e 2375 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2376 sayNO;
1aa99e6b 2377 if (do_utf8) {
8269fa76 2378 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2379 if (OP(scan) == NDIGIT
3568d838 2380 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2381 : isDIGIT_LC_utf8((U8*)locinput))
2382 {
a0ed51b3 2383 sayNO;
9442cb0e 2384 }
6f06b55f 2385 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2386 nextchr = UCHARAT(locinput);
2387 break;
2388 }
ffc61ed2 2389 if (OP(scan) == NDIGIT
9442cb0e 2390 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2391 sayNO;
2392 nextchr = UCHARAT(++locinput);
2393 break;
2394 case CLUMP:
8269fa76 2395 LOAD_UTF8_CHARCLASS(mark,"~");
3568d838
JH
2396 if (locinput >= PL_regeol ||
2397 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3 2398 sayNO;
6f06b55f 2399 locinput += PL_utf8skip[nextchr];
3568d838
JH
2400 while (locinput < PL_regeol &&
2401 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3
LW
2402 locinput += UTF8SKIP(locinput);
2403 if (locinput > PL_regeol)
2404 sayNO;
2405 nextchr = UCHARAT(locinput);
2406 break;
c8756f30 2407 case REFFL:
3280af22 2408 PL_reg_flags |= RF_tainted;
c8756f30 2409 /* FALL THROUGH */
c277df42 2410 case REF:
c8756f30 2411 case REFF:
c277df42 2412 n = ARG(scan); /* which paren pair */
cf93c79d 2413 ln = PL_regstartp[n];
2c2d71f5 2414 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2415 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2416 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2417 if (ln == PL_regendp[n])
a0d0e21e 2418 break;
a0ed51b3 2419
cf93c79d 2420 s = PL_bostr + ln;
1aa99e6b 2421 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2422 char *l = locinput;
cf93c79d 2423 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2424 /*
2425 * Note that we can't do the "other character" lookup trick as
2426 * in the 8-bit case (no pun intended) because in Unicode we
2427 * have to map both upper and title case to lower case.
2428 */
2429 if (OP(scan) == REFF) {
2430 while (s < e) {
2431 if (l >= PL_regeol)
2432 sayNO;
dfe13c55 2433 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2434 sayNO;
2435 s += UTF8SKIP(s);
2436 l += UTF8SKIP(l);
2437 }
2438 }
2439 else {
2440 while (s < e) {
2441 if (l >= PL_regeol)
2442 sayNO;
dfe13c55 2443 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2444 sayNO;
2445 s += UTF8SKIP(s);
2446 l += UTF8SKIP(l);
2447 }
2448 }
2449 locinput = l;
2450 nextchr = UCHARAT(locinput);
2451 break;
2452 }
2453
a0d0e21e 2454 /* Inline the first character, for speed. */
76e3520e 2455 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2456 (OP(scan) == REF ||
2457 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2458 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2459 sayNO;
cf93c79d 2460 ln = PL_regendp[n] - ln;
3280af22 2461 if (locinput + ln > PL_regeol)
4633a7c4 2462 sayNO;
c8756f30
AK
2463 if (ln > 1 && (OP(scan) == REF
2464 ? memNE(s, locinput, ln)
2465 : (OP(scan) == REFF
2466 ? ibcmp(s, locinput, ln)
2467 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2468 sayNO;
a0d0e21e 2469 locinput += ln;
76e3520e 2470 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2471 break;
2472
2473 case NOTHING:
c277df42 2474 case TAIL:
a0d0e21e
LW
2475 break;
2476 case BACK:
2477 break;
c277df42
IZ
2478 case EVAL:
2479 {
2480 dSP;
533c011a 2481 OP_4tree *oop = PL_op;
3280af22
NIS
2482 COP *ocurcop = PL_curcop;
2483 SV **ocurpad = PL_curpad;
c277df42 2484 SV *ret;
9041c2e3 2485
c277df42 2486 n = ARG(scan);
533c011a 2487 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2488 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2489 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2490 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2491
cea2e8a9 2492 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2493 SPAGAIN;
2494 ret = POPs;
2495 PUTBACK;
9041c2e3 2496
0f5d15d6
IZ
2497 PL_op = oop;
2498 PL_curpad = ocurpad;
2499 PL_curcop = ocurcop;
c277df42 2500 if (logical) {
0f5d15d6
IZ
2501 if (logical == 2) { /* Postponed subexpression. */
2502 regexp *re;
22c35a8c 2503 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2504 re_cc_state state;
0f5d15d6
IZ
2505 CHECKPOINT cp, lastcp;
2506
2507 if(SvROK(ret) || SvRMAGICAL(ret)) {
2508 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2509
2510 if(SvMAGICAL(sv))
14befaf4 2511 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2512 }
2513 if (mg) {
2514 re = (regexp *)mg->mg_obj;
df0003d4 2515 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2516 }
2517 else {
2518 STRLEN len;
2519 char *t = SvPV(ret, len);
2520 PMOP pm;
2521 char *oprecomp = PL_regprecomp;
2522 I32 osize = PL_regsize;
2523 I32 onpar = PL_regnpar;
2524
5fcd1c1b 2525 Zero(&pm, 1, PMOP);
cea2e8a9 2526 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2527 if (!(SvFLAGS(ret)
0f5d15d6 2528 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2529 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2530 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2531 PL_regprecomp = oprecomp;
2532 PL_regsize = osize;
2533 PL_regnpar = onpar;
2534 }
2535 DEBUG_r(
9041c2e3 2536 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2537 "Entering embedded `%s%.60s%s%s'\n",
2538 PL_colors[0],
2539 re->precomp,
2540 PL_colors[1],
2541 (strlen(re->precomp) > 60 ? "..." : ""))
2542 );
2543 state.node = next;
2544 state.prev = PL_reg_call_cc;
2545 state.cc = PL_regcc;
2546 state.re = PL_reg_re;
2547
2ab05381 2548 PL_regcc = 0;
9041c2e3 2549
0f5d15d6 2550 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2551 REGCP_SET(lastcp);
0f5d15d6
IZ
2552 cache_re(re);
2553 state.ss = PL_savestack_ix;
2554 *PL_reglastparen = 0;
2555 PL_reg_call_cc = &state;
2556 PL_reginput = locinput;
2c2d71f5
JH
2557
2558 /* XXXX This is too dramatic a measure... */
2559 PL_reg_maxiter = 0;
2560
0f5d15d6 2561 if (regmatch(re->program + 1)) {
2c914db6
IZ
2562 /* Even though we succeeded, we need to restore
2563 global variables, since we may be wrapped inside
2564 SUSPEND, thus the match may be not finished yet. */
2565
2566 /* XXXX Do this only if SUSPENDed? */
2567 PL_reg_call_cc = state.prev;
2568 PL_regcc = state.cc;
2569 PL_reg_re = state.re;
2570 cache_re(PL_reg_re);
2571
2572 /* XXXX This is too dramatic a measure... */
2573 PL_reg_maxiter = 0;
2574
2575 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2576 ReREFCNT_dec(re);
2577 regcpblow(cp);
2578 sayYES;
2579 }
0f5d15d6 2580 ReREFCNT_dec(re);
02db2b7b 2581 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2582 regcppop();
2583 PL_reg_call_cc = state.prev;
2584 PL_regcc = state.cc;
2585 PL_reg_re = state.re;
d3790889 2586 cache_re(PL_reg_re);
2c2d71f5
JH
2587
2588 /* XXXX This is too dramatic a measure... */
2589 PL_reg_maxiter = 0;
2590
0f5d15d6
IZ
2591 sayNO;
2592 }
c277df42 2593 sw = SvTRUE(ret);
0f5d15d6 2594 logical = 0;
a0ed51b3
LW
2595 }
2596 else
3280af22 2597 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2598 break;
2599 }
a0d0e21e 2600 case OPEN:
c277df42 2601 n = ARG(scan); /* which paren pair */
3280af22
NIS
2602 PL_reg_start_tmp[n] = locinput;
2603 if (n > PL_regsize)
2604 PL_regsize = n;
a0d0e21e
LW
2605 break;
2606 case CLOSE:
c277df42 2607 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2608 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2609 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2610 if (n > *PL_reglastparen)
2611 *PL_reglastparen = n;
a0d0e21e 2612 break;
c277df42
IZ
2613 case GROUPP:
2614 n = ARG(scan); /* which paren pair */
cf93c79d 2615 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2616 break;
2617 case IFTHEN:
2c2d71f5 2618 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2619 if (sw)
2620 next = NEXTOPER(NEXTOPER(scan));
2621 else {
2622 next = scan + ARG(scan);
2623 if (OP(next) == IFTHEN) /* Fake one. */
2624 next = NEXTOPER(NEXTOPER(next));
2625 }
2626 break;
2627 case LOGICAL:
0f5d15d6 2628 logical = scan->flags;
c277df42 2629 break;
2ab05381
IZ
2630/*******************************************************************
2631 PL_regcc contains infoblock about the innermost (...)* loop, and
2632 a pointer to the next outer infoblock.
2633
2634 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2635
2636 1) After matching X, regnode for CURLYX is processed;
2637
9041c2e3 2638 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2639 regmatch() recursively with the starting point at WHILEM node;
2640
2641 3) Each hit of WHILEM node tries to match A and Z (in the order
2642 depending on the current iteration, min/max of {min,max} and
2643 greediness). The information about where are nodes for "A"
2644 and "Z" is read from the infoblock, as is info on how many times "A"
2645 was already matched, and greediness.
2646
2647 4) After A matches, the same WHILEM node is hit again.
2648
2649 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2650 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2651 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2652 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2653 of the external loop.
2654
2655 Currently present infoblocks form a tree with a stem formed by PL_curcc
2656 and whatever it mentions via ->next, and additional attached trees
2657 corresponding to temporarily unset infoblocks as in "5" above.
2658
9041c2e3 2659 In the following picture infoblocks for outer loop of
2ab05381
IZ
2660 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2661 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2662 infoblocks are drawn below the "reset" infoblock.
2663
2664 In fact in the picture below we do not show failed matches for Z and T
2665 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2666 more obvious *why* one needs to *temporary* unset infoblocks.]
2667
2668 Matched REx position InfoBlocks Comment
2669 (Y(A)*?Z)*?T x
2670 Y(A)*?Z)*?T x <- O
2671 Y (A)*?Z)*?T x <- O
2672 Y A)*?Z)*?T x <- O <- I
2673 YA )*?Z)*?T x <- O <- I
2674 YA A)*?Z)*?T x <- O <- I
2675 YAA )*?Z)*?T x <- O <- I
2676 YAA Z)*?T x <- O # Temporary unset I
2677 I
2678
2679 YAAZ Y(A)*?Z)*?T x <- O
2680 I
2681
2682 YAAZY (A)*?Z)*?T x <- O
2683 I
2684
2685 YAAZY A)*?Z)*?T x <- O <- I
2686 I
2687
2688 YAAZYA )*?Z)*?T x <- O <- I
2689 I
2690
2691 YAAZYA Z)*?T x <- O # Temporary unset I
2692 I,I
2693
2694 YAAZYAZ )*?T x <- O
2695 I,I
2696
2697 YAAZYAZ T x # Temporary unset O
2698 O
2699 I,I
2700
2701 YAAZYAZT x
2702 O
2703 I,I
2704 *******************************************************************/
a0d0e21e
LW
2705 case CURLYX: {
2706 CURCUR cc;
3280af22 2707 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2708 /* No need to save/restore up to this paren */
2709 I32 parenfloor = scan->flags;
c277df42
IZ
2710
2711 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2712 next += ARG(next);
3280af22
NIS
2713 cc.oldcc = PL_regcc;
2714 PL_regcc = &cc;
cb434fcc
IZ
2715 /* XXXX Probably it is better to teach regpush to support
2716 parenfloor > PL_regsize... */
2717 if (parenfloor > *PL_reglastparen)
2718 parenfloor = *PL_reglastparen; /* Pessimization... */
2719 cc.parenfloor = parenfloor;
a0d0e21e
LW
2720 cc.cur = -1;
2721 cc.min = ARG1(scan);
2722 cc.max = ARG2(scan);
c277df42 2723 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2724 cc.next = next;
2725 cc.minmod = minmod;
2726 cc.lastloc = 0;
3280af22 2727 PL_reginput = locinput;
a0d0e21e
LW
2728 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2729 regcpblow(cp);
3280af22 2730 PL_regcc = cc.oldcc;
4633a7c4 2731 saySAME(n);
a0d0e21e
LW
2732 }
2733 /* NOT REACHED */
2734 case WHILEM: {
2735 /*
2736 * This is really hard to understand, because after we match
2737 * what we're trying to match, we must make sure the rest of
2c2d71f5 2738 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2739 * to go back UP the parse tree by recursing ever deeper. And
2740 * if it fails, we have to reset our parent's current state
2741 * that we can try again after backing off.
2742 */
2743
c277df42 2744 CHECKPOINT cp, lastcp;
3280af22 2745 CURCUR* cc = PL_regcc;
c277df42
IZ
2746 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2747
4633a7c4 2748 n = cc->cur + 1; /* how many we know we matched */
3280af22 2749 PL_reginput = locinput;
a0d0e21e 2750
c277df42 2751 DEBUG_r(
9041c2e3
NIS
2752 PerlIO_printf(Perl_debug_log,
2753 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2754 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2755 (long)n, (long)cc->min,
c277df42
IZ
2756 (long)cc->max, (long)cc)
2757 );
4633a7c4 2758
a0d0e21e
LW
2759 /* If degenerate scan matches "", assume scan done. */
2760
579cf2c3 2761 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2762 PL_regcc = cc->oldcc;
2ab05381
IZ
2763 if (PL_regcc)
2764 ln = PL_regcc->cur;
c277df42 2765 DEBUG_r(
c3464db5
DD
2766 PerlIO_printf(Perl_debug_log,
2767 "%*s empty match detected, try continuation...\n",
3280af22 2768 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2769 );
a0d0e21e 2770 if (regmatch(cc->next))
4633a7c4 2771 sayYES;
2ab05381
IZ
2772 if (PL_regcc)
2773 PL_regcc->cur = ln;
3280af22 2774 PL_regcc = cc;
4633a7c4 2775 sayNO;
a0d0e21e
LW
2776 }
2777
2778 /* First just match a string of min scans. */
2779
2780 if (n < cc->min) {
2781 cc->cur = n;
2782 cc->lastloc = locinput;
4633a7c4
LW
2783 if (regmatch(cc->scan))
2784 sayYES;
2785 cc->cur = n - 1;
c277df42 2786 cc->lastloc = lastloc;
4633a7c4 2787 sayNO;
a0d0e21e
LW
2788 }
2789
2c2d71f5
JH
2790 if (scan->flags) {
2791 /* Check whether we already were at this position.
2792 Postpone detection until we know the match is not
2793 *that* much linear. */
2794 if (!PL_reg_maxiter) {
2795 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2796 PL_reg_leftiter = PL_reg_maxiter;
2797 }
2798 if (PL_reg_leftiter-- == 0) {
2799 I32 size = (PL_reg_maxiter + 7)/8;
2800 if (PL_reg_poscache) {
2801 if (PL_reg_poscache_size < size) {
2802 Renew(PL_reg_poscache, size, char);
2803 PL_reg_poscache_size = size;
2804 }
2805 Zero(PL_reg_poscache, size, char);
2806 }
2807 else {
2808 PL_reg_poscache_size = size;
2809 Newz(29, PL_reg_poscache, size, char);
2810 }
2811 DEBUG_r(
2812 PerlIO_printf(Perl_debug_log,
2813 "%sDetected a super-linear match, switching on caching%s...\n",
2814 PL_colors[4], PL_colors[5])
2815 );
2816 }
2817 if (PL_reg_leftiter < 0) {
2818 I32 o = locinput - PL_bostr, b;
2819
2820 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2821 b = o % 8;
2822 o /= 8;
2823 if (PL_reg_poscache[o] & (1<<b)) {
2824 DEBUG_r(
2825 PerlIO_printf(Perl_debug_log,
2826 "%*s already tried at this position...\n",
2827 REPORT_CODE_OFF+PL_regindent*2, "")
2828 );
7821416a 2829 sayNO_SILENT;
2c2d71f5
JH
2830 }
2831 PL_reg_poscache[o] |= (1<<b);
2832 }
2833 }
2834
a0d0e21e
LW
2835 /* Prefer next over scan for minimal matching. */
2836
2837 if (cc->minmod) {
3280af22 2838 PL_regcc = cc->oldcc;
2ab05381
IZ
2839 if (PL_regcc)
2840 ln = PL_regcc->cur;
5f05dabc 2841 cp = regcppush(cc->parenfloor);
02db2b7b 2842 REGCP_SET(lastcp);
5f05dabc 2843 if (regmatch(cc->next)) {
c277df42 2844 regcpblow(cp);
4633a7c4 2845 sayYES; /* All done. */
5f05dabc 2846 }
02db2b7b 2847 REGCP_UNWIND(lastcp);
5f05dabc 2848 regcppop();
2ab05381
IZ
2849 if (PL_regcc)
2850 PL_regcc->cur = ln;
3280af22 2851 PL_regcc = cc;
a0d0e21e 2852
c277df42 2853 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 2854 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2855 && !(PL_reg_flags & RF_warned)) {
2856 PL_reg_flags |= RF_warned;
e476b1b5 2857 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2858 "Complex regular subexpression recursion",
2859 REG_INFTY - 1);
c277df42 2860 }
4633a7c4 2861 sayNO;
c277df42 2862 }
a687059c 2863
c277df42 2864 DEBUG_r(
c3464db5
DD
2865 PerlIO_printf(Perl_debug_log,
2866 "%*s trying longer...\n",
3280af22 2867 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2868 );
a0d0e21e 2869 /* Try scanning more and see if it helps. */
3280af22 2870 PL_reginput = locinput;
a0d0e21e
LW
2871 cc->cur = n;
2872 cc->lastloc = locinput;
5f05dabc 2873 cp = regcppush(cc->parenfloor);
02db2b7b 2874 REGCP_SET(lastcp);
5f05dabc 2875 if (regmatch(cc->scan)) {
c277df42 2876 regcpblow(cp);
4633a7c4 2877 sayYES;
5f05dabc 2878 }
02db2b7b 2879 REGCP_UNWIND(lastcp);
5f05dabc 2880 regcppop();
4633a7c4 2881 cc->cur = n - 1;
c277df42 2882 cc->lastloc = lastloc;
4633a7c4 2883 sayNO;
a0d0e21e
LW
2884 }
2885
2886 /* Prefer scan over next for maximal matching. */
2887
2888 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2889 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2890 cc->cur = n;
2891 cc->lastloc = locinput;
02db2b7b 2892 REGCP_SET(lastcp);
5f05dabc 2893 if (regmatch(cc->scan)) {
c277df42 2894 regcpblow(cp);
4633a7c4 2895 sayYES;
5f05dabc 2896 }
02db2b7b 2897 REGCP_UNWIND(lastcp);
a0d0e21e 2898 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2899 PL_reginput = locinput;
c277df42 2900 DEBUG_r(
c3464db5
DD
2901 PerlIO_printf(Perl_debug_log,
2902 "%*s failed, try continuation...\n",
3280af22 2903 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2904 );
2905 }
9041c2e3 2906 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2907 && !(PL_reg_flags & RF_warned)) {
3280af22 2908 PL_reg_flags |= RF_warned;
e476b1b5 2909 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2910 "Complex regular subexpression recursion",
2911 REG_INFTY - 1);
a0d0e21e
LW
2912 }
2913
2914 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2915 PL_regcc = cc->oldcc;
2ab05381
IZ
2916 if (PL_regcc)
2917 ln = PL_regcc->cur;
a0d0e21e 2918 if (regmatch(cc->next))
4633a7c4 2919 sayYES;
2ab05381
IZ
2920 if (PL_regcc)
2921 PL_regcc->cur = ln;
3280af22 2922 PL_regcc = cc;
4633a7c4 2923 cc->cur = n - 1;
c277df42 2924 cc->lastloc = lastloc;
4633a7c4 2925 sayNO;
a0d0e21e
LW
2926 }
2927 /* NOT REACHED */
9041c2e3 2928 case BRANCHJ:
c277df42
IZ
2929 next = scan + ARG(scan);
2930 if (next == scan)
2931 next = NULL;
2932 inner = NEXTOPER(NEXTOPER(scan));
2933 goto do_branch;
9041c2e3 2934 case BRANCH:
c277df42
IZ
2935 inner = NEXTOPER(scan);
2936 do_branch:
2937 {
2938 CHECKPOINT lastcp;
2939 c1 = OP(scan);
2940 if (OP(next) != c1) /* No choice. */
2941 next = inner; /* Avoid recursion. */
a0d0e21e 2942 else {
02db2b7b
IZ
2943 I32 lastparen = *PL_reglastparen;
2944 I32 unwind1;
2945 re_unwind_branch_t *uw;
2946
2947 /* Put unwinding data on stack */
2948 unwind1 = SSNEWt(1,re_unwind_branch_t);
2949 uw = SSPTRt(unwind1,re_unwind_branch_t);
2950 uw->prev = unwind;
2951 unwind = unwind1;
2952 uw->type = ((c1 == BRANCH)
2953 ? RE_UNWIND_BRANCH
2954 : RE_UNWIND_BRANCHJ);
2955 uw->lastparen = lastparen;
2956 uw->next = next;
2957 uw->locinput = locinput;
2958 uw->nextchr = nextchr;
2959#ifdef DEBUGGING
2960 uw->regindent = ++PL_regindent;
2961#endif
c277df42 2962
02db2b7b
IZ
2963 REGCP_SET(uw->lastcp);
2964
2965 /* Now go into the first branch */
2966 next = inner;
a687059c 2967 }
a0d0e21e
LW
2968 }
2969 break;
2970 case MINMOD:
2971 minmod = 1;
2972 break;
c277df42
IZ
2973 case CURLYM:
2974 {
00db4c45 2975 I32 l = 0;
c277df42 2976 CHECKPOINT lastcp;
9041c2e3 2977
c277df42
IZ
2978 /* We suppose that the next guy does not need
2979 backtracking: in particular, it is of constant length,
2980 and has no parenths to influence future backrefs. */
2981 ln = ARG1(scan); /* min to match */
2982 n = ARG2(scan); /* max to match */
c277df42
IZ
2983 paren = scan->flags;
2984 if (paren) {
3280af22
NIS
2985 if (paren > PL_regsize)
2986 PL_regsize = paren;
2987 if (paren > *PL_reglastparen)
2988 *PL_reglastparen = paren;
c277df42 2989 }
dc45a647 2990 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2991 if (paren)
2992 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2993 PL_reginput = locinput;
c277df42
IZ
2994 if (minmod) {
2995 minmod = 0;
2996 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2997 sayNO;
5f4b28b2 2998 if (ln && l == 0 && n >= ln
c277df42
IZ
2999 /* In fact, this is tricky. If paren, then the
3000 fact that we did/didnot match may influence
3001 future execution. */
3002 && !(paren && ln == 0))
3003 ln = n;
3280af22 3004 locinput = PL_reginput;
22c35a8c 3005 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3006 c1 = (U8)*STRING(next);
c277df42 3007 if (OP(next) == EXACTF)
22c35a8c 3008 c2 = PL_fold[c1];
c277df42 3009 else if (OP(next) == EXACTFL)
22c35a8c 3010 c2 = PL_fold_locale[c1];
c277df42
IZ
3011 else
3012 c2 = c1;
a0ed51b3
LW
3013 }
3014 else
c277df42 3015 c1 = c2 = -1000;
02db2b7b 3016 REGCP_SET(lastcp);
5f4b28b2 3017 /* This may be improved if l == 0. */
c277df42
IZ
3018 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3019 /* If it could work, try it. */
3020 if (c1 == -1000 ||
3280af22
NIS
3021 UCHARAT(PL_reginput) == c1 ||
3022 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3023 {
3024 if (paren) {
3025 if (n) {
cf93c79d
IZ
3026 PL_regstartp[paren] =
3027 HOPc(PL_reginput, -l) - PL_bostr;
3028 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3029 }
3030 else
cf93c79d 3031 PL_regendp[paren] = -1;
c277df42
IZ
3032 }
3033 if (regmatch(next))
3034 sayYES;
02db2b7b 3035 REGCP_UNWIND(lastcp);
c277df42
IZ
3036 }
3037 /* Couldn't or didn't -- move forward. */
3280af22 3038 PL_reginput = locinput;
c277df42
IZ
3039 if (regrepeat_hard(scan, 1, &l)) {
3040 ln++;
3280af22 3041 locinput = PL_reginput;
c277df42
IZ
3042 }
3043 else
3044 sayNO;
3045 }
a0ed51b3
LW
3046 }
3047 else {
c277df42
IZ
3048 n = regrepeat_hard(scan, n, &l);
3049 if (n != 0 && l == 0
3050 /* In fact, this is tricky. If paren, then the
3051 fact that we did/didnot match may influence
3052 future execution. */
3053 && !(paren && ln == 0))
3054 ln = n;
3280af22 3055 locinput = PL_reginput;
c277df42 3056 DEBUG_r(
5c0ca799 3057 PerlIO_printf(Perl_debug_log,
faccc32b 3058 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3059 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3060 (IV) n, (IV)l)
c277df42
IZ
3061 );
3062 if (n >= ln) {
22c35a8c 3063 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3064 c1 = (U8)*STRING(next);
c277df42 3065 if (OP(next) == EXACTF)
22c35a8c 3066 c2 = PL_fold[c1];
c277df42 3067 else if (OP(next) == EXACTFL)
22c35a8c 3068 c2 = PL_fold_locale[c1];
c277df42
IZ
3069 else
3070 c2 = c1;
a0ed51b3
LW
3071 }
3072 else
c277df42
IZ
3073 c1 = c2 = -1000;
3074 }
02db2b7b 3075 REGCP_SET(lastcp);
c277df42
IZ
3076 while (n >= ln) {
3077 /* If it could work, try it. */
3078 if (c1 == -1000 ||
3280af22
NIS
3079 UCHARAT(PL_reginput) == c1 ||
3080 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3081 {
3082 DEBUG_r(
c3464db5 3083 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3084 "%*s trying tail with n=%"IVdf"...\n",
3085 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3086 );
3087 if (paren) {
3088 if (n) {
cf93c79d
IZ
3089 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3090 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3091 }
a0ed51b3 3092 else
cf93c79d 3093 PL_regendp[paren] = -1;
c277df42 3094 }
a0ed51b3
LW
3095 if (regmatch(next))
3096 sayYES;
02db2b7b 3097 REGCP_UNWIND(lastcp);
a0ed51b3 3098 }
c277df42
IZ
3099 /* Couldn't or didn't -- back up. */
3100 n--;
dfe13c55 3101 locinput = HOPc(locinput, -l);
3280af22 3102 PL_reginput = locinput;
c277df42
IZ
3103 }
3104 }
3105 sayNO;
3106 break;
3107 }
3108 case CURLYN:
3109 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3110 if (paren > PL_regsize)
3111 PL_regsize = paren;
3112 if (paren > *PL_reglastparen)
3113 *PL_reglastparen = paren;
c277df42
IZ
3114 ln = ARG1(scan); /* min to match */
3115 n = ARG2(scan); /* max to match */
dc45a647 3116 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3117 goto repeat;
a0d0e21e 3118 case CURLY:
c277df42 3119 paren = 0;
a0d0e21e
LW
3120 ln = ARG1(scan); /* min to match */
3121 n = ARG2(scan); /* max to match */
dc45a647 3122 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3123 goto repeat;
3124 case STAR:
3125 ln = 0;
c277df42 3126 n = REG_INFTY;
a0d0e21e 3127 scan = NEXTOPER(scan);
c277df42 3128 paren = 0;
a0d0e21e
LW
3129 goto repeat;
3130 case PLUS:
c277df42
IZ
3131 ln = 1;
3132 n = REG_INFTY;
3133 scan = NEXTOPER(scan);
3134 paren = 0;
3135 repeat:
a0d0e21e
LW
3136 /*
3137 * Lookahead to avoid useless match attempts
3138 * when we know what character comes next.
3139 */
22c35a8c 3140 if (PL_regkind[(U8)OP(next)] == EXACT) {
1aa99e6b
IH
3141 U8 *s = (U8*)STRING(next);
3142 if (!UTF) {
3143 c2 = c1 = *s;
3144 if (OP(next) == EXACTF)
3145 c2 = PL_fold[c1];
3146 else if (OP(next) == EXACTFL)
3147 c2 = PL_fold_locale[c1];
3148 }
3149 else { /* UTF */
3150 if (OP(next) == EXACTF) {
3151 c1 = to_utf8_lower(s);
3152 c2 = to_utf8_upper(s);
3153 }
3154 else {
9041c2e3 3155 c2 = c1 = utf8_to_uvchr(s, NULL);
1aa99e6b
IH
3156 }
3157 }
bbce6d69 3158 }
a0d0e21e 3159 else
bbce6d69 3160 c1 = c2 = -1000;
3280af22 3161 PL_reginput = locinput;
a0d0e21e 3162 if (minmod) {
c277df42 3163 CHECKPOINT lastcp;
a0d0e21e
LW
3164 minmod = 0;
3165 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3166 sayNO;
a0ed51b3 3167 locinput = PL_reginput;
02db2b7b 3168 REGCP_SET(lastcp);
0fe9bf95 3169 if (c1 != -1000) {
1aa99e6b 3170 char *e; /* Should not check after this */
0fe9bf95
IZ
3171 char *old = locinput;
3172
1aa99e6b 3173 if (n == REG_INFTY) {
0fe9bf95 3174 e = PL_regeol - 1;
1aa99e6b
IH
3175 if (do_utf8)
3176 while (UTF8_IS_CONTINUATION(*(U8*)e))
3177 e--;
3178 }
3179 else if (do_utf8) {
3180 int m = n - ln;
3181 for (e = locinput;
3182 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3183 e += UTF8SKIP(e);
3184 }
3185 else {
3186 e = locinput + n - ln;
3187 if (e >= PL_regeol)
3188 e = PL_regeol - 1;
3189 }
0fe9bf95 3190 while (1) {
1aa99e6b 3191 int count;
0fe9bf95 3192 /* Find place 'next' could work */
1aa99e6b
IH
3193 if (!do_utf8) {
3194 if (c1 == c2) {
3195 while (locinput <= e && *locinput != c1)
3196 locinput++;
3197 } else {
9041c2e3 3198 while (locinput <= e
1aa99e6b
IH
3199 && *locinput != c1
3200 && *locinput != c2)
3201 locinput++;
3202 }
3203 count = locinput - old;
3204 }
3205 else {
3206 STRLEN len;
3207 if (c1 == c2) {
3208 for (count = 0;
3209 locinput <= e &&
9041c2e3 3210 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3211 count++)
3212 locinput += len;
3213
3214 } else {
3215 for (count = 0; locinput <= e; count++) {
9041c2e3 3216 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3217 if (c == c1 || c == c2)
3218 break;
9041c2e3 3219 locinput += len;
1aa99e6b
IH
3220 }
3221 }
0fe9bf95 3222 }
9041c2e3 3223 if (locinput > e)
0fe9bf95
IZ
3224 sayNO;
3225 /* PL_reginput == old now */
3226 if (locinput != old) {
3227 ln = 1; /* Did some */
1aa99e6b 3228 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3229 sayNO;
3230 }
3231 /* PL_reginput == locinput now */
29d1e993 3232 TRYPAREN(paren, ln, locinput);
0fe9bf95 3233 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3234 REGCP_UNWIND(lastcp);
0fe9bf95 3235 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3236 old = locinput;
3237 if (do_utf8)
3238 locinput += UTF8SKIP(locinput);
3239 else
3240 locinput++;
0fe9bf95
IZ
3241 }
3242 }
3243 else
c277df42 3244 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3245 UV c;
3246 if (c1 != -1000) {
3247 if (do_utf8)
9041c2e3 3248 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3249 else
9041c2e3 3250 c = UCHARAT(PL_reginput);
2390ecbc
PP
3251 /* If it could work, try it. */
3252 if (c == c1 || c == c2)
3253 {
3254 TRYPAREN(paren, n, PL_reginput);
3255 REGCP_UNWIND(lastcp);
3256 }
1aa99e6b 3257 }
a0d0e21e 3258 /* If it could work, try it. */
2390ecbc 3259 else if (c1 == -1000)
bbce6d69 3260 {
29d1e993 3261 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3262 REGCP_UNWIND(lastcp);
bbce6d69 3263 }
c277df42 3264 /* Couldn't or didn't -- move forward. */
a0ed51b3 3265 PL_reginput = locinput;
a0d0e21e
LW
3266 if (regrepeat(scan, 1)) {
3267 ln++;
a0ed51b3
LW
3268 locinput = PL_reginput;
3269 }
3270 else
4633a7c4 3271 sayNO;
a0d0e21e
LW
3272 }
3273 }
3274 else {
c277df42 3275 CHECKPOINT lastcp;
a0d0e21e 3276 n = regrepeat(scan, n);
a0ed51b3 3277 locinput = PL_reginput;
22c35a8c 3278 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3279 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3280 ln = n; /* why back off? */
1aeab75a
GS
3281 /* ...because $ and \Z can match before *and* after
3282 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3283 We should back off by one in this case. */
3284 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3285 ln--;
3286 }
02db2b7b 3287 REGCP_SET(lastcp);
c277df42 3288 if (paren) {
1aa99e6b 3289 UV c;
c277df42 3290 while (n >= ln) {
1aa99e6b
IH
3291 if (c1 != -1000) {
3292 if (do_utf8)
9041c2e3 3293 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3294 else
9041c2e3 3295 c = UCHARAT(PL_reginput);
1aa99e6b 3296 }
c277df42 3297 /* If it could work, try it. */
1aa99e6b 3298 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3299 {
29d1e993 3300 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3301 REGCP_UNWIND(lastcp);
c277df42
IZ
3302 }
3303 /* Couldn't or didn't -- back up. */
3304 n--;
dfe13c55 3305 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3306 }
a0ed51b3
LW
3307 }
3308 else {
1aa99e6b 3309 UV c;
c277df42 3310 while (n >= ln) {
1aa99e6b
IH
3311 if (c1 != -1000) {
3312 if (do_utf8)
9041c2e3 3313 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3314 else
9041c2e3 3315 c = UCHARAT(PL_reginput);
1aa99e6b 3316 }
c277df42 3317 /* If it could work, try it. */
1aa99e6b 3318 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3319 {
29d1e993 3320 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3321 REGCP_UNWIND(lastcp);
c277df42
IZ
3322 }
3323 /* Couldn't or didn't -- back up. */
3324 n--;
dfe13c55 3325 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3326 }
a0d0e21e
LW
3327 }
3328 }
4633a7c4 3329 sayNO;
c277df42 3330 break;
a0d0e21e 3331 case END:
0f5d15d6
IZ
3332 if (PL_reg_call_cc) {
3333 re_cc_state *cur_call_cc = PL_reg_call_cc;
3334 CURCUR *cctmp = PL_regcc;
3335 regexp *re = PL_reg_re;
3336 CHECKPOINT cp, lastcp;
3337
3338 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3339 REGCP_SET(lastcp);
0f5d15d6
IZ
3340 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3341 the caller. */
3342 PL_reginput = locinput; /* Make position available to
3343 the callcc. */
3344 cache_re(PL_reg_call_cc->re);
3345 PL_regcc = PL_reg_call_cc->cc;
3346 PL_reg_call_cc = PL_reg_call_cc->prev;
3347 if (regmatch(cur_call_cc->node)) {
3348 PL_reg_call_cc = cur_call_cc;
3349 regcpblow(cp);
3350 sayYES;
3351 }
02db2b7b 3352 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3353 regcppop();
3354 PL_reg_call_cc = cur_call_cc;
3355 PL_regcc = cctmp;
3356 PL_reg_re = re;
3357 cache_re(re);
3358
3359 DEBUG_r(
3360 PerlIO_printf(Perl_debug_log,
3361 "%*s continuation failed...\n",
3362 REPORT_CODE_OFF+PL_regindent*2, "")
3363 );
7821416a 3364 sayNO_SILENT;
0f5d15d6 3365 }
7821416a
IZ
3366 if (locinput < PL_regtill) {
3367 DEBUG_r(PerlIO_printf(Perl_debug_log,
3368 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3369 PL_colors[4],
3370 (long)(locinput - PL_reg_starttry),
3371 (long)(PL_regtill - PL_reg_starttry),
3372 PL_colors[5]));
3373 sayNO_FINAL; /* Cannot match: too short. */
3374 }
3375 PL_reginput = locinput; /* put where regtry can find it */
3376 sayYES_FINAL; /* Success! */
7e5428c5 3377 case SUCCEED:
3280af22 3378 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3379 sayYES_LOUD; /* Success! */
c277df42
IZ
3380 case SUSPEND:
3381 n = 1;
9fe1d20c 3382 PL_reginput = locinput;
9041c2e3 3383 goto do_ifmatch;
a0d0e21e 3384 case UNLESSM:
c277df42 3385 n = 0;
a0ed51b3 3386 if (scan->flags) {
efb30f32
HS
3387 s = HOPBACKc(locinput, scan->flags);
3388 if (!s)
3389 goto say_yes;
3390 PL_reginput = s;
a0ed51b3
LW
3391 }
3392 else
3393 PL_reginput = locinput;
c277df42
IZ
3394 goto do_ifmatch;
3395 case IFMATCH:
3396 n = 1;
a0ed51b3 3397 if (scan->flags) {
efb30f32
HS
3398 s = HOPBACKc(locinput, scan->flags);
3399 if (!s)
3400 goto say_no;
3401 PL_reginput = s;
a0ed51b3
LW
3402 }
3403 else
3404 PL_reginput = locinput;
3405
c277df42 3406 do_ifmatch:
c277df42
IZ
3407 inner = NEXTOPER(NEXTOPER(scan));
3408 if (regmatch(inner) != n) {
3409 say_no:
3410 if (logical) {
3411 logical = 0;
3412 sw = 0;
3413 goto do_longjump;
a0ed51b3
LW
3414 }
3415 else
c277df42
IZ
3416 sayNO;
3417 }
3418 say_yes:
3419 if (logical) {
3420 logical = 0;
3421 sw = 1;
3422 }
fe44a5e8 3423 if (OP(scan) == SUSPEND) {
3280af22 3424 locinput = PL_reginput;
565764a8 3425 nextchr = UCHARAT(locinput);
fe44a5e8 3426 }
c277df42
IZ
3427 /* FALL THROUGH. */
3428 case LONGJMP:
3429 do_longjump:
3430 next = scan + ARG(scan);
3431 if (next == scan)
3432 next = NULL;
a0d0e21e
LW
3433 break;
3434 default:
b900a521 3435 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3436 PTR2UV(scan), OP(scan));
cea2e8a9 3437 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3438 }
02db2b7b 3439 reenter:
a0d0e21e
LW
3440 scan = next;
3441 }
a687059c 3442
a0d0e21e
LW
3443 /*
3444 * We get here only if there's trouble -- normally "case END" is
3445 * the terminating point.
3446 */
cea2e8a9 3447 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3448 /*NOTREACHED*/
4633a7c4
LW
3449 sayNO;
3450
7821416a
IZ
3451yes_loud:
3452 DEBUG_r(
3453 PerlIO_printf(Perl_debug_log,
3454 "%*s %scould match...%s\n",
3455 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3456 );
3457 goto yes;
3458yes_final:
3459 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3460 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3461yes:
3462#ifdef DEBUGGING
3280af22 3463 PL_regindent--;
4633a7c4 3464#endif
02db2b7b
IZ
3465
3466#if 0 /* Breaks $^R */
3467 if (unwind)
3468 regcpblow(firstcp);
3469#endif
4633a7c4
LW
3470 return 1;
3471
3472no:
7821416a
IZ
3473 DEBUG_r(
3474 PerlIO_printf(Perl_debug_log,
3475 "%*s %sfailed...%s\n",
3476 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3477 );
3478 goto do_no;
3479no_final:
3480do_no:
02db2b7b
IZ
3481 if (unwind) {
3482 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3483
3484 switch (uw->type) {
3485 case RE_UNWIND_BRANCH:
3486 case RE_UNWIND_BRANCHJ:
3487 {
3488 re_unwind_branch_t *uwb = &(uw->branch);
3489 I32 lastparen = uwb->lastparen;
9041c2e3 3490
02db2b7b
IZ
3491 REGCP_UNWIND(uwb->lastcp);
3492 for (n = *PL_reglastparen; n > lastparen; n--)
3493 PL_regendp[n] = -1;
3494 *PL_reglastparen = n;
3495 scan = next = uwb->next;
9041c2e3
NIS
3496 if ( !scan ||
3497 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
3498 ? BRANCH : BRANCHJ) ) { /* Failure */
3499 unwind = uwb->prev;
3500#ifdef DEBUGGING
3501 PL_regindent--;
3502#endif
3503 goto do_no;
3504 }
3505 /* Have more choice yet. Reuse the same uwb. */
3506 /*SUPPRESS 560*/
3507 if ((n = (uwb->type == RE_UNWIND_BRANCH
3508 ? NEXT_OFF(next) : ARG(next))))
3509 next += n;
3510 else
3511 next = NULL; /* XXXX Needn't unwinding in this case... */
3512 uwb->next = next;
3513 next = NEXTOPER(scan);
3514 if (uwb->type == RE_UNWIND_BRANCHJ)
3515 next = NEXTOPER(next);
3516 locinput = uwb->locinput;
3517 nextchr = uwb->nextchr;
3518#ifdef DEBUGGING
3519 PL_regindent = uwb->regindent;
3520#endif
3521
3522 goto reenter;
3523 }
3524 /* NOT REACHED */
3525 default:
3526 Perl_croak(aTHX_ "regexp unwind memory corruption");
3527 }
3528 /* NOT REACHED */
3529 }
4633a7c4 3530#ifdef DEBUGGING
3280af22 3531 PL_regindent--;
4633a7c4 3532#endif
a0d0e21e 3533 return 0;
a687059c
LW
3534}
3535
3536/*
3537 - regrepeat - repeatedly match something simple, report how many
3538 */
3539/*
3540 * [This routine now assumes that it will only match on things of length 1.
3541 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3542 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3543 */
76e3520e 3544STATIC I32
cea2e8a9 3545S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3546{
a0d0e21e 3547 register char *scan;
a0d0e21e 3548 register I32 c;
3280af22 3549 register char *loceol = PL_regeol;
a0ed51b3 3550 register I32 hardcount = 0;
ffc61ed2 3551 register bool do_utf8 = DO_UTF8(PL_reg_sv);
a0d0e21e 3552
3280af22 3553 scan = PL_reginput;
c277df42 3554 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3555 loceol = scan + max;
a0d0e21e 3556 switch (OP(p)) {
22c35a8c 3557 case REG_ANY:
1aa99e6b 3558 if (do_utf8) {
ffc61ed2 3559 loceol = PL_regeol;
1aa99e6b 3560 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3561 scan += UTF8SKIP(scan);
3562 hardcount++;
3563 }
3564 } else {
3565 while (scan < loceol && *scan != '\n')
3566 scan++;
a0ed51b3
LW
3567 }
3568 break;
ffc61ed2 3569 case SANY:
3baa4c62 3570 scan = loceol;
a0ed51b3 3571 break;
bbce6d69 3572 case EXACT: /* length of string is 1 */
cd439c50 3573 c = (U8)*STRING(p);
bbce6d69 3574 while (scan < loceol && UCHARAT(scan) == c)
3575 scan++;
3576 break;
3577 case EXACTF: /* length of string is 1 */
cd439c50 3578 c = (U8)*STRING(p);
bbce6d69 3579 while (scan < loceol &&
22c35a8c 3580 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3581 scan++;
3582 break;
3583 case EXACTFL: /* length of string is 1 */
3280af22 3584 PL_reg_flags |= RF_tainted;
cd439c50 3585 c = (U8)*STRING(p);
bbce6d69 3586 while (scan < loceol &&
22c35a8c 3587 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3588 scan++;
3589 break;
3590 case ANYOF:
ffc61ed2
JH
3591 if (do_utf8) {
3592 loceol = PL_regeol;
1aa99e6b
IH
3593 while (hardcount < max && scan < loceol &&
3594 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3595 scan += UTF8SKIP(scan);
3596 hardcount++;
3597 }
3598 } else {
3599 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3600 scan++;
3601 }
a0d0e21e
LW
3602 break;
3603 case ALNUM:
1aa99e6b 3604 if (do_utf8) {
ffc61ed2 3605 loceol = PL_regeol;
8269fa76 3606 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3607 while (hardcount < max && scan < loceol &&
3568d838 3608 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3609 scan += UTF8SKIP(scan);
3610 hardcount++;
3611 }
3612 } else {
3613 while (scan < loceol && isALNUM(*scan))
3614 scan++;
a0ed51b3
LW
3615 }
3616 break;
bbce6d69 3617 case ALNUML:
3280af22 3618 PL_reg_flags |= RF_tainted;
1aa99e6b 3619 if (do_utf8) {
ffc61ed2 3620 loceol = PL_regeol;
1aa99e6b
IH
3621 while (hardcount < max && scan < loceol &&
3622 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3623 scan += UTF8SKIP(scan);
3624 hardcount++;
3625 }
3626 } else {
3627 while (scan < loceol && isALNUM_LC(*scan))
3628 scan++;
a0ed51b3
LW
3629 }
3630 break;
a0d0e21e 3631 case NALNUM:
1aa99e6b 3632 if (do_utf8) {
ffc61ed2 3633 loceol = PL_regeol;
8269fa76 3634 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3635 while (hardcount < max && scan < loceol &&
3568d838 3636 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3637 scan += UTF8SKIP(scan);
3638 hardcount++;
3639 }
3640 } else {
3641 while (scan < loceol && !isALNUM(*scan))
3642 scan++;
a0ed51b3
LW
3643 }
3644 break;
bbce6d69 3645 case NALNUML:
3280af22 3646 PL_reg_flags |= RF_tainted;
1aa99e6b 3647 if (do_utf8) {
ffc61ed2 3648 loceol = PL_regeol;
1aa99e6b
IH
3649 while (hardcount < max && scan < loceol &&
3650 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3651 scan += UTF8SKIP(scan);
3652 hardcount++;
3653 }
3654 } else {
3655 while (scan < loceol && !isALNUM_LC(*scan))
3656 scan++;
a0ed51b3
LW
3657 }
3658 break;
a0d0e21e 3659 case SPACE:
1aa99e6b 3660 if (do_utf8) {
ffc61ed2 3661 loceol = PL_regeol;
8269fa76 3662 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3663 while (hardcount < max && scan < loceol &&
3568d838
JH
3664 (*scan == ' ' ||
3665 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3666 scan += UTF8SKIP(scan);
3667 hardcount++;
3668 }
3669 } else {
3670 while (scan < loceol && isSPACE(*scan))
3671 scan++;
a0ed51b3
LW
3672 }
3673 break;
bbce6d69 3674 case SPACEL:
3280af22 3675 PL_reg_flags |= RF_tainted;
1aa99e6b 3676 if (do_utf8) {
ffc61ed2 3677 loceol = PL_regeol;
1aa99e6b 3678 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3679 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3680 scan += UTF8SKIP(scan);
3681 hardcount++;
3682 }
3683 } else {
3684 while (scan < loceol && isSPACE_LC(*scan))
3685 scan++;
a0ed51b3
LW
3686 }
3687 break;
a0d0e21e 3688 case NSPACE:
1aa99e6b 3689 if (do_utf8) {
ffc61ed2 3690 loceol = PL_regeol;
8269fa76 3691 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3692 while (hardcount < max && scan < loceol &&
3568d838
JH
3693 !(*scan == ' ' ||
3694 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3695 scan += UTF8SKIP(scan);
3696 hardcount++;
3697 }
3698 } else {
3699 while (scan < loceol && !isSPACE(*scan))
3700 scan++;
3701 break;
a0ed51b3 3702 }
bbce6d69 3703 case NSPACEL:
3280af22 3704 PL_reg_flags |= RF_tainted;
1aa99e6b 3705 if (do_utf8) {
ffc61ed2 3706 loceol = PL_regeol;
1aa99e6b 3707 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3708 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3709 scan += UTF8SKIP(scan);
3710 hardcount++;
3711 }
3712 } else {
3713 while (scan < loceol && !isSPACE_LC(*scan))
3714 scan++;
a0ed51b3
LW
3715 }
3716 break;
a0d0e21e 3717 case DIGIT:
1aa99e6b 3718 if (do_utf8) {
ffc61ed2 3719 loceol = PL_regeol;
8269fa76 3720 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3721 while (hardcount < max && scan < loceol &&
3568d838 3722 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3723 scan += UTF8SKIP(scan);
3724 hardcount++;
3725 }
3726 } else {
3727 while (scan < loceol && isDIGIT(*scan))
3728 scan++;
a0ed51b3
LW
3729 }
3730 break;
a0d0e21e 3731 case NDIGIT:
1aa99e6b 3732 if (do_utf8) {
ffc61ed2 3733 loceol = PL_regeol;
8269fa76 3734 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3735 while (hardcount < max && scan < loceol &&
3568d838 3736 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3737 scan += UTF8SKIP(scan);
3738 hardcount++;
3739 }
3740 } else {
3741 while (scan < loceol && !isDIGIT(*scan))
3742 scan++;
a0ed51b3
LW
3743 }
3744 break;
a0d0e21e
LW
3745 default: /* Called on something of 0 width. */
3746 break; /* So match right here or not at all. */
3747 }
a687059c 3748
a0ed51b3
LW
3749 if (hardcount)
3750 c = hardcount;
3751 else
3752 c = scan - PL_reginput;
3280af22 3753 PL_reginput = scan;
a687059c 3754
9041c2e3 3755 DEBUG_r(
c277df42
IZ
3756 {
3757 SV *prop = sv_newmortal();
3758
3759 regprop(prop, p);
9041c2e3
NIS
3760 PerlIO_printf(Perl_debug_log,
3761 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
7b0972df 3762 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42 3763 });
9041c2e3 3764
a0d0e21e 3765 return(c);
a687059c
LW
3766}
3767
3768/*
c277df42 3769 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 3770 *
c277df42
IZ
3771 * The repeater is supposed to have constant length.
3772 */
3773
76e3520e 3774STATIC I32
cea2e8a9 3775S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42
IZ
3776{
3777 register char *scan;
3778 register char *start;
3280af22 3779 register char *loceol = PL_regeol;
a0ed51b3 3780 I32 l = 0;
708e3b05 3781 I32 count = 0, res = 1;
a0ed51b3
LW
3782
3783 if (!max)
3784 return 0;
c277df42 3785
3280af22 3786 start = PL_reginput;
1aa99e6b 3787 if (DO_UTF8(PL_reg_sv)) {
708e3b05 3788 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3789 if (!count++) {
3790 l = 0;
3791 while (start < PL_reginput) {
3792 l++;
3793 start += UTF8SKIP(start);
3794 }
3795 *lp = l;
3796 if (l == 0)
3797 return max;
3798 }
3799 if (count == max)
3800 return count;
3801 }
3802 }
3803 else {
708e3b05 3804 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3805 if (!count++) {
3806 *lp = l = PL_reginput - start;
3807 if (max != REG_INFTY && l*max < loceol - scan)
3808 loceol = scan + l*max;
3809 if (l == 0)
3810 return max;
c277df42
IZ
3811 }
3812 }
3813 }
708e3b05 3814 if (!res)
3280af22 3815 PL_reginput = scan;
9041c2e3 3816
a0ed51b3 3817 return count;
c277df42
IZ
3818}
3819
3820/*
ffc61ed2
JH
3821- regclass_swash - prepare the utf8 swash
3822*/
3823
3824SV *
3825Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
3826{
3827 SV *sw = NULL;
3828 SV *si = NULL;
3829
3830 if (PL_regdata && PL_regdata->count) {
3831 U32 n = ARG(node);
3832
3833 if (PL_regdata->what[n] == 's') {
3834 SV *rv = (SV*)PL_regdata->data[n];
3835 AV *av = (AV*)SvRV((SV*)rv);
3836 SV **a;
9041c2e3 3837
ffc61ed2
JH
3838 si = *av_fetch(av, 0, FALSE);
3839 a = av_fetch(av, 1, FALSE);
9041c2e3 3840
ffc61ed2
JH
3841 if (a)
3842 sw = *a;
3843 else if (si && doinit) {
3844 sw = swash_init("utf8", "", si, 1, 0);
3845 (void)av_store(av, 1, sw);
3846 }
3847 }
3848 }
3849
3850 if (initsvp)
3851 *initsvp = si;
3852
3853 return sw;
3854}
3855
3856/*
cb8d8820 3857 - reginclass - determine if a character falls into a character class
bbce6d69 3858 */
3859
76e3520e 3860STATIC bool
ffc61ed2 3861S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
bbce6d69 3862{
ffc61ed2 3863 char flags = ANYOF_FLAGS(n);
bbce6d69 3864 bool match = FALSE;
1aa99e6b 3865 UV c;
3568d838 3866 STRLEN len = 0;
1aa99e6b 3867
3568d838 3868 c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
bbce6d69 3869
ffc61ed2
JH
3870 if (do_utf8 || (flags & ANYOF_UNICODE)) {
3871 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
3872 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
3873 match = TRUE;
bbce6d69 3874 }
3568d838 3875 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 3876 match = TRUE;
ffc61ed2
JH
3877 if (!match) {
3878 SV *sw = regclass_swash(n, TRUE, 0);
3879
3880 if (sw) {
3568d838 3881 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
3882 match = TRUE;
3883 else if (flags & ANYOF_FOLD) {
3884 U8 tmpbuf[UTF8_MAXLEN+1];
9041c2e3 3885
ffc61ed2
JH
3886 if (flags & ANYOF_LOCALE) {
3887 PL_reg_flags |= RF_tainted;
9041c2e3 3888 uvchr_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
ffc61ed2 3889 }
1aa99e6b 3890 else
9041c2e3 3891 uvchr_to_utf8(tmpbuf, toLOWER_utf8(p));
3568d838 3892 if (swash_fetch(sw, tmpbuf, do_utf8))
ffc61ed2
JH
3893 match = TRUE;
3894 }
3895 }
bbce6d69 3896 }
3897 }
1aa99e6b 3898 if (!match && c < 256) {
ffc61ed2
JH
3899 if (ANYOF_BITMAP_TEST(n, c))
3900 match = TRUE;
3901 else if (flags & ANYOF_FOLD) {
3568d838 3902 I32 f;
a0ed51b3 3903
ffc61ed2
JH
3904 if (flags & ANYOF_LOCALE) {
3905 PL_reg_flags |= RF_tainted;
3906 f = PL_fold_locale[c];
3907 }
3908 else
3909 f = PL_fold[c];
3910 if (f != c && ANYOF_BITMAP_TEST(n, f))
3911 match = TRUE;
3912 }
3913
3914 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 3915 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
3916 if (
3917 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3918 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3919 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
3920 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3921 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3922 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3923 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3924 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3925 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3926 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3927 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
3928 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
3929 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3930 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3931 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3932 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3933 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
3934 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3935 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
3936 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3937 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3938 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3939 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
3940 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3941 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3942 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3943 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3944 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3945 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
3946 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
3947 ) /* How's that for a conditional? */
3948 {
3949 match = TRUE;
3950 }
a0ed51b3 3951 }
a0ed51b3
LW
3952 }
3953
a0ed51b3
LW
3954 return (flags & ANYOF_INVERT) ? !match : match;
3955}
161b471a 3956
dfe13c55 3957STATIC U8 *
cea2e8a9 3958S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 3959{
1aa99e6b
IH
3960 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
3961}
3962
3963STATIC U8 *
3964S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 3965{
a0ed51b3 3966 if (off >= 0) {
1aa99e6b 3967 while (off-- && s < lim) {
ffc61ed2 3968 /* XXX could check well-formedness here */
a0ed51b3 3969 s += UTF8SKIP(s);
ffc61ed2 3970 }
a0ed51b3
LW
3971 }
3972 else {
3973 while (off++) {
1aa99e6b 3974 if (s > lim) {
a0ed51b3 3975 s--;
ffc61ed2 3976 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 3977 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 3978 s--;
ffc61ed2
JH
3979 }
3980 /* XXX could check well-formedness here */
a0ed51b3
LW
3981 }
3982 }
3983 }
3984 return s;
3985}
161b471a 3986
dfe13c55 3987STATIC U8 *
1aa99e6b 3988S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 3989{
1aa99e6b
IH
3990 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
3991}
3992
3993STATIC U8 *
3994S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
3995{
3996 if (off >= 0) {
1aa99e6b 3997 while (off-- && s < lim) {
ffc61ed2 3998 /* XXX could check well-formedness here */
a0ed51b3 3999 s += UTF8SKIP(s);
ffc61ed2 4000 }
a0ed51b3
LW
4001 if (off >= 0)
4002 return 0;
4003 }
4004 else {
4005 while (off++) {
1aa99e6b 4006 if (s > lim) {
a0ed51b3 4007 s--;
ffc61ed2 4008 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4009 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4010 s--;
ffc61ed2
JH
4011 }
4012 /* XXX could check well-formedness here */
a0ed51b3
LW
4013 }
4014 else
4015 break;
4016 }
4017 if (off <= 0)
4018 return 0;
4019 }
4020 return s;
4021}
51371543
GS
4022
4023#ifdef PERL_OBJECT
51371543
GS
4024#include "XSUB.h"
4025#endif
4026
4027static void
4028restore_pos(pTHXo_ void *arg)
4029{
51371543
GS
4030 if (PL_reg_eval_set) {
4031 if (PL_reg_oldsaved) {
4032 PL_reg_re->subbeg = PL_reg_oldsaved;
4033 PL_reg_re->sublen = PL_reg_oldsavedlen;
4034 RX_MATCH_COPIED_on(PL_reg_re);
4035 }
4036 PL_reg_magic->mg_len = PL_reg_oldpos;
4037 PL_reg_eval_set = 0;
4038 PL_curpm = PL_reg_oldcurpm;
4039 }
4040}