This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
stash autovivification and method call error messages
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
c5be433b
GS
83#ifdef PERL_IN_XSUB_RE
84# if defined(PERL_CAPI) || defined(PERL_OBJECT)
85# include "XSUB.h"
86# endif
87#endif
88
a687059c
LW
89#include "regcomp.h"
90
c277df42
IZ
91#define RF_tainted 1 /* tainted information used? */
92#define RF_warned 2 /* warned about big count? */
ce862d02 93#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
94#define RF_utf8 8 /* String contains multibyte chars? */
95
96#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
97
98#define RS_init 1 /* eval environment created */
99#define RS_set 2 /* replsv value is set */
c277df42 100
a687059c
LW
101#ifndef STATIC
102#define STATIC static
103#endif
104
c277df42
IZ
105/*
106 * Forwards.
107 */
108
a0ed51b3 109#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
1aa99e6b 110#define CHR_DIST(a,b) (DO_UTF8(PL_reg_sv) ? utf8_distance(a,b) : a - b)
a0ed51b3 111
dfe13c55
GS
112#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
113#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
1aa99e6b
IH
114#define HOP(pos,off) (DO_UTF8(PL_reg_sv) ? reghop((U8*)pos, off) : (U8*)(pos + off))
115#define HOPMAYBE(pos,off) (DO_UTF8(PL_reg_sv) ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
116#define HOPc(pos,off) ((char*)HOP(pos,off))
117#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 118
efb30f32
HS
119#define HOPBACK(pos, off) ( \
120 (UTF && DO_UTF8(PL_reg_sv)) \
121 ? reghopmaybe((U8*)pos, -off) \
122 : (pos - off >= PL_bostr) \
123 ? (U8*)(pos - off) \
124 : (U8*)NULL \
125)
126#define HOPBACKc(pos, off) (char*)HOPBACK(pos, off)
127
1aa99e6b
IH
128#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
129#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
130#define HOP3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
131#define HOPMAYBE3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
132#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
133#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
134
8269fa76 135#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 136
8269fa76 137static void restore_pos(pTHXo_ void *arg);
51371543 138
76e3520e 139STATIC CHECKPOINT
cea2e8a9 140S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 141{
3280af22 142 int retval = PL_savestack_ix;
b1ce53c5
JH
143#define REGCP_PAREN_ELEMS 4
144 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
145 int p;
146
b1ce53c5
JH
147#define REGCP_OTHER_ELEMS 5
148 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 149 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 150/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
151 SSPUSHINT(PL_regendp[p]);
152 SSPUSHINT(PL_regstartp[p]);
3280af22 153 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
154 SSPUSHINT(p);
155 }
b1ce53c5 156/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
157 SSPUSHINT(PL_regsize);
158 SSPUSHINT(*PL_reglastparen);
159 SSPUSHPTR(PL_reginput);
41123dfd
JH
160#define REGCP_FRAME_ELEMS 2
161/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
162 * are needed for the regexp context stack bookkeeping. */
163 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 164 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 165
a0d0e21e
LW
166 return retval;
167}
168
c277df42 169/* These are needed since we do not localize EVAL nodes: */
02db2b7b 170# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 171 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 172 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 173
02db2b7b 174# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 175 PerlIO_printf(Perl_debug_log, \
faccc32b 176 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 177 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 178
76e3520e 179STATIC char *
cea2e8a9 180S_regcppop(pTHX)
a0d0e21e 181{
b1ce53c5 182 I32 i;
a0d0e21e
LW
183 U32 paren = 0;
184 char *input;
cf93c79d 185 I32 tmps;
b1ce53c5
JH
186
187 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 188 i = SSPOPINT;
b1ce53c5
JH
189 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
190 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 191 input = (char *) SSPOPPTR;
3280af22
NIS
192 *PL_reglastparen = SSPOPINT;
193 PL_regsize = SSPOPINT;
b1ce53c5
JH
194
195 /* Now restore the parentheses context. */
41123dfd
JH
196 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
197 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 198 paren = (U32)SSPOPINT;
3280af22 199 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
200 PL_regstartp[paren] = SSPOPINT;
201 tmps = SSPOPINT;
3280af22
NIS
202 if (paren <= *PL_reglastparen)
203 PL_regendp[paren] = tmps;
c277df42 204 DEBUG_r(
c3464db5 205 PerlIO_printf(Perl_debug_log,
b900a521 206 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 207 (UV)paren, (IV)PL_regstartp[paren],
b900a521 208 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 209 (IV)PL_regendp[paren],
3280af22 210 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 211 );
a0d0e21e 212 }
c277df42 213 DEBUG_r(
3280af22 214 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 215 PerlIO_printf(Perl_debug_log,
faccc32b
JH
216 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
217 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
218 }
219 );
daf18116 220#if 1
dafc8851
JH
221 /* It would seem that the similar code in regtry()
222 * already takes care of this, and in fact it is in
223 * a better location to since this code can #if 0-ed out
224 * but the code in regtry() is needed or otherwise tests
225 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
226 * (as of patchlevel 7877) will fail. Then again,
227 * this code seems to be necessary or otherwise
228 * building DynaLoader will fail:
229 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
230 * --jhi */
3280af22
NIS
231 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
232 if (paren > PL_regsize)
cf93c79d
IZ
233 PL_regstartp[paren] = -1;
234 PL_regendp[paren] = -1;
a0d0e21e 235 }
dafc8851 236#endif
a0d0e21e
LW
237 return input;
238}
239
0f5d15d6 240STATIC char *
cea2e8a9 241S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
242{
243 I32 tmp = PL_savestack_ix;
244
245 PL_savestack_ix = ss;
246 regcppop();
247 PL_savestack_ix = tmp;
942e002e 248 return Nullch;
0f5d15d6
IZ
249}
250
251typedef struct re_cc_state
252{
253 I32 ss;
254 regnode *node;
255 struct re_cc_state *prev;
256 CURCUR *cc;
257 regexp *re;
258} re_cc_state;
259
02db2b7b 260#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 261
29d1e993
HS
262#define TRYPAREN(paren, n, input) { \
263 if (paren) { \
264 if (n) { \
265 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
266 PL_regendp[paren] = input - PL_bostr; \
267 } \
268 else \
269 PL_regendp[paren] = -1; \
270 } \
271 if (regmatch(next)) \
272 sayYES; \
273 if (paren && n) \
274 PL_regendp[paren] = -1; \
275}
276
277
a687059c 278/*
e50aee73 279 * pregexec and friends
a687059c
LW
280 */
281
282/*
c277df42 283 - pregexec - match a regexp against a string
a687059c 284 */
c277df42 285I32
864dbfa3 286Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 287 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
288/* strend: pointer to null at end of string */
289/* strbeg: real beginning of string */
290/* minend: end of match must be >=minend after stringarg. */
291/* nosave: For optimizations. */
292{
293 return
9041c2e3 294 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
295 nosave ? 0 : REXEC_COPY_STR);
296}
0f5d15d6
IZ
297
298STATIC void
cea2e8a9 299S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
300{
301 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
302#ifdef DEBUGGING
303 PL_regprogram = prog->program;
304#endif
305 PL_regnpar = prog->nparens;
9041c2e3
NIS
306 PL_regdata = prog->data;
307 PL_reg_re = prog;
0f5d15d6 308}
22e551b9 309
9041c2e3 310/*
cad2e5aa
JH
311 * Need to implement the following flags for reg_anch:
312 *
313 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
314 * USE_INTUIT_ML
315 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
316 * INTUIT_AUTORITATIVE_ML
317 * INTUIT_ONCE_NOML - Intuit can match in one location only.
318 * INTUIT_ONCE_ML
319 *
320 * Another flag for this function: SECOND_TIME (so that float substrs
321 * with giant delta may be not rechecked).
322 */
323
324/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
325
2c2d71f5 326/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
327 Otherwise, only SvCUR(sv) is used to get strbeg. */
328
329/* XXXX We assume that strpos is strbeg unless sv. */
330
6eb5f6b9
JH
331/* XXXX Some places assume that there is a fixed substring.
332 An update may be needed if optimizer marks as "INTUITable"
333 RExen without fixed substrings. Similarly, it is assumed that
334 lengths of all the strings are no more than minlen, thus they
335 cannot come from lookahead.
336 (Or minlen should take into account lookahead.) */
337
2c2d71f5
JH
338/* A failure to find a constant substring means that there is no need to make
339 an expensive call to REx engine, thus we celebrate a failure. Similarly,
340 finding a substring too deep into the string means that less calls to
30944b6d
IZ
341 regtry() should be needed.
342
343 REx compiler's optimizer found 4 possible hints:
344 a) Anchored substring;
345 b) Fixed substring;
346 c) Whether we are anchored (beginning-of-line or \G);
347 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 348 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
349 string which does not contradict any of them.
350 */
2c2d71f5 351
6eb5f6b9
JH
352/* Most of decisions we do here should have been done at compile time.
353 The nodes of the REx which we used for the search should have been
354 deleted from the finite automaton. */
355
cad2e5aa
JH
356char *
357Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
358 char *strend, U32 flags, re_scream_pos_data *data)
359{
2c2d71f5 360 register I32 start_shift;
cad2e5aa 361 /* Should be nonnegative! */
2c2d71f5
JH
362 register I32 end_shift;
363 register char *s;
364 register SV *check;
a1933d95 365 char *strbeg;
cad2e5aa
JH
366 char *t;
367 I32 ml_anch;
2c2d71f5 368 char *tmp;
6eb5f6b9
JH
369 register char *other_last = Nullch; /* other substr checked before this */
370 char *check_at; /* check substr found at this pos */
30944b6d
IZ
371#ifdef DEBUGGING
372 char *i_strpos = strpos;
373#endif
cad2e5aa
JH
374
375 DEBUG_r( if (!PL_colorset) reginitcolors() );
376 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 377 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
378 PL_colors[4],PL_colors[5],PL_colors[0],
379 prog->precomp,
380 PL_colors[1],
381 (strlen(prog->precomp) > 60 ? "..." : ""),
382 PL_colors[0],
b900a521 383 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
384 strpos, PL_colors[1],
385 (strend - strpos > 60 ? "..." : ""))
386 );
387
1aa99e6b
IH
388 if (prog->reganch & ROPT_UTF8)
389 PL_reg_flags |= RF_utf8;
390
391 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
2c2d71f5 392 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 393 goto fail;
2c2d71f5 394 }
a1933d95 395 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 396 PL_regeol = strend;
653099ff 397 check = prog->check_substr;
2c2d71f5 398 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
399 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
400 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 401 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 402
7e25d62c
JH
403 if (!ml_anch) {
404 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
405 | ROPT_IMPLICIT)) /* not a real BOL */
406 /* SvCUR is not set on references: SvRV and SvPVX overlap */
407 && sv && !SvROK(sv)
408 && (strpos != strbeg)) {
409 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
410 goto fail;
411 }
412 if (prog->check_offset_min == prog->check_offset_max &&
413 !(prog->reganch & ROPT_SANY_SEEN)) {
2c2d71f5 414 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
415 I32 slen;
416
1aa99e6b 417 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
418 if (SvTAIL(check)) {
419 slen = SvCUR(check); /* >= 1 */
cad2e5aa 420
9041c2e3 421 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
422 || (strend - s == slen && strend[-1] != '\n')) {
423 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
424 goto fail_finish;
cad2e5aa
JH
425 }
426 /* Now should match s[0..slen-2] */
427 slen--;
653099ff 428 if (slen && (*SvPVX(check) != *s
cad2e5aa 429 || (slen > 1
653099ff 430 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
431 report_neq:
432 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
433 goto fail_finish;
434 }
cad2e5aa 435 }
653099ff
GS
436 else if (*SvPVX(check) != *s
437 || ((slen = SvCUR(check)) > 1
438 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
439 goto report_neq;
440 goto success_at_start;
7e25d62c 441 }
cad2e5aa 442 }
2c2d71f5 443 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 444 s = strpos;
2c2d71f5 445 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 446 end_shift = prog->minlen - start_shift -
653099ff 447 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 448 if (!ml_anch) {
653099ff
GS
449 I32 end = prog->check_offset_max + CHR_SVLEN(check)
450 - (SvTAIL(check) != 0);
1aa99e6b 451 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
452
453 if (end_shift < eshift)
454 end_shift = eshift;
455 }
cad2e5aa 456 }
2c2d71f5 457 else { /* Can match at random position */
cad2e5aa
JH
458 ml_anch = 0;
459 s = strpos;
2c2d71f5
JH
460 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
461 /* Should be nonnegative! */
462 end_shift = prog->minlen - start_shift -
653099ff 463 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
464 }
465
2c2d71f5 466#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 467 if (end_shift < 0)
6bbae5e6 468 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
469#endif
470
2c2d71f5
JH
471 restart:
472 /* Find a possible match in the region s..strend by looking for
473 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 474 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
475 I32 p = -1; /* Internal iterator of scream. */
476 I32 *pp = data ? data->scream_pos : &p;
477
2c2d71f5
JH
478 if (PL_screamfirst[BmRARE(check)] >= 0
479 || ( BmRARE(check) == '\n'
480 && (BmPREVIOUS(check) == SvCUR(check) - 1)
481 && SvTAIL(check) ))
9041c2e3 482 s = screaminstr(sv, check,
2c2d71f5 483 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 484 else
2c2d71f5 485 goto fail_finish;
cad2e5aa
JH
486 if (data)
487 *data->scream_olds = s;
488 }
3baa4c62
JH
489 else if (prog->reganch & ROPT_SANY_SEEN)
490 s = fbm_instr((U8*)(s + start_shift),
491 (U8*)(strend - end_shift),
492 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 493 else
1aa99e6b
IH
494 s = fbm_instr(HOP3(s, start_shift, strend),
495 HOP3(strend, -end_shift, strbeg),
2c2d71f5 496 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
497
498 /* Update the count-of-usability, remove useless subpatterns,
499 unshift s. */
2c2d71f5
JH
500
501 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
502 (s ? "Found" : "Did not find"),
503 ((check == prog->anchored_substr) ? "anchored" : "floating"),
504 PL_colors[0],
7b0972df
JH
505 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
506 SvPVX(check),
2c2d71f5
JH
507 PL_colors[1], (SvTAIL(check) ? "$" : ""),
508 (s ? " at offset " : "...\n") ) );
509
510 if (!s)
511 goto fail_finish;
512
6eb5f6b9
JH
513 check_at = s;
514
2c2d71f5 515 /* Finish the diagnostic message */
30944b6d 516 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
517
518 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
519 Start with the other substr.
520 XXXX no SCREAM optimization yet - and a very coarse implementation
521 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
522 *always* match. Probably should be marked during compile...
523 Probably it is right to do no SCREAM here...
524 */
525
526 if (prog->float_substr && prog->anchored_substr) {
30944b6d 527 /* Take into account the "other" substring. */
2c2d71f5
JH
528 /* XXXX May be hopelessly wrong for UTF... */
529 if (!other_last)
6eb5f6b9 530 other_last = strpos;
2c2d71f5 531 if (check == prog->float_substr) {
30944b6d
IZ
532 do_other_anchored:
533 {
1aa99e6b 534 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
535 char *s1 = s;
536
2c2d71f5
JH
537 t = s - prog->check_offset_max;
538 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
539 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 540 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 541 && t > strpos)))
30944b6d 542 /* EMPTY */;
2c2d71f5
JH
543 else
544 t = strpos;
1aa99e6b 545 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
546 if (t < other_last) /* These positions already checked */
547 t = other_last;
1aa99e6b 548 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
549 if (last < last1)
550 last1 = last;
551 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
552 /* On end-of-str: see comment below. */
553 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
554 HOP3(HOP3(last1, prog->anchored_offset, strend)
555 + SvCUR(prog->anchored_substr),
556 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
557 prog->anchored_substr,
558 PL_multiline ? FBMrf_MULTILINE : 0);
559 DEBUG_r(PerlIO_printf(Perl_debug_log,
560 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
561 (s ? "Found" : "Contradicts"),
562 PL_colors[0],
7b0972df
JH
563 (int)(SvCUR(prog->anchored_substr)
564 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
565 SvPVX(prog->anchored_substr),
566 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
567 if (!s) {
568 if (last1 >= last2) {
569 DEBUG_r(PerlIO_printf(Perl_debug_log,
570 ", giving up...\n"));
571 goto fail_finish;
572 }
573 DEBUG_r(PerlIO_printf(Perl_debug_log,
574 ", trying floating at offset %ld...\n",
1aa99e6b
IH
575 (long)(HOP3c(s1, 1, strend) - i_strpos)));
576 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
577 s = HOP3c(last, 1, strend);
2c2d71f5
JH
578 goto restart;
579 }
580 else {
581 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 582 (long)(s - i_strpos)));
1aa99e6b
IH
583 t = HOP3c(s, -prog->anchored_offset, strbeg);
584 other_last = HOP3c(s, 1, strend);
30944b6d 585 s = s1;
2c2d71f5
JH
586 if (t == strpos)
587 goto try_at_start;
2c2d71f5
JH
588 goto try_at_offset;
589 }
30944b6d 590 }
2c2d71f5
JH
591 }
592 else { /* Take into account the floating substring. */
593 char *last, *last1;
594 char *s1 = s;
595
1aa99e6b
IH
596 t = HOP3c(s, -start_shift, strbeg);
597 last1 = last =
598 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
599 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
600 last = HOP3c(t, prog->float_max_offset, strend);
601 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
602 if (s < other_last)
603 s = other_last;
2c2d71f5
JH
604 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
605 /* fbm_instr() takes into account exact value of end-of-str
606 if the check is SvTAIL(ed). Since false positives are OK,
607 and end-of-str is not later than strend we are OK. */
608 s = fbm_instr((unsigned char*)s,
609 (unsigned char*)last + SvCUR(prog->float_substr)
610 - (SvTAIL(prog->float_substr)!=0),
611 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
612 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
613 (s ? "Found" : "Contradicts"),
614 PL_colors[0],
7b0972df
JH
615 (int)(SvCUR(prog->float_substr)
616 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
617 SvPVX(prog->float_substr),
618 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
619 if (!s) {
620 if (last1 == last) {
621 DEBUG_r(PerlIO_printf(Perl_debug_log,
622 ", giving up...\n"));
623 goto fail_finish;
624 }
625 DEBUG_r(PerlIO_printf(Perl_debug_log,
626 ", trying anchored starting at offset %ld...\n",
30944b6d 627 (long)(s1 + 1 - i_strpos)));
803ff556 628 other_last = last;
1aa99e6b 629 s = HOP3c(t, 1, strend);
2c2d71f5
JH
630 goto restart;
631 }
632 else {
633 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 634 (long)(s - i_strpos)));
803ff556 635 other_last = s; /* Fix this later. --Hugo */
30944b6d 636 s = s1;
2c2d71f5
JH
637 if (t == strpos)
638 goto try_at_start;
2c2d71f5
JH
639 goto try_at_offset;
640 }
641 }
cad2e5aa 642 }
2c2d71f5
JH
643
644 t = s - prog->check_offset_max;
2c2d71f5
JH
645 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
646 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
647 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
648 && t > strpos))) {
2c2d71f5
JH
649 /* Fixed substring is found far enough so that the match
650 cannot start at strpos. */
651 try_at_offset:
cad2e5aa 652 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
653 /* Eventually fbm_*() should handle this, but often
654 anchored_offset is not 0, so this check will not be wasted. */
655 /* XXXX In the code below we prefer to look for "^" even in
656 presence of anchored substrings. And we search even
657 beyond the found float position. These pessimizations
658 are historical artefacts only. */
659 find_anchor:
2c2d71f5 660 while (t < strend - prog->minlen) {
cad2e5aa 661 if (*t == '\n') {
4ee3650e 662 if (t < check_at - prog->check_offset_min) {
30944b6d 663 if (prog->anchored_substr) {
4ee3650e
GS
664 /* Since we moved from the found position,
665 we definitely contradict the found anchored
30944b6d
IZ
666 substr. Due to the above check we do not
667 contradict "check" substr.
668 Thus we can arrive here only if check substr
669 is float. Redo checking for "other"=="fixed".
670 */
9041c2e3 671 strpos = t + 1;
30944b6d
IZ
672 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
673 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
674 goto do_other_anchored;
675 }
4ee3650e
GS
676 /* We don't contradict the found floating substring. */
677 /* XXXX Why not check for STCLASS? */
cad2e5aa 678 s = t + 1;
2c2d71f5 679 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 680 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
681 goto set_useful;
682 }
4ee3650e
GS
683 /* Position contradicts check-string */
684 /* XXXX probably better to look for check-string
685 than for "\n", so one should lower the limit for t? */
686 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 687 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 688 other_last = strpos = s = t + 1;
cad2e5aa
JH
689 goto restart;
690 }
691 t++;
692 }
2c2d71f5
JH
693 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
694 PL_colors[0],PL_colors[1]));
695 goto fail_finish;
cad2e5aa 696 }
f5952150
GS
697 else {
698 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
699 PL_colors[0],PL_colors[1]));
700 }
cad2e5aa
JH
701 s = t;
702 set_useful:
2c2d71f5 703 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
704 }
705 else {
f5952150 706 /* The found string does not prohibit matching at strpos,
2c2d71f5 707 - no optimization of calling REx engine can be performed,
f5952150
GS
708 unless it was an MBOL and we are not after MBOL,
709 or a future STCLASS check will fail this. */
2c2d71f5
JH
710 try_at_start:
711 /* Even in this situation we may use MBOL flag if strpos is offset
712 wrt the start of the string. */
05b4157f 713 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 714 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
715 /* May be due to an implicit anchor of m{.*foo} */
716 && !(prog->reganch & ROPT_IMPLICIT))
717 {
cad2e5aa
JH
718 t = strpos;
719 goto find_anchor;
720 }
30944b6d 721 DEBUG_r( if (ml_anch)
f5952150
GS
722 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
723 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 724 );
2c2d71f5 725 success_at_start:
30944b6d 726 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 727 && prog->check_substr /* Could be deleted already */
cad2e5aa 728 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
729 && prog->check_substr == prog->float_substr)
730 {
cad2e5aa 731 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 732 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
733 SvREFCNT_dec(prog->check_substr);
734 prog->check_substr = Nullsv; /* disable */
735 prog->float_substr = Nullsv; /* clear */
5e39e1e5 736 check = Nullsv; /* abort */
cad2e5aa 737 s = strpos;
3cf5c195
IZ
738 /* XXXX This is a remnant of the old implementation. It
739 looks wasteful, since now INTUIT can use many
6eb5f6b9 740 other heuristics. */
cad2e5aa
JH
741 prog->reganch &= ~RE_USE_INTUIT;
742 }
743 else
744 s = strpos;
745 }
746
6eb5f6b9
JH
747 /* Last resort... */
748 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
749 if (prog->regstclass) {
750 /* minlen == 0 is possible if regstclass is \b or \B,
751 and the fixed substr is ''$.
752 Since minlen is already taken into account, s+1 is before strend;
753 accidentally, minlen >= 1 guaranties no false positives at s + 1
754 even for \b or \B. But (minlen? 1 : 0) below assumes that
755 regstclass does not come from lookahead... */
756 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
757 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 758 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 759 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 760 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 761 : 1);
6eb5f6b9 762 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
763 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
764 : (prog->float_substr
765 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
766 cl_l, strend)
767 : strend);
a1933d95 768 char *startpos = strbeg;
6eb5f6b9
JH
769
770 t = s;
76384e4a 771 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 772 PL_regdata = prog->data;
76384e4a
GS
773 PL_bostr = startpos;
774 }
6eb5f6b9
JH
775 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
776 if (!s) {
777#ifdef DEBUGGING
778 char *what;
779#endif
780 if (endpos == strend) {
781 DEBUG_r( PerlIO_printf(Perl_debug_log,
782 "Could not match STCLASS...\n") );
783 goto fail;
784 }
66e933ab
GS
785 DEBUG_r( PerlIO_printf(Perl_debug_log,
786 "This position contradicts STCLASS...\n") );
653099ff
GS
787 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
788 goto fail;
6eb5f6b9
JH
789 /* Contradict one of substrings */
790 if (prog->anchored_substr) {
6eb5f6b9
JH
791 if (prog->anchored_substr == check) {
792 DEBUG_r( what = "anchored" );
793 hop_and_restart:
1aa99e6b 794 s = HOP3c(t, 1, strend);
66e933ab
GS
795 if (s + start_shift + end_shift > strend) {
796 /* XXXX Should be taken into account earlier? */
797 DEBUG_r( PerlIO_printf(Perl_debug_log,
798 "Could not match STCLASS...\n") );
799 goto fail;
800 }
5e39e1e5
HS
801 if (!check)
802 goto giveup;
6eb5f6b9 803 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 804 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
805 what, (long)(s + start_shift - i_strpos)) );
806 goto restart;
807 }
66e933ab 808 /* Have both, check_string is floating */
6eb5f6b9
JH
809 if (t + start_shift >= check_at) /* Contradicts floating=check */
810 goto retry_floating_check;
811 /* Recheck anchored substring, but not floating... */
9041c2e3 812 s = check_at;
5e39e1e5
HS
813 if (!check)
814 goto giveup;
6eb5f6b9 815 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 816 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
817 (long)(other_last - i_strpos)) );
818 goto do_other_anchored;
819 }
60e71179
GS
820 /* Another way we could have checked stclass at the
821 current position only: */
822 if (ml_anch) {
823 s = t = t + 1;
5e39e1e5
HS
824 if (!check)
825 goto giveup;
60e71179 826 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
827 "Looking for /%s^%s/m starting at offset %ld...\n",
828 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 829 goto try_at_offset;
66e933ab 830 }
60e71179
GS
831 if (!prog->float_substr) /* Could have been deleted */
832 goto fail;
6eb5f6b9
JH
833 /* Check is floating subtring. */
834 retry_floating_check:
835 t = check_at - start_shift;
836 DEBUG_r( what = "floating" );
837 goto hop_and_restart;
838 }
839 DEBUG_r( if (t != s)
9041c2e3 840 PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
841 "By STCLASS: moving %ld --> %ld\n",
842 (long)(t - i_strpos), (long)(s - i_strpos));
843 else
9041c2e3 844 PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
845 "Does not contradict STCLASS...\n") );
846 }
5e39e1e5
HS
847 giveup:
848 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
849 PL_colors[4], (check ? "Guessed" : "Giving up"),
850 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 851 return s;
2c2d71f5
JH
852
853 fail_finish: /* Substring not found */
66e933ab
GS
854 if (prog->check_substr) /* could be removed already */
855 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 856 fail:
2c2d71f5 857 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
858 PL_colors[4],PL_colors[5]));
859 return Nullch;
860}
9661b544 861
6eb5f6b9 862/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
863STATIC char *
864S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 865{
6eb5f6b9
JH
866 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
867 char *m;
d8093b23
G
868 STRLEN ln;
869 unsigned int c1;
870 unsigned int c2;
6eb5f6b9
JH
871 char *e;
872 register I32 tmp = 1; /* Scratch variable? */
ffc61ed2 873 register bool do_utf8 = DO_UTF8(PL_reg_sv);
cad2e5aa 874
6eb5f6b9
JH
875 /* We know what class it must start with. */
876 switch (OP(c)) {
6eb5f6b9 877 case ANYOF:
a0ed51b3 878 while (s < strend) {
ffc61ed2 879 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
880 if (tmp && (norun || regtry(prog, s)))
881 goto got_it;
882 else
883 tmp = doevery;
a0ed51b3 884 }
6eb5f6b9
JH
885 else
886 tmp = 1;
ffc61ed2 887 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 888 }
6eb5f6b9
JH
889 break;
890 case EXACTF:
891 m = STRING(c);
892 ln = STR_LEN(c);
1aa99e6b
IH
893 if (UTF) {
894 c1 = to_utf8_lower((U8*)m);
895 c2 = to_utf8_upper((U8*)m);
896 }
897 else {
898 c1 = *(U8*)m;
899 c2 = PL_fold[c1];
900 }
6eb5f6b9
JH
901 goto do_exactf;
902 case EXACTFL:
903 m = STRING(c);
904 ln = STR_LEN(c);
d8093b23 905 c1 = *(U8*)m;
6eb5f6b9
JH
906 c2 = PL_fold_locale[c1];
907 do_exactf:
908 e = strend - ln;
b3c9acc1 909
6eb5f6b9
JH
910 if (norun && e < s)
911 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
912
913 if (do_utf8) {
914 STRLEN len;
915 if (c1 == c2)
916 while (s <= e) {
9041c2e3 917 if ( utf8_to_uvchr((U8*)s, &len) == c1
1aa99e6b
IH
918 && regtry(prog, s) )
919 goto got_it;
920 s += len;
921 }
922 else
923 while (s <= e) {
9041c2e3 924 UV c = utf8_to_uvchr((U8*)s, &len);
1aa99e6b
IH
925 if ( (c == c1 || c == c2) && regtry(prog, s) )
926 goto got_it;
927 s += len;
928 }
929 }
930 else {
931 if (c1 == c2)
932 while (s <= e) {
933 if ( *(U8*)s == c1
934 && (ln == 1 || !(OP(c) == EXACTF
935 ? ibcmp(s, m, ln)
936 : ibcmp_locale(s, m, ln)))
937 && (norun || regtry(prog, s)) )
938 goto got_it;
939 s++;
940 }
941 else
942 while (s <= e) {
943 if ( (*(U8*)s == c1 || *(U8*)s == c2)
944 && (ln == 1 || !(OP(c) == EXACTF
945 ? ibcmp(s, m, ln)
946 : ibcmp_locale(s, m, ln)))
947 && (norun || regtry(prog, s)) )
948 goto got_it;
949 s++;
950 }
b3c9acc1
IZ
951 }
952 break;
bbce6d69 953 case BOUNDL:
3280af22 954 PL_reg_flags |= RF_tainted;
bbce6d69 955 /* FALL THROUGH */
a0d0e21e 956 case BOUND:
ffc61ed2 957 if (do_utf8) {
12d33761 958 if (s == PL_bostr)
ffc61ed2
JH
959 tmp = '\n';
960 else {
1aa99e6b 961 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 962
2b9d42f0 963 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
964 }
965 tmp = ((OP(c) == BOUND ?
9041c2e3 966 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 967 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
968 while (s < strend) {
969 if (tmp == !(OP(c) == BOUND ?
3568d838 970 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
971 isALNUM_LC_utf8((U8*)s)))
972 {
973 tmp = !tmp;
974 if ((norun || regtry(prog, s)))
975 goto got_it;
976 }
977 s += UTF8SKIP(s);
a687059c 978 }
a0d0e21e 979 }
667bb95a 980 else {
12d33761 981 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
982 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
983 while (s < strend) {
984 if (tmp ==
985 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
986 tmp = !tmp;
987 if ((norun || regtry(prog, s)))
988 goto got_it;
989 }
990 s++;
a0ed51b3 991 }
a0ed51b3 992 }
6eb5f6b9 993 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
994 goto got_it;
995 break;
bbce6d69 996 case NBOUNDL:
3280af22 997 PL_reg_flags |= RF_tainted;
bbce6d69 998 /* FALL THROUGH */
a0d0e21e 999 case NBOUND:
ffc61ed2 1000 if (do_utf8) {
12d33761 1001 if (s == PL_bostr)
ffc61ed2
JH
1002 tmp = '\n';
1003 else {
1aa99e6b 1004 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 1005
2b9d42f0 1006 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
1007 }
1008 tmp = ((OP(c) == NBOUND ?
9041c2e3 1009 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1010 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1011 while (s < strend) {
1012 if (tmp == !(OP(c) == NBOUND ?
3568d838 1013 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1014 isALNUM_LC_utf8((U8*)s)))
1015 tmp = !tmp;
1016 else if ((norun || regtry(prog, s)))
1017 goto got_it;
1018 s += UTF8SKIP(s);
1019 }
a0d0e21e 1020 }
667bb95a 1021 else {
12d33761 1022 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1023 tmp = ((OP(c) == NBOUND ?
1024 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1025 while (s < strend) {
1026 if (tmp ==
1027 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1028 tmp = !tmp;
1029 else if ((norun || regtry(prog, s)))
1030 goto got_it;
1031 s++;
1032 }
a0ed51b3 1033 }
6eb5f6b9 1034 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1035 goto got_it;
1036 break;
a0d0e21e 1037 case ALNUM:
ffc61ed2 1038 if (do_utf8) {
8269fa76 1039 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1040 while (s < strend) {
3568d838 1041 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1042 if (tmp && (norun || regtry(prog, s)))
1043 goto got_it;
1044 else
1045 tmp = doevery;
1046 }
bbce6d69 1047 else
ffc61ed2
JH
1048 tmp = 1;
1049 s += UTF8SKIP(s);
bbce6d69 1050 }
bbce6d69 1051 }
ffc61ed2
JH
1052 else {
1053 while (s < strend) {
1054 if (isALNUM(*s)) {
1055 if (tmp && (norun || regtry(prog, s)))
1056 goto got_it;
1057 else
1058 tmp = doevery;
1059 }
a0ed51b3 1060 else
ffc61ed2
JH
1061 tmp = 1;
1062 s++;
a0ed51b3 1063 }
a0ed51b3
LW
1064 }
1065 break;
bbce6d69 1066 case ALNUML:
3280af22 1067 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1068 if (do_utf8) {
1069 while (s < strend) {
1070 if (isALNUM_LC_utf8((U8*)s)) {
1071 if (tmp && (norun || regtry(prog, s)))
1072 goto got_it;
1073 else
1074 tmp = doevery;
1075 }
a687059c 1076 else
ffc61ed2
JH
1077 tmp = 1;
1078 s += UTF8SKIP(s);
a0d0e21e 1079 }
a0d0e21e 1080 }
ffc61ed2
JH
1081 else {
1082 while (s < strend) {
1083 if (isALNUM_LC(*s)) {
1084 if (tmp && (norun || regtry(prog, s)))
1085 goto got_it;
1086 else
1087 tmp = doevery;
1088 }
a0ed51b3 1089 else
ffc61ed2
JH
1090 tmp = 1;
1091 s++;
a0ed51b3 1092 }
a0ed51b3
LW
1093 }
1094 break;
a0d0e21e 1095 case NALNUM:
ffc61ed2 1096 if (do_utf8) {
8269fa76 1097 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1098 while (s < strend) {
3568d838 1099 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1100 if (tmp && (norun || regtry(prog, s)))
1101 goto got_it;
1102 else
1103 tmp = doevery;
1104 }
bbce6d69 1105 else
ffc61ed2
JH
1106 tmp = 1;
1107 s += UTF8SKIP(s);
bbce6d69 1108 }
bbce6d69 1109 }
ffc61ed2
JH
1110 else {
1111 while (s < strend) {
1112 if (!isALNUM(*s)) {
1113 if (tmp && (norun || regtry(prog, s)))
1114 goto got_it;
1115 else
1116 tmp = doevery;
1117 }
a0ed51b3 1118 else
ffc61ed2
JH
1119 tmp = 1;
1120 s++;
a0ed51b3 1121 }
a0ed51b3
LW
1122 }
1123 break;
bbce6d69 1124 case NALNUML:
3280af22 1125 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1126 if (do_utf8) {
1127 while (s < strend) {
1128 if (!isALNUM_LC_utf8((U8*)s)) {
1129 if (tmp && (norun || regtry(prog, s)))
1130 goto got_it;
1131 else
1132 tmp = doevery;
1133 }
a687059c 1134 else
ffc61ed2
JH
1135 tmp = 1;
1136 s += UTF8SKIP(s);
a687059c 1137 }
a0d0e21e 1138 }
ffc61ed2
JH
1139 else {
1140 while (s < strend) {
1141 if (!isALNUM_LC(*s)) {
1142 if (tmp && (norun || regtry(prog, s)))
1143 goto got_it;
1144 else
1145 tmp = doevery;
1146 }
a0ed51b3 1147 else
ffc61ed2
JH
1148 tmp = 1;
1149 s++;
a0ed51b3 1150 }
a0ed51b3
LW
1151 }
1152 break;
a0d0e21e 1153 case SPACE:
ffc61ed2 1154 if (do_utf8) {
8269fa76 1155 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1156 while (s < strend) {
3568d838 1157 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1158 if (tmp && (norun || regtry(prog, s)))
1159 goto got_it;
1160 else
1161 tmp = doevery;
1162 }
a0d0e21e 1163 else
ffc61ed2
JH
1164 tmp = 1;
1165 s += UTF8SKIP(s);
2304df62 1166 }
a0d0e21e 1167 }
ffc61ed2
JH
1168 else {
1169 while (s < strend) {
1170 if (isSPACE(*s)) {
1171 if (tmp && (norun || regtry(prog, s)))
1172 goto got_it;
1173 else
1174 tmp = doevery;
1175 }
a0ed51b3 1176 else
ffc61ed2
JH
1177 tmp = 1;
1178 s++;
a0ed51b3 1179 }
a0ed51b3
LW
1180 }
1181 break;
bbce6d69 1182 case SPACEL:
3280af22 1183 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1184 if (do_utf8) {
1185 while (s < strend) {
1186 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1187 if (tmp && (norun || regtry(prog, s)))
1188 goto got_it;
1189 else
1190 tmp = doevery;
1191 }
bbce6d69 1192 else
ffc61ed2
JH
1193 tmp = 1;
1194 s += UTF8SKIP(s);
bbce6d69 1195 }
bbce6d69 1196 }
ffc61ed2
JH
1197 else {
1198 while (s < strend) {
1199 if (isSPACE_LC(*s)) {
1200 if (tmp && (norun || regtry(prog, s)))
1201 goto got_it;
1202 else
1203 tmp = doevery;
1204 }
a0ed51b3 1205 else
ffc61ed2
JH
1206 tmp = 1;
1207 s++;
a0ed51b3 1208 }
a0ed51b3
LW
1209 }
1210 break;
a0d0e21e 1211 case NSPACE:
ffc61ed2 1212 if (do_utf8) {
8269fa76 1213 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1214 while (s < strend) {
3568d838 1215 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1216 if (tmp && (norun || regtry(prog, s)))
1217 goto got_it;
1218 else
1219 tmp = doevery;
1220 }
a0d0e21e 1221 else
ffc61ed2
JH
1222 tmp = 1;
1223 s += UTF8SKIP(s);
a687059c 1224 }
a0d0e21e 1225 }
ffc61ed2
JH
1226 else {
1227 while (s < strend) {
1228 if (!isSPACE(*s)) {
1229 if (tmp && (norun || regtry(prog, s)))
1230 goto got_it;
1231 else
1232 tmp = doevery;
1233 }
a0ed51b3 1234 else
ffc61ed2
JH
1235 tmp = 1;
1236 s++;
a0ed51b3 1237 }
a0ed51b3
LW
1238 }
1239 break;
bbce6d69 1240 case NSPACEL:
3280af22 1241 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1242 if (do_utf8) {
1243 while (s < strend) {
1244 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1245 if (tmp && (norun || regtry(prog, s)))
1246 goto got_it;
1247 else
1248 tmp = doevery;
1249 }
bbce6d69 1250 else
ffc61ed2
JH
1251 tmp = 1;
1252 s += UTF8SKIP(s);
bbce6d69 1253 }
bbce6d69 1254 }
ffc61ed2
JH
1255 else {
1256 while (s < strend) {
1257 if (!isSPACE_LC(*s)) {
1258 if (tmp && (norun || regtry(prog, s)))
1259 goto got_it;
1260 else
1261 tmp = doevery;
1262 }
a0ed51b3 1263 else
ffc61ed2
JH
1264 tmp = 1;
1265 s++;
a0ed51b3 1266 }
a0ed51b3
LW
1267 }
1268 break;
a0d0e21e 1269 case DIGIT:
ffc61ed2 1270 if (do_utf8) {
8269fa76 1271 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1272 while (s < strend) {
3568d838 1273 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1274 if (tmp && (norun || regtry(prog, s)))
1275 goto got_it;
1276 else
1277 tmp = doevery;
1278 }
a0d0e21e 1279 else
ffc61ed2
JH
1280 tmp = 1;
1281 s += UTF8SKIP(s);
2b69d0c2 1282 }
a0d0e21e 1283 }
ffc61ed2
JH
1284 else {
1285 while (s < strend) {
1286 if (isDIGIT(*s)) {
1287 if (tmp && (norun || regtry(prog, s)))
1288 goto got_it;
1289 else
1290 tmp = doevery;
1291 }
a0ed51b3 1292 else
ffc61ed2
JH
1293 tmp = 1;
1294 s++;
a0ed51b3 1295 }
a0ed51b3
LW
1296 }
1297 break;
b8c5462f
JH
1298 case DIGITL:
1299 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1300 if (do_utf8) {
1301 while (s < strend) {
1302 if (isDIGIT_LC_utf8((U8*)s)) {
1303 if (tmp && (norun || regtry(prog, s)))
1304 goto got_it;
1305 else
1306 tmp = doevery;
1307 }
b8c5462f 1308 else
ffc61ed2
JH
1309 tmp = 1;
1310 s += UTF8SKIP(s);
b8c5462f 1311 }
b8c5462f 1312 }
ffc61ed2
JH
1313 else {
1314 while (s < strend) {
1315 if (isDIGIT_LC(*s)) {
1316 if (tmp && (norun || regtry(prog, s)))
1317 goto got_it;
1318 else
1319 tmp = doevery;
1320 }
b8c5462f 1321 else
ffc61ed2
JH
1322 tmp = 1;
1323 s++;
b8c5462f 1324 }
b8c5462f
JH
1325 }
1326 break;
a0d0e21e 1327 case NDIGIT:
ffc61ed2 1328 if (do_utf8) {
8269fa76 1329 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1330 while (s < strend) {
3568d838 1331 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1332 if (tmp && (norun || regtry(prog, s)))
1333 goto got_it;
1334 else
1335 tmp = doevery;
1336 }
a0d0e21e 1337 else
ffc61ed2
JH
1338 tmp = 1;
1339 s += UTF8SKIP(s);
a687059c 1340 }
a0d0e21e 1341 }
ffc61ed2
JH
1342 else {
1343 while (s < strend) {
1344 if (!isDIGIT(*s)) {
1345 if (tmp && (norun || regtry(prog, s)))
1346 goto got_it;
1347 else
1348 tmp = doevery;
1349 }
a0ed51b3 1350 else
ffc61ed2
JH
1351 tmp = 1;
1352 s++;
a0ed51b3 1353 }
a0ed51b3
LW
1354 }
1355 break;
b8c5462f
JH
1356 case NDIGITL:
1357 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1358 if (do_utf8) {
1359 while (s < strend) {
1360 if (!isDIGIT_LC_utf8((U8*)s)) {
1361 if (tmp && (norun || regtry(prog, s)))
1362 goto got_it;
1363 else
1364 tmp = doevery;
1365 }
b8c5462f 1366 else
ffc61ed2
JH
1367 tmp = 1;
1368 s += UTF8SKIP(s);
b8c5462f 1369 }
a0ed51b3 1370 }
ffc61ed2
JH
1371 else {
1372 while (s < strend) {
1373 if (!isDIGIT_LC(*s)) {
1374 if (tmp && (norun || regtry(prog, s)))
1375 goto got_it;
1376 else
1377 tmp = doevery;
1378 }
cf93c79d 1379 else
ffc61ed2
JH
1380 tmp = 1;
1381 s++;
b8c5462f 1382 }
b8c5462f
JH
1383 }
1384 break;
b3c9acc1 1385 default:
3c3eec57
GS
1386 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1387 break;
d6a28714 1388 }
6eb5f6b9
JH
1389 return 0;
1390 got_it:
1391 return s;
1392}
1393
1394/*
1395 - regexec_flags - match a regexp against a string
1396 */
1397I32
1398Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1399 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1400/* strend: pointer to null at end of string */
1401/* strbeg: real beginning of string */
1402/* minend: end of match must be >=minend after stringarg. */
1403/* data: May be used for some additional optimizations. */
1404/* nosave: For optimizations. */
1405{
6eb5f6b9
JH
1406 register char *s;
1407 register regnode *c;
1408 register char *startpos = stringarg;
6eb5f6b9
JH
1409 I32 minlen; /* must match at least this many chars */
1410 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1411 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1412 constant substr. */ /* CC */
1413 I32 end_shift = 0; /* Same for the end. */ /* CC */
1414 I32 scream_pos = -1; /* Internal iterator of scream. */
1415 char *scream_olds;
1416 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1417 bool do_utf8 = DO_UTF8(sv);
6eb5f6b9
JH
1418
1419 PL_regcc = 0;
1420
1421 cache_re(prog);
1422#ifdef DEBUGGING
aea4f609 1423 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1424#endif
1425
1426 /* Be paranoid... */
1427 if (prog == NULL || startpos == NULL) {
1428 Perl_croak(aTHX_ "NULL regexp parameter");
1429 return 0;
1430 }
1431
1432 minlen = prog->minlen;
1aa99e6b 1433 if (do_utf8) {
3baa4c62
JH
1434 if (!(prog->reganch & ROPT_SANY_SEEN))
1435 if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
1aa99e6b
IH
1436 }
1437 else {
1438 if (strend - startpos < minlen) goto phooey;
1439 }
6eb5f6b9 1440
6eb5f6b9
JH
1441 /* Check validity of program. */
1442 if (UCHARAT(prog->program) != REG_MAGIC) {
1443 Perl_croak(aTHX_ "corrupted regexp program");
1444 }
1445
1446 PL_reg_flags = 0;
1447 PL_reg_eval_set = 0;
1448 PL_reg_maxiter = 0;
1449
1450 if (prog->reganch & ROPT_UTF8)
1451 PL_reg_flags |= RF_utf8;
1452
1453 /* Mark beginning of line for ^ and lookbehind. */
1454 PL_regbol = startpos;
1455 PL_bostr = strbeg;
1456 PL_reg_sv = sv;
1457
1458 /* Mark end of line for $ (and such) */
1459 PL_regeol = strend;
1460
1461 /* see how far we have to get to not match where we matched before */
1462 PL_regtill = startpos+minend;
1463
1464 /* We start without call_cc context. */
1465 PL_reg_call_cc = 0;
1466
1467 /* If there is a "must appear" string, look for it. */
1468 s = startpos;
1469
1470 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1471 MAGIC *mg;
1472
1473 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1474 PL_reg_ganch = startpos;
1475 else if (sv && SvTYPE(sv) >= SVt_PVMG
1476 && SvMAGIC(sv)
14befaf4
DM
1477 && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1478 && mg->mg_len >= 0) {
6eb5f6b9
JH
1479 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1480 if (prog->reganch & ROPT_ANCH_GPOS) {
1481 if (s > PL_reg_ganch)
1482 goto phooey;
1483 s = PL_reg_ganch;
1484 }
1485 }
1486 else /* pos() not defined */
1487 PL_reg_ganch = strbeg;
1488 }
1489
699c3c34
JH
1490 if (do_utf8 == (UTF!=0) &&
1491 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1492 re_scream_pos_data d;
1493
1494 d.scream_olds = &scream_olds;
1495 d.scream_pos = &scream_pos;
1496 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1497 if (!s)
1498 goto phooey; /* not present */
1499 }
1500
1501 DEBUG_r( if (!PL_colorset) reginitcolors() );
1502 DEBUG_r(PerlIO_printf(Perl_debug_log,
1503 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1504 PL_colors[4],PL_colors[5],PL_colors[0],
1505 prog->precomp,
1506 PL_colors[1],
1507 (strlen(prog->precomp) > 60 ? "..." : ""),
1508 PL_colors[0],
1509 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1510 startpos, PL_colors[1],
1511 (strend - startpos > 60 ? "..." : ""))
1512 );
1513
1514 /* Simplest case: anchored match need be tried only once. */
1515 /* [unless only anchor is BOL and multiline is set] */
1516 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1517 if (s == startpos && regtry(prog, startpos))
1518 goto got_it;
1519 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1520 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1521 {
1522 char *end;
1523
1524 if (minlen)
1525 dontbother = minlen - 1;
1aa99e6b 1526 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1527 /* for multiline we only have to try after newlines */
1528 if (prog->check_substr) {
1529 if (s == startpos)
1530 goto after_try;
1531 while (1) {
1532 if (regtry(prog, s))
1533 goto got_it;
1534 after_try:
1535 if (s >= end)
1536 goto phooey;
1537 if (prog->reganch & RE_USE_INTUIT) {
1538 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1539 if (!s)
1540 goto phooey;
1541 }
1542 else
1543 s++;
1544 }
1545 } else {
1546 if (s > startpos)
1547 s--;
1548 while (s < end) {
1549 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1550 if (regtry(prog, s))
1551 goto got_it;
1552 }
1553 }
1554 }
1555 }
1556 goto phooey;
1557 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1558 if (regtry(prog, PL_reg_ganch))
1559 goto got_it;
1560 goto phooey;
1561 }
1562
1563 /* Messy cases: unanchored match. */
9041c2e3 1564 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1565 /* we have /x+whatever/ */
1566 /* it must be a one character string (XXXX Except UTF?) */
1567 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1568#ifdef DEBUGGING
1569 int did_match = 0;
1570#endif
1571
1aa99e6b 1572 if (do_utf8) {
6eb5f6b9
JH
1573 while (s < strend) {
1574 if (*s == ch) {
bf93d4cc 1575 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1576 if (regtry(prog, s)) goto got_it;
1577 s += UTF8SKIP(s);
1578 while (s < strend && *s == ch)
1579 s += UTF8SKIP(s);
1580 }
1581 s += UTF8SKIP(s);
1582 }
1583 }
1584 else {
1585 while (s < strend) {
1586 if (*s == ch) {
bf93d4cc 1587 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1588 if (regtry(prog, s)) goto got_it;
1589 s++;
1590 while (s < strend && *s == ch)
1591 s++;
1592 }
1593 s++;
1594 }
1595 }
bf93d4cc
GS
1596 DEBUG_r(did_match ||
1597 PerlIO_printf(Perl_debug_log,
1598 "Did not find anchored character...\n"));
6eb5f6b9
JH
1599 }
1600 /*SUPPRESS 560*/
1aa99e6b
IH
1601 else if (do_utf8 == (UTF!=0) &&
1602 (prog->anchored_substr != Nullsv
9041c2e3 1603 || (prog->float_substr != Nullsv
1aa99e6b 1604 && prog->float_max_offset < strend - s))) {
9041c2e3 1605 SV *must = prog->anchored_substr
6eb5f6b9 1606 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1607 I32 back_max =
6eb5f6b9 1608 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1609 I32 back_min =
6eb5f6b9 1610 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1611 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1612 -(I32)(CHR_SVLEN(must)
1aa99e6b 1613 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1614 char *last1; /* Last position checked before */
bf93d4cc
GS
1615#ifdef DEBUGGING
1616 int did_match = 0;
1617#endif
6eb5f6b9
JH
1618
1619 if (s > PL_bostr)
1620 last1 = HOPc(s, -1);
1621 else
1622 last1 = s - 1; /* bogus */
1623
1624 /* XXXX check_substr already used to find `s', can optimize if
1625 check_substr==must. */
1626 scream_pos = -1;
1627 dontbother = end_shift;
1628 strend = HOPc(strend, -dontbother);
1629 while ( (s <= last) &&
9041c2e3 1630 ((flags & REXEC_SCREAM)
1aa99e6b 1631 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1632 end_shift, &scream_pos, 0))
1aa99e6b 1633 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1634 (unsigned char*)strend, must,
6eb5f6b9 1635 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1636 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1637 if (HOPc(s, -back_max) > last1) {
1638 last1 = HOPc(s, -back_min);
1639 s = HOPc(s, -back_max);
1640 }
1641 else {
1642 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1643
1644 last1 = HOPc(s, -back_min);
1645 s = t;
1646 }
1aa99e6b 1647 if (do_utf8) {
6eb5f6b9
JH
1648 while (s <= last1) {
1649 if (regtry(prog, s))
1650 goto got_it;
1651 s += UTF8SKIP(s);
1652 }
1653 }
1654 else {
1655 while (s <= last1) {
1656 if (regtry(prog, s))
1657 goto got_it;
1658 s++;
1659 }
1660 }
1661 }
bf93d4cc
GS
1662 DEBUG_r(did_match ||
1663 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1664 ((must == prog->anchored_substr)
1665 ? "anchored" : "floating"),
1666 PL_colors[0],
1667 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1668 SvPVX(must),
1669 PL_colors[1], (SvTAIL(must) ? "$" : "")));
6eb5f6b9
JH
1670 goto phooey;
1671 }
155aba94 1672 else if ((c = prog->regstclass)) {
66e933ab
GS
1673 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1674 /* don't bother with what can't match */
6eb5f6b9 1675 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1676 DEBUG_r({
1677 SV *prop = sv_newmortal();
1678 regprop(prop, c);
1679 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s);
1680 });
6eb5f6b9
JH
1681 if (find_byclass(prog, c, s, strend, startpos, 0))
1682 goto got_it;
bf93d4cc 1683 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1684 }
1685 else {
1686 dontbother = 0;
1687 if (prog->float_substr != Nullsv) { /* Trim the end. */
1688 char *last;
d6a28714
JH
1689
1690 if (flags & REXEC_SCREAM) {
1691 last = screaminstr(sv, prog->float_substr, s - strbeg,
1692 end_shift, &scream_pos, 1); /* last one */
1693 if (!last)
ffc61ed2 1694 last = scream_olds; /* Only one occurrence. */
b8c5462f 1695 }
d6a28714
JH
1696 else {
1697 STRLEN len;
1698 char *little = SvPV(prog->float_substr, len);
1699
1700 if (SvTAIL(prog->float_substr)) {
1701 if (memEQ(strend - len + 1, little, len - 1))
1702 last = strend - len + 1;
1703 else if (!PL_multiline)
9041c2e3 1704 last = memEQ(strend - len, little, len)
d6a28714 1705 ? strend - len : Nullch;
b8c5462f 1706 else
d6a28714
JH
1707 goto find_last;
1708 } else {
1709 find_last:
9041c2e3 1710 if (len)
d6a28714 1711 last = rninstr(s, strend, little, little + len);
b8c5462f 1712 else
d6a28714 1713 last = strend; /* matching `$' */
b8c5462f 1714 }
b8c5462f 1715 }
bf93d4cc
GS
1716 if (last == NULL) {
1717 DEBUG_r(PerlIO_printf(Perl_debug_log,
1718 "%sCan't trim the tail, match fails (should not happen)%s\n",
1719 PL_colors[4],PL_colors[5]));
1720 goto phooey; /* Should not happen! */
1721 }
d6a28714
JH
1722 dontbother = strend - last + prog->float_min_offset;
1723 }
1724 if (minlen && (dontbother < minlen))
1725 dontbother = minlen - 1;
1726 strend -= dontbother; /* this one's always in bytes! */
1727 /* We don't know much -- general case. */
1aa99e6b 1728 if (do_utf8) {
d6a28714
JH
1729 for (;;) {
1730 if (regtry(prog, s))
1731 goto got_it;
1732 if (s >= strend)
1733 break;
b8c5462f 1734 s += UTF8SKIP(s);
d6a28714
JH
1735 };
1736 }
1737 else {
1738 do {
1739 if (regtry(prog, s))
1740 goto got_it;
1741 } while (s++ < strend);
1742 }
1743 }
1744
1745 /* Failure. */
1746 goto phooey;
1747
1748got_it:
1749 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1750
1751 if (PL_reg_eval_set) {
1752 /* Preserve the current value of $^R */
1753 if (oreplsv != GvSV(PL_replgv))
1754 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1755 restored, the value remains
1756 the same. */
1757 restore_pos(aTHXo_ 0);
1758 }
1759
1760 /* make sure $`, $&, $', and $digit will work later */
1761 if ( !(flags & REXEC_NOT_FIRST) ) {
1762 if (RX_MATCH_COPIED(prog)) {
1763 Safefree(prog->subbeg);
1764 RX_MATCH_COPIED_off(prog);
1765 }
1766 if (flags & REXEC_COPY_STR) {
1767 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1768
1769 s = savepvn(strbeg, i);
1770 prog->subbeg = s;
1771 prog->sublen = i;
1772 RX_MATCH_COPIED_on(prog);
1773 }
1774 else {
1775 prog->subbeg = strbeg;
1776 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1777 }
1778 }
9041c2e3 1779
d6a28714
JH
1780 return 1;
1781
1782phooey:
bf93d4cc
GS
1783 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1784 PL_colors[4],PL_colors[5]));
d6a28714
JH
1785 if (PL_reg_eval_set)
1786 restore_pos(aTHXo_ 0);
1787 return 0;
1788}
1789
1790/*
1791 - regtry - try match at specific point
1792 */
1793STATIC I32 /* 0 failure, 1 success */
1794S_regtry(pTHX_ regexp *prog, char *startpos)
1795{
d6a28714
JH
1796 register I32 i;
1797 register I32 *sp;
1798 register I32 *ep;
1799 CHECKPOINT lastcp;
1800
02db2b7b
IZ
1801#ifdef DEBUGGING
1802 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1803#endif
d6a28714
JH
1804 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1805 MAGIC *mg;
1806
1807 PL_reg_eval_set = RS_init;
1808 DEBUG_r(DEBUG_s(
b900a521
JH
1809 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1810 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1811 ));
e8347627 1812 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1813 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1814 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1815 SAVETMPS;
1816 /* Apparently this is not needed, judging by wantarray. */
e8347627 1817 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1818 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1819
1820 if (PL_reg_sv) {
1821 /* Make $_ available to executed code. */
1822 if (PL_reg_sv != DEFSV) {
1823 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1824 SAVESPTR(DEFSV);
1825 DEFSV = PL_reg_sv;
b8c5462f 1826 }
d6a28714 1827
9041c2e3 1828 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
14befaf4 1829 && (mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global)))) {
d6a28714 1830 /* prepare for quick setting of pos */
14befaf4
DM
1831 sv_magic(PL_reg_sv, (SV*)0,
1832 PERL_MAGIC_regex_global, Nullch, 0);
1833 mg = mg_find(PL_reg_sv, PERL_MAGIC_regex_global);
d6a28714 1834 mg->mg_len = -1;
b8c5462f 1835 }
d6a28714
JH
1836 PL_reg_magic = mg;
1837 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1838 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1839 }
1840 if (!PL_reg_curpm)
0f79a09d 1841 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1842 PL_reg_curpm->op_pmregexp = prog;
1843 PL_reg_oldcurpm = PL_curpm;
1844 PL_curpm = PL_reg_curpm;
1845 if (RX_MATCH_COPIED(prog)) {
1846 /* Here is a serious problem: we cannot rewrite subbeg,
1847 since it may be needed if this match fails. Thus
1848 $` inside (?{}) could fail... */
1849 PL_reg_oldsaved = prog->subbeg;
1850 PL_reg_oldsavedlen = prog->sublen;
1851 RX_MATCH_COPIED_off(prog);
1852 }
1853 else
1854 PL_reg_oldsaved = Nullch;
1855 prog->subbeg = PL_bostr;
1856 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1857 }
1858 prog->startp[0] = startpos - PL_bostr;
1859 PL_reginput = startpos;
1860 PL_regstartp = prog->startp;
1861 PL_regendp = prog->endp;
1862 PL_reglastparen = &prog->lastparen;
1863 prog->lastparen = 0;
1864 PL_regsize = 0;
1865 DEBUG_r(PL_reg_starttry = startpos);
1866 if (PL_reg_start_tmpl <= prog->nparens) {
1867 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1868 if(PL_reg_start_tmp)
1869 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1870 else
1871 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1872 }
1873
1874 /* XXXX What this code is doing here?!!! There should be no need
1875 to do this again and again, PL_reglastparen should take care of
3dd2943c 1876 this! --ilya*/
dafc8851
JH
1877
1878 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1879 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1880 * PL_reglastparen), is not needed at all by the test suite
1881 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1882 * enough, for building DynaLoader, or otherwise this
1883 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1884 * will happen. Meanwhile, this code *is* needed for the
1885 * above-mentioned test suite tests to succeed. The common theme
1886 * on those tests seems to be returning null fields from matches.
1887 * --jhi */
dafc8851 1888#if 1
d6a28714
JH
1889 sp = prog->startp;
1890 ep = prog->endp;
1891 if (prog->nparens) {
09e8ae3b 1892 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1893 *++sp = -1;
1894 *++ep = -1;
1895 }
1896 }
dafc8851 1897#endif
02db2b7b 1898 REGCP_SET(lastcp);
d6a28714
JH
1899 if (regmatch(prog->program + 1)) {
1900 prog->endp[0] = PL_reginput - PL_bostr;
1901 return 1;
1902 }
02db2b7b 1903 REGCP_UNWIND(lastcp);
d6a28714
JH
1904 return 0;
1905}
1906
02db2b7b
IZ
1907#define RE_UNWIND_BRANCH 1
1908#define RE_UNWIND_BRANCHJ 2
1909
1910union re_unwind_t;
1911
1912typedef struct { /* XX: makes sense to enlarge it... */
1913 I32 type;
1914 I32 prev;
1915 CHECKPOINT lastcp;
1916} re_unwind_generic_t;
1917
1918typedef struct {
1919 I32 type;
1920 I32 prev;
1921 CHECKPOINT lastcp;
1922 I32 lastparen;
1923 regnode *next;
1924 char *locinput;
1925 I32 nextchr;
1926#ifdef DEBUGGING
1927 int regindent;
1928#endif
1929} re_unwind_branch_t;
1930
1931typedef union re_unwind_t {
1932 I32 type;
1933 re_unwind_generic_t generic;
1934 re_unwind_branch_t branch;
1935} re_unwind_t;
1936
d6a28714
JH
1937/*
1938 - regmatch - main matching routine
1939 *
1940 * Conceptually the strategy is simple: check to see whether the current
1941 * node matches, call self recursively to see whether the rest matches,
1942 * and then act accordingly. In practice we make some effort to avoid
1943 * recursion, in particular by going through "ordinary" nodes (that don't
1944 * need to know whether the rest of the match failed) by a loop instead of
1945 * by recursion.
1946 */
1947/* [lwall] I've hoisted the register declarations to the outer block in order to
1948 * maybe save a little bit of pushing and popping on the stack. It also takes
1949 * advantage of machines that use a register save mask on subroutine entry.
1950 */
1951STATIC I32 /* 0 failure, 1 success */
1952S_regmatch(pTHX_ regnode *prog)
1953{
d6a28714
JH
1954 register regnode *scan; /* Current node. */
1955 regnode *next; /* Next node. */
1956 regnode *inner; /* Next node in internal branch. */
1957 register I32 nextchr; /* renamed nextchr - nextchar colides with
1958 function of same name */
1959 register I32 n; /* no or next */
1960 register I32 ln; /* len or last */
1961 register char *s; /* operand or save */
1962 register char *locinput = PL_reginput;
1963 register I32 c1, c2, paren; /* case fold search, parenth */
1964 int minmod = 0, sw = 0, logical = 0;
02db2b7b
IZ
1965 I32 unwind = 0;
1966 I32 firstcp = PL_savestack_ix;
ffc61ed2 1967 register bool do_utf8 = DO_UTF8(PL_reg_sv);
02db2b7b 1968
d6a28714
JH
1969#ifdef DEBUGGING
1970 PL_regindent++;
1971#endif
1972
1973 /* Note that nextchr is a byte even in UTF */
1974 nextchr = UCHARAT(locinput);
1975 scan = prog;
1976 while (scan != NULL) {
1977#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
02db2b7b 1978#if 1
d6a28714
JH
1979# define sayYES goto yes
1980# define sayNO goto no
7821416a
IZ
1981# define sayYES_FINAL goto yes_final
1982# define sayYES_LOUD goto yes_loud
1983# define sayNO_FINAL goto no_final
1984# define sayNO_SILENT goto do_no
d6a28714
JH
1985# define saySAME(x) if (x) goto yes; else goto no
1986# define REPORT_CODE_OFF 24
1987#else
1988# define sayYES return 1
1989# define sayNO return 0
7821416a
IZ
1990# define sayYES_FINAL return 1
1991# define sayYES_LOUD return 1
1992# define sayNO_FINAL return 0
1993# define sayNO_SILENT return 0
d6a28714
JH
1994# define saySAME(x) return x
1995#endif
1996 DEBUG_r( {
1997 SV *prop = sv_newmortal();
1998 int docolor = *PL_colors[0];
1999 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2000 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2001 /* The part of the string before starttry has one color
2002 (pref0_len chars), between starttry and current
2003 position another one (pref_len - pref0_len chars),
2004 after the current position the third one.
2005 We assume that pref0_len <= pref_len, otherwise we
2006 decrease pref0_len. */
9041c2e3 2007 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2008 ? (5 + taill) - l : locinput - PL_bostr;
2009 int pref0_len;
d6a28714 2010
1aa99e6b
IH
2011 while (UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2012 pref_len++;
2013 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2014 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2015 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2016 ? (5 + taill) - pref_len : PL_regeol - locinput);
1aa99e6b
IH
2017 while (UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2018 l--;
d6a28714
JH
2019 if (pref0_len < 0)
2020 pref0_len = 0;
2021 if (pref0_len > pref_len)
2022 pref0_len = pref_len;
2023 regprop(prop, scan);
9041c2e3 2024 PerlIO_printf(Perl_debug_log,
b900a521 2025 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
9041c2e3
NIS
2026 (IV)(locinput - PL_bostr),
2027 PL_colors[4], pref0_len,
d6a28714 2028 locinput - pref_len, PL_colors[5],
9041c2e3 2029 PL_colors[2], pref_len - pref0_len,
d6a28714
JH
2030 locinput - pref_len + pref0_len, PL_colors[3],
2031 (docolor ? "" : "> <"),
2032 PL_colors[0], l, locinput, PL_colors[1],
2033 15 - l - pref_len + 1,
2034 "",
b900a521 2035 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
2036 SvPVX(prop));
2037 } );
2038
2039 next = scan + NEXT_OFF(scan);
2040 if (next == scan)
2041 next = NULL;
2042
2043 switch (OP(scan)) {
2044 case BOL:
12d33761
HS
2045 if (locinput == PL_bostr || (PL_multiline &&
2046 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2047 {
2048 /* regtill = regbol; */
b8c5462f
JH
2049 break;
2050 }
d6a28714
JH
2051 sayNO;
2052 case MBOL:
12d33761
HS
2053 if (locinput == PL_bostr ||
2054 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2055 {
b8c5462f
JH
2056 break;
2057 }
d6a28714
JH
2058 sayNO;
2059 case SBOL:
c2a73568 2060 if (locinput == PL_bostr)
b8c5462f 2061 break;
d6a28714
JH
2062 sayNO;
2063 case GPOS:
2064 if (locinput == PL_reg_ganch)
2065 break;
2066 sayNO;
2067 case EOL:
2068 if (PL_multiline)
2069 goto meol;
2070 else
2071 goto seol;
2072 case MEOL:
2073 meol:
2074 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2075 sayNO;
b8c5462f 2076 break;
d6a28714
JH
2077 case SEOL:
2078 seol:
2079 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2080 sayNO;
d6a28714 2081 if (PL_regeol - locinput > 1)
b8c5462f 2082 sayNO;
b8c5462f 2083 break;
d6a28714
JH
2084 case EOS:
2085 if (PL_regeol != locinput)
b8c5462f 2086 sayNO;
d6a28714 2087 break;
ffc61ed2 2088 case SANY:
d6a28714 2089 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2090 sayNO;
b8c5462f 2091 nextchr = UCHARAT(++locinput);
a0d0e21e 2092 break;
ffc61ed2 2093 case REG_ANY:
1aa99e6b
IH
2094 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2095 sayNO;
2096 if (do_utf8) {
b8c5462f 2097 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2098 if (locinput > PL_regeol)
2099 sayNO;
a0ed51b3 2100 nextchr = UCHARAT(locinput);
a0ed51b3 2101 }
1aa99e6b
IH
2102 else
2103 nextchr = UCHARAT(++locinput);
a0ed51b3 2104 break;
d6a28714 2105 case EXACT:
cd439c50
IZ
2106 s = STRING(scan);
2107 ln = STR_LEN(scan);
1aa99e6b
IH
2108 if (do_utf8 != (UTF!=0)) {
2109 char *l = locinput;
2110 char *e = s + ln;
2111 STRLEN len;
2112 if (do_utf8)
2113 while (s < e) {
2114 if (l >= PL_regeol)
2115 sayNO;
9041c2e3 2116 if (*((U8*)s) != utf8_to_uvchr((U8*)l, &len))
1aa99e6b
IH
2117 sayNO;
2118 s++;
2119 l += len;
2120 }
2121 else
2122 while (s < e) {
2123 if (l >= PL_regeol)
2124 sayNO;
9041c2e3 2125 if (*((U8*)l) != utf8_to_uvchr((U8*)s, &len))
1aa99e6b
IH
2126 sayNO;
2127 s += len;
2128 l++;
2129 }
2130 locinput = l;
2131 nextchr = UCHARAT(locinput);
2132 break;
2133 }
d6a28714
JH
2134 /* Inline the first character, for speed. */
2135 if (UCHARAT(s) != nextchr)
2136 sayNO;
2137 if (PL_regeol - locinput < ln)
2138 sayNO;
2139 if (ln > 1 && memNE(s, locinput, ln))
2140 sayNO;
2141 locinput += ln;
2142 nextchr = UCHARAT(locinput);
2143 break;
2144 case EXACTFL:
b8c5462f
JH
2145 PL_reg_flags |= RF_tainted;
2146 /* FALL THROUGH */
d6a28714 2147 case EXACTF:
cd439c50
IZ
2148 s = STRING(scan);
2149 ln = STR_LEN(scan);
d6a28714 2150
1aa99e6b 2151 if (do_utf8) {
d6a28714 2152 char *l = locinput;
1aa99e6b
IH
2153 char *e;
2154 e = s + ln;
d6a28714
JH
2155 c1 = OP(scan) == EXACTF;
2156 while (s < e) {
1aa99e6b 2157 if (l >= PL_regeol) {
d6a28714
JH
2158 sayNO;
2159 }
2b9d42f0 2160 if ((UTF ? utf8n_to_uvchr((U8*)s, e - s, 0, 0) : *((U8*)s)) !=
1aa99e6b
IH
2161 (c1 ? toLOWER_utf8((U8*)l) : toLOWER_LC_utf8((U8*)l)))
2162 sayNO;
2163 s += UTF ? UTF8SKIP(s) : 1;
d6a28714 2164 l += UTF8SKIP(l);
b8c5462f 2165 }
d6a28714 2166 locinput = l;
a0ed51b3
LW
2167 nextchr = UCHARAT(locinput);
2168 break;
2169 }
d6a28714
JH
2170
2171 /* Inline the first character, for speed. */
2172 if (UCHARAT(s) != nextchr &&
2173 UCHARAT(s) != ((OP(scan) == EXACTF)
2174 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2175 sayNO;
d6a28714 2176 if (PL_regeol - locinput < ln)
b8c5462f 2177 sayNO;
d6a28714
JH
2178 if (ln > 1 && (OP(scan) == EXACTF
2179 ? ibcmp(s, locinput, ln)
2180 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2181 sayNO;
d6a28714
JH
2182 locinput += ln;
2183 nextchr = UCHARAT(locinput);
a0d0e21e 2184 break;
d6a28714 2185 case ANYOF:
ffc61ed2
JH
2186 if (do_utf8) {
2187 if (!reginclass(scan, (U8*)locinput, do_utf8))
2188 sayNO;
2189 if (locinput >= PL_regeol)
2190 sayNO;
2191 locinput += PL_utf8skip[nextchr];
b8c5462f 2192 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2193 }
2194 else {
2195 if (nextchr < 0)
2196 nextchr = UCHARAT(locinput);
2197 if (!reginclass(scan, (U8*)locinput, do_utf8))
2198 sayNO;
2199 if (!nextchr && locinput >= PL_regeol)
2200 sayNO;
2201 nextchr = UCHARAT(++locinput);
2202 }
b8c5462f 2203 break;
d6a28714 2204 case ALNUML:
b8c5462f
JH
2205 PL_reg_flags |= RF_tainted;
2206 /* FALL THROUGH */
d6a28714 2207 case ALNUM:
b8c5462f 2208 if (!nextchr)
4633a7c4 2209 sayNO;
ffc61ed2
JH
2210 if (do_utf8) {
2211 if (!(OP(scan) == ALNUM
3568d838 2212 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2213 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2214 {
2215 sayNO;
a0ed51b3 2216 }
b8c5462f 2217 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2218 nextchr = UCHARAT(locinput);
2219 break;
2220 }
ffc61ed2 2221 if (!(OP(scan) == ALNUM
d6a28714 2222 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2223 sayNO;
b8c5462f 2224 nextchr = UCHARAT(++locinput);
a0d0e21e 2225 break;
d6a28714 2226 case NALNUML:
b8c5462f
JH
2227 PL_reg_flags |= RF_tainted;
2228 /* FALL THROUGH */
d6a28714
JH
2229 case NALNUM:
2230 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2231 sayNO;
ffc61ed2 2232 if (do_utf8) {
8269fa76 2233 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2234 if (OP(scan) == NALNUM
3568d838 2235 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2236 : isALNUM_LC_utf8((U8*)locinput))
2237 {
b8c5462f 2238 sayNO;
d6a28714 2239 }
b8c5462f
JH
2240 locinput += PL_utf8skip[nextchr];
2241 nextchr = UCHARAT(locinput);
2242 break;
2243 }
ffc61ed2 2244 if (OP(scan) == NALNUM
d6a28714 2245 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2246 sayNO;
76e3520e 2247 nextchr = UCHARAT(++locinput);
a0d0e21e 2248 break;
d6a28714
JH
2249 case BOUNDL:
2250 case NBOUNDL:
3280af22 2251 PL_reg_flags |= RF_tainted;
bbce6d69 2252 /* FALL THROUGH */
d6a28714
JH
2253 case BOUND:
2254 case NBOUND:
2255 /* was last char in word? */
ffc61ed2 2256 if (do_utf8) {
12d33761
HS
2257 if (locinput == PL_bostr)
2258 ln = '\n';
ffc61ed2
JH
2259 else {
2260 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2261
2b9d42f0 2262 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2263 }
2264 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2265 ln = isALNUM_uni(ln);
8269fa76 2266 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2267 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2268 }
2269 else {
9041c2e3 2270 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2271 n = isALNUM_LC_utf8((U8*)locinput);
2272 }
a0ed51b3 2273 }
d6a28714 2274 else {
12d33761
HS
2275 ln = (locinput != PL_bostr) ?
2276 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2277 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2278 ln = isALNUM(ln);
2279 n = isALNUM(nextchr);
2280 }
2281 else {
2282 ln = isALNUM_LC(ln);
2283 n = isALNUM_LC(nextchr);
2284 }
d6a28714 2285 }
ffc61ed2
JH
2286 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2287 OP(scan) == BOUNDL))
2288 sayNO;
a0ed51b3 2289 break;
d6a28714 2290 case SPACEL:
3280af22 2291 PL_reg_flags |= RF_tainted;
bbce6d69 2292 /* FALL THROUGH */
d6a28714 2293 case SPACE:
9442cb0e 2294 if (!nextchr)
4633a7c4 2295 sayNO;
1aa99e6b 2296 if (do_utf8) {
fd400ab9 2297 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2298 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2299 if (!(OP(scan) == SPACE
3568d838 2300 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2301 : isSPACE_LC_utf8((U8*)locinput)))
2302 {
2303 sayNO;
2304 }
2305 locinput += PL_utf8skip[nextchr];
2306 nextchr = UCHARAT(locinput);
2307 break;
d6a28714 2308 }
ffc61ed2
JH
2309 if (!(OP(scan) == SPACE
2310 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2311 sayNO;
2312 nextchr = UCHARAT(++locinput);
2313 }
2314 else {
2315 if (!(OP(scan) == SPACE
2316 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2317 sayNO;
2318 nextchr = UCHARAT(++locinput);
a0ed51b3 2319 }
a0ed51b3 2320 break;
d6a28714 2321 case NSPACEL:
3280af22 2322 PL_reg_flags |= RF_tainted;
bbce6d69 2323 /* FALL THROUGH */
d6a28714 2324 case NSPACE:
9442cb0e 2325 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2326 sayNO;
1aa99e6b 2327 if (do_utf8) {
8269fa76 2328 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2329 if (OP(scan) == NSPACE
3568d838 2330 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2331 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2332 {
2333 sayNO;
2334 }
2335 locinput += PL_utf8skip[nextchr];
2336 nextchr = UCHARAT(locinput);
2337 break;
a0ed51b3 2338 }
ffc61ed2 2339 if (OP(scan) == NSPACE
d6a28714 2340 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2341 sayNO;
76e3520e 2342 nextchr = UCHARAT(++locinput);
a0d0e21e 2343 break;
d6a28714 2344 case DIGITL:
a0ed51b3
LW
2345 PL_reg_flags |= RF_tainted;
2346 /* FALL THROUGH */
d6a28714 2347 case DIGIT:
9442cb0e 2348 if (!nextchr)
a0ed51b3 2349 sayNO;
1aa99e6b 2350 if (do_utf8) {
8269fa76 2351 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2352 if (!(OP(scan) == DIGIT
3568d838 2353 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2354 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2355 {
a0ed51b3 2356 sayNO;
dfe13c55 2357 }
6f06b55f 2358 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2359 nextchr = UCHARAT(locinput);
2360 break;
2361 }
ffc61ed2 2362 if (!(OP(scan) == DIGIT
9442cb0e 2363 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2364 sayNO;
2365 nextchr = UCHARAT(++locinput);
2366 break;
d6a28714 2367 case NDIGITL:
b8c5462f
JH
2368 PL_reg_flags |= RF_tainted;
2369 /* FALL THROUGH */
d6a28714 2370 case NDIGIT:
9442cb0e 2371 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2372 sayNO;
1aa99e6b 2373 if (do_utf8) {
8269fa76 2374 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2375 if (OP(scan) == NDIGIT
3568d838 2376 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2377 : isDIGIT_LC_utf8((U8*)locinput))
2378 {
a0ed51b3 2379 sayNO;
9442cb0e 2380 }
6f06b55f 2381 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2382 nextchr = UCHARAT(locinput);
2383 break;
2384 }
ffc61ed2 2385 if (OP(scan) == NDIGIT
9442cb0e 2386 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2387 sayNO;
2388 nextchr = UCHARAT(++locinput);
2389 break;
2390 case CLUMP:
8269fa76 2391 LOAD_UTF8_CHARCLASS(mark,"~");
3568d838
JH
2392 if (locinput >= PL_regeol ||
2393 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3 2394 sayNO;
6f06b55f 2395 locinput += PL_utf8skip[nextchr];
3568d838
JH
2396 while (locinput < PL_regeol &&
2397 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3
LW
2398 locinput += UTF8SKIP(locinput);
2399 if (locinput > PL_regeol)
2400 sayNO;
2401 nextchr = UCHARAT(locinput);
2402 break;
c8756f30 2403 case REFFL:
3280af22 2404 PL_reg_flags |= RF_tainted;
c8756f30 2405 /* FALL THROUGH */
c277df42 2406 case REF:
c8756f30 2407 case REFF:
c277df42 2408 n = ARG(scan); /* which paren pair */
cf93c79d 2409 ln = PL_regstartp[n];
2c2d71f5 2410 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2411 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2412 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2413 if (ln == PL_regendp[n])
a0d0e21e 2414 break;
a0ed51b3 2415
cf93c79d 2416 s = PL_bostr + ln;
1aa99e6b 2417 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2418 char *l = locinput;
cf93c79d 2419 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2420 /*
2421 * Note that we can't do the "other character" lookup trick as
2422 * in the 8-bit case (no pun intended) because in Unicode we
2423 * have to map both upper and title case to lower case.
2424 */
2425 if (OP(scan) == REFF) {
2426 while (s < e) {
2427 if (l >= PL_regeol)
2428 sayNO;
dfe13c55 2429 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2430 sayNO;
2431 s += UTF8SKIP(s);
2432 l += UTF8SKIP(l);
2433 }
2434 }
2435 else {
2436 while (s < e) {
2437 if (l >= PL_regeol)
2438 sayNO;
dfe13c55 2439 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2440 sayNO;
2441 s += UTF8SKIP(s);
2442 l += UTF8SKIP(l);
2443 }
2444 }
2445 locinput = l;
2446 nextchr = UCHARAT(locinput);
2447 break;
2448 }
2449
a0d0e21e 2450 /* Inline the first character, for speed. */
76e3520e 2451 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2452 (OP(scan) == REF ||
2453 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2454 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2455 sayNO;
cf93c79d 2456 ln = PL_regendp[n] - ln;
3280af22 2457 if (locinput + ln > PL_regeol)
4633a7c4 2458 sayNO;
c8756f30
AK
2459 if (ln > 1 && (OP(scan) == REF
2460 ? memNE(s, locinput, ln)
2461 : (OP(scan) == REFF
2462 ? ibcmp(s, locinput, ln)
2463 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2464 sayNO;
a0d0e21e 2465 locinput += ln;
76e3520e 2466 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2467 break;
2468
2469 case NOTHING:
c277df42 2470 case TAIL:
a0d0e21e
LW
2471 break;
2472 case BACK:
2473 break;
c277df42
IZ
2474 case EVAL:
2475 {
2476 dSP;
533c011a 2477 OP_4tree *oop = PL_op;
3280af22
NIS
2478 COP *ocurcop = PL_curcop;
2479 SV **ocurpad = PL_curpad;
c277df42 2480 SV *ret;
9041c2e3 2481
c277df42 2482 n = ARG(scan);
533c011a 2483 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2484 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2485 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2486 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2487
cea2e8a9 2488 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2489 SPAGAIN;
2490 ret = POPs;
2491 PUTBACK;
9041c2e3 2492
0f5d15d6
IZ
2493 PL_op = oop;
2494 PL_curpad = ocurpad;
2495 PL_curcop = ocurcop;
c277df42 2496 if (logical) {
0f5d15d6
IZ
2497 if (logical == 2) { /* Postponed subexpression. */
2498 regexp *re;
22c35a8c 2499 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2500 re_cc_state state;
0f5d15d6
IZ
2501 CHECKPOINT cp, lastcp;
2502
2503 if(SvROK(ret) || SvRMAGICAL(ret)) {
2504 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2505
2506 if(SvMAGICAL(sv))
14befaf4 2507 mg = mg_find(sv, PERL_MAGIC_qr);
0f5d15d6
IZ
2508 }
2509 if (mg) {
2510 re = (regexp *)mg->mg_obj;
df0003d4 2511 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2512 }
2513 else {
2514 STRLEN len;
2515 char *t = SvPV(ret, len);
2516 PMOP pm;
2517 char *oprecomp = PL_regprecomp;
2518 I32 osize = PL_regsize;
2519 I32 onpar = PL_regnpar;
2520
5fcd1c1b 2521 Zero(&pm, 1, PMOP);
cea2e8a9 2522 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2523 if (!(SvFLAGS(ret)
0f5d15d6 2524 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
14befaf4
DM
2525 sv_magic(ret,(SV*)ReREFCNT_inc(re),
2526 PERL_MAGIC_qr,0,0);
0f5d15d6
IZ
2527 PL_regprecomp = oprecomp;
2528 PL_regsize = osize;
2529 PL_regnpar = onpar;
2530 }
2531 DEBUG_r(
9041c2e3 2532 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2533 "Entering embedded `%s%.60s%s%s'\n",
2534 PL_colors[0],
2535 re->precomp,
2536 PL_colors[1],
2537 (strlen(re->precomp) > 60 ? "..." : ""))
2538 );
2539 state.node = next;
2540 state.prev = PL_reg_call_cc;
2541 state.cc = PL_regcc;
2542 state.re = PL_reg_re;
2543
2ab05381 2544 PL_regcc = 0;
9041c2e3 2545
0f5d15d6 2546 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2547 REGCP_SET(lastcp);
0f5d15d6
IZ
2548 cache_re(re);
2549 state.ss = PL_savestack_ix;
2550 *PL_reglastparen = 0;
2551 PL_reg_call_cc = &state;
2552 PL_reginput = locinput;
2c2d71f5
JH
2553
2554 /* XXXX This is too dramatic a measure... */
2555 PL_reg_maxiter = 0;
2556
0f5d15d6 2557 if (regmatch(re->program + 1)) {
2c914db6
IZ
2558 /* Even though we succeeded, we need to restore
2559 global variables, since we may be wrapped inside
2560 SUSPEND, thus the match may be not finished yet. */
2561
2562 /* XXXX Do this only if SUSPENDed? */
2563 PL_reg_call_cc = state.prev;
2564 PL_regcc = state.cc;
2565 PL_reg_re = state.re;
2566 cache_re(PL_reg_re);
2567
2568 /* XXXX This is too dramatic a measure... */
2569 PL_reg_maxiter = 0;
2570
2571 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2572 ReREFCNT_dec(re);
2573 regcpblow(cp);
2574 sayYES;
2575 }
0f5d15d6 2576 ReREFCNT_dec(re);
02db2b7b 2577 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2578 regcppop();
2579 PL_reg_call_cc = state.prev;
2580 PL_regcc = state.cc;
2581 PL_reg_re = state.re;
d3790889 2582 cache_re(PL_reg_re);
2c2d71f5
JH
2583
2584 /* XXXX This is too dramatic a measure... */
2585 PL_reg_maxiter = 0;
2586
0f5d15d6
IZ
2587 sayNO;
2588 }
c277df42 2589 sw = SvTRUE(ret);
0f5d15d6 2590 logical = 0;
a0ed51b3
LW
2591 }
2592 else
3280af22 2593 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2594 break;
2595 }
a0d0e21e 2596 case OPEN:
c277df42 2597 n = ARG(scan); /* which paren pair */
3280af22
NIS
2598 PL_reg_start_tmp[n] = locinput;
2599 if (n > PL_regsize)
2600 PL_regsize = n;
a0d0e21e
LW
2601 break;
2602 case CLOSE:
c277df42 2603 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2604 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2605 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2606 if (n > *PL_reglastparen)
2607 *PL_reglastparen = n;
a0d0e21e 2608 break;
c277df42
IZ
2609 case GROUPP:
2610 n = ARG(scan); /* which paren pair */
cf93c79d 2611 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2612 break;
2613 case IFTHEN:
2c2d71f5 2614 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2615 if (sw)
2616 next = NEXTOPER(NEXTOPER(scan));
2617 else {
2618 next = scan + ARG(scan);
2619 if (OP(next) == IFTHEN) /* Fake one. */
2620 next = NEXTOPER(NEXTOPER(next));
2621 }
2622 break;
2623 case LOGICAL:
0f5d15d6 2624 logical = scan->flags;
c277df42 2625 break;
2ab05381
IZ
2626/*******************************************************************
2627 PL_regcc contains infoblock about the innermost (...)* loop, and
2628 a pointer to the next outer infoblock.
2629
2630 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2631
2632 1) After matching X, regnode for CURLYX is processed;
2633
9041c2e3 2634 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2635 regmatch() recursively with the starting point at WHILEM node;
2636
2637 3) Each hit of WHILEM node tries to match A and Z (in the order
2638 depending on the current iteration, min/max of {min,max} and
2639 greediness). The information about where are nodes for "A"
2640 and "Z" is read from the infoblock, as is info on how many times "A"
2641 was already matched, and greediness.
2642
2643 4) After A matches, the same WHILEM node is hit again.
2644
2645 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2646 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2647 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2648 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2649 of the external loop.
2650
2651 Currently present infoblocks form a tree with a stem formed by PL_curcc
2652 and whatever it mentions via ->next, and additional attached trees
2653 corresponding to temporarily unset infoblocks as in "5" above.
2654
9041c2e3 2655 In the following picture infoblocks for outer loop of
2ab05381
IZ
2656 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2657 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2658 infoblocks are drawn below the "reset" infoblock.
2659
2660 In fact in the picture below we do not show failed matches for Z and T
2661 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2662 more obvious *why* one needs to *temporary* unset infoblocks.]
2663
2664 Matched REx position InfoBlocks Comment
2665 (Y(A)*?Z)*?T x
2666 Y(A)*?Z)*?T x <- O
2667 Y (A)*?Z)*?T x <- O
2668 Y A)*?Z)*?T x <- O <- I
2669 YA )*?Z)*?T x <- O <- I
2670 YA A)*?Z)*?T x <- O <- I
2671 YAA )*?Z)*?T x <- O <- I
2672 YAA Z)*?T x <- O # Temporary unset I
2673 I
2674
2675 YAAZ Y(A)*?Z)*?T x <- O
2676 I
2677
2678 YAAZY (A)*?Z)*?T x <- O
2679 I
2680
2681 YAAZY A)*?Z)*?T x <- O <- I
2682 I
2683
2684 YAAZYA )*?Z)*?T x <- O <- I
2685 I
2686
2687 YAAZYA Z)*?T x <- O # Temporary unset I
2688 I,I
2689
2690 YAAZYAZ )*?T x <- O
2691 I,I
2692
2693 YAAZYAZ T x # Temporary unset O
2694 O
2695 I,I
2696
2697 YAAZYAZT x
2698 O
2699 I,I
2700 *******************************************************************/
a0d0e21e
LW
2701 case CURLYX: {
2702 CURCUR cc;
3280af22 2703 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2704 /* No need to save/restore up to this paren */
2705 I32 parenfloor = scan->flags;
c277df42
IZ
2706
2707 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2708 next += ARG(next);
3280af22
NIS
2709 cc.oldcc = PL_regcc;
2710 PL_regcc = &cc;
cb434fcc
IZ
2711 /* XXXX Probably it is better to teach regpush to support
2712 parenfloor > PL_regsize... */
2713 if (parenfloor > *PL_reglastparen)
2714 parenfloor = *PL_reglastparen; /* Pessimization... */
2715 cc.parenfloor = parenfloor;
a0d0e21e
LW
2716 cc.cur = -1;
2717 cc.min = ARG1(scan);
2718 cc.max = ARG2(scan);
c277df42 2719 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2720 cc.next = next;
2721 cc.minmod = minmod;
2722 cc.lastloc = 0;
3280af22 2723 PL_reginput = locinput;
a0d0e21e
LW
2724 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2725 regcpblow(cp);
3280af22 2726 PL_regcc = cc.oldcc;
4633a7c4 2727 saySAME(n);
a0d0e21e
LW
2728 }
2729 /* NOT REACHED */
2730 case WHILEM: {
2731 /*
2732 * This is really hard to understand, because after we match
2733 * what we're trying to match, we must make sure the rest of
2c2d71f5 2734 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2735 * to go back UP the parse tree by recursing ever deeper. And
2736 * if it fails, we have to reset our parent's current state
2737 * that we can try again after backing off.
2738 */
2739
c277df42 2740 CHECKPOINT cp, lastcp;
3280af22 2741 CURCUR* cc = PL_regcc;
c277df42
IZ
2742 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2743
4633a7c4 2744 n = cc->cur + 1; /* how many we know we matched */
3280af22 2745 PL_reginput = locinput;
a0d0e21e 2746
c277df42 2747 DEBUG_r(
9041c2e3
NIS
2748 PerlIO_printf(Perl_debug_log,
2749 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2750 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2751 (long)n, (long)cc->min,
c277df42
IZ
2752 (long)cc->max, (long)cc)
2753 );
4633a7c4 2754
a0d0e21e
LW
2755 /* If degenerate scan matches "", assume scan done. */
2756
579cf2c3 2757 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2758 PL_regcc = cc->oldcc;
2ab05381
IZ
2759 if (PL_regcc)
2760 ln = PL_regcc->cur;
c277df42 2761 DEBUG_r(
c3464db5
DD
2762 PerlIO_printf(Perl_debug_log,
2763 "%*s empty match detected, try continuation...\n",
3280af22 2764 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2765 );
a0d0e21e 2766 if (regmatch(cc->next))
4633a7c4 2767 sayYES;
2ab05381
IZ
2768 if (PL_regcc)
2769 PL_regcc->cur = ln;
3280af22 2770 PL_regcc = cc;
4633a7c4 2771 sayNO;
a0d0e21e
LW
2772 }
2773
2774 /* First just match a string of min scans. */
2775
2776 if (n < cc->min) {
2777 cc->cur = n;
2778 cc->lastloc = locinput;
4633a7c4
LW
2779 if (regmatch(cc->scan))
2780 sayYES;
2781 cc->cur = n - 1;
c277df42 2782 cc->lastloc = lastloc;
4633a7c4 2783 sayNO;
a0d0e21e
LW
2784 }
2785
2c2d71f5
JH
2786 if (scan->flags) {
2787 /* Check whether we already were at this position.
2788 Postpone detection until we know the match is not
2789 *that* much linear. */
2790 if (!PL_reg_maxiter) {
2791 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2792 PL_reg_leftiter = PL_reg_maxiter;
2793 }
2794 if (PL_reg_leftiter-- == 0) {
2795 I32 size = (PL_reg_maxiter + 7)/8;
2796 if (PL_reg_poscache) {
2797 if (PL_reg_poscache_size < size) {
2798 Renew(PL_reg_poscache, size, char);
2799 PL_reg_poscache_size = size;
2800 }
2801 Zero(PL_reg_poscache, size, char);
2802 }
2803 else {
2804 PL_reg_poscache_size = size;
2805 Newz(29, PL_reg_poscache, size, char);
2806 }
2807 DEBUG_r(
2808 PerlIO_printf(Perl_debug_log,
2809 "%sDetected a super-linear match, switching on caching%s...\n",
2810 PL_colors[4], PL_colors[5])
2811 );
2812 }
2813 if (PL_reg_leftiter < 0) {
2814 I32 o = locinput - PL_bostr, b;
2815
2816 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2817 b = o % 8;
2818 o /= 8;
2819 if (PL_reg_poscache[o] & (1<<b)) {
2820 DEBUG_r(
2821 PerlIO_printf(Perl_debug_log,
2822 "%*s already tried at this position...\n",
2823 REPORT_CODE_OFF+PL_regindent*2, "")
2824 );
7821416a 2825 sayNO_SILENT;
2c2d71f5
JH
2826 }
2827 PL_reg_poscache[o] |= (1<<b);
2828 }
2829 }
2830
a0d0e21e
LW
2831 /* Prefer next over scan for minimal matching. */
2832
2833 if (cc->minmod) {
3280af22 2834 PL_regcc = cc->oldcc;
2ab05381
IZ
2835 if (PL_regcc)
2836 ln = PL_regcc->cur;
5f05dabc 2837 cp = regcppush(cc->parenfloor);
02db2b7b 2838 REGCP_SET(lastcp);
5f05dabc 2839 if (regmatch(cc->next)) {
c277df42 2840 regcpblow(cp);
4633a7c4 2841 sayYES; /* All done. */
5f05dabc 2842 }
02db2b7b 2843 REGCP_UNWIND(lastcp);
5f05dabc 2844 regcppop();
2ab05381
IZ
2845 if (PL_regcc)
2846 PL_regcc->cur = ln;
3280af22 2847 PL_regcc = cc;
a0d0e21e 2848
c277df42 2849 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 2850 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2851 && !(PL_reg_flags & RF_warned)) {
2852 PL_reg_flags |= RF_warned;
e476b1b5 2853 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2854 "Complex regular subexpression recursion",
2855 REG_INFTY - 1);
c277df42 2856 }
4633a7c4 2857 sayNO;
c277df42 2858 }
a687059c 2859
c277df42 2860 DEBUG_r(
c3464db5
DD
2861 PerlIO_printf(Perl_debug_log,
2862 "%*s trying longer...\n",
3280af22 2863 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2864 );
a0d0e21e 2865 /* Try scanning more and see if it helps. */
3280af22 2866 PL_reginput = locinput;
a0d0e21e
LW
2867 cc->cur = n;
2868 cc->lastloc = locinput;
5f05dabc 2869 cp = regcppush(cc->parenfloor);
02db2b7b 2870 REGCP_SET(lastcp);
5f05dabc 2871 if (regmatch(cc->scan)) {
c277df42 2872 regcpblow(cp);
4633a7c4 2873 sayYES;
5f05dabc 2874 }
02db2b7b 2875 REGCP_UNWIND(lastcp);
5f05dabc 2876 regcppop();
4633a7c4 2877 cc->cur = n - 1;
c277df42 2878 cc->lastloc = lastloc;
4633a7c4 2879 sayNO;
a0d0e21e
LW
2880 }
2881
2882 /* Prefer scan over next for maximal matching. */
2883
2884 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2885 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2886 cc->cur = n;
2887 cc->lastloc = locinput;
02db2b7b 2888 REGCP_SET(lastcp);
5f05dabc 2889 if (regmatch(cc->scan)) {
c277df42 2890 regcpblow(cp);
4633a7c4 2891 sayYES;
5f05dabc 2892 }
02db2b7b 2893 REGCP_UNWIND(lastcp);
a0d0e21e 2894 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2895 PL_reginput = locinput;
c277df42 2896 DEBUG_r(
c3464db5
DD
2897 PerlIO_printf(Perl_debug_log,
2898 "%*s failed, try continuation...\n",
3280af22 2899 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2900 );
2901 }
9041c2e3 2902 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2903 && !(PL_reg_flags & RF_warned)) {
3280af22 2904 PL_reg_flags |= RF_warned;
e476b1b5 2905 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2906 "Complex regular subexpression recursion",
2907 REG_INFTY - 1);
a0d0e21e
LW
2908 }
2909
2910 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2911 PL_regcc = cc->oldcc;
2ab05381
IZ
2912 if (PL_regcc)
2913 ln = PL_regcc->cur;
a0d0e21e 2914 if (regmatch(cc->next))
4633a7c4 2915 sayYES;
2ab05381
IZ
2916 if (PL_regcc)
2917 PL_regcc->cur = ln;
3280af22 2918 PL_regcc = cc;
4633a7c4 2919 cc->cur = n - 1;
c277df42 2920 cc->lastloc = lastloc;
4633a7c4 2921 sayNO;
a0d0e21e
LW
2922 }
2923 /* NOT REACHED */
9041c2e3 2924 case BRANCHJ:
c277df42
IZ
2925 next = scan + ARG(scan);
2926 if (next == scan)
2927 next = NULL;
2928 inner = NEXTOPER(NEXTOPER(scan));
2929 goto do_branch;
9041c2e3 2930 case BRANCH:
c277df42
IZ
2931 inner = NEXTOPER(scan);
2932 do_branch:
2933 {
2934 CHECKPOINT lastcp;
2935 c1 = OP(scan);
2936 if (OP(next) != c1) /* No choice. */
2937 next = inner; /* Avoid recursion. */
a0d0e21e 2938 else {
02db2b7b
IZ
2939 I32 lastparen = *PL_reglastparen;
2940 I32 unwind1;
2941 re_unwind_branch_t *uw;
2942
2943 /* Put unwinding data on stack */
2944 unwind1 = SSNEWt(1,re_unwind_branch_t);
2945 uw = SSPTRt(unwind1,re_unwind_branch_t);
2946 uw->prev = unwind;
2947 unwind = unwind1;
2948 uw->type = ((c1 == BRANCH)
2949 ? RE_UNWIND_BRANCH
2950 : RE_UNWIND_BRANCHJ);
2951 uw->lastparen = lastparen;
2952 uw->next = next;
2953 uw->locinput = locinput;
2954 uw->nextchr = nextchr;
2955#ifdef DEBUGGING
2956 uw->regindent = ++PL_regindent;
2957#endif
c277df42 2958
02db2b7b
IZ
2959 REGCP_SET(uw->lastcp);
2960
2961 /* Now go into the first branch */
2962 next = inner;
a687059c 2963 }
a0d0e21e
LW
2964 }
2965 break;
2966 case MINMOD:
2967 minmod = 1;
2968 break;
c277df42
IZ
2969 case CURLYM:
2970 {
00db4c45 2971 I32 l = 0;
c277df42 2972 CHECKPOINT lastcp;
9041c2e3 2973
c277df42
IZ
2974 /* We suppose that the next guy does not need
2975 backtracking: in particular, it is of constant length,
2976 and has no parenths to influence future backrefs. */
2977 ln = ARG1(scan); /* min to match */
2978 n = ARG2(scan); /* max to match */
c277df42
IZ
2979 paren = scan->flags;
2980 if (paren) {
3280af22
NIS
2981 if (paren > PL_regsize)
2982 PL_regsize = paren;
2983 if (paren > *PL_reglastparen)
2984 *PL_reglastparen = paren;
c277df42 2985 }
dc45a647 2986 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2987 if (paren)
2988 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2989 PL_reginput = locinput;
c277df42
IZ
2990 if (minmod) {
2991 minmod = 0;
2992 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2993 sayNO;
5f4b28b2 2994 if (ln && l == 0 && n >= ln
c277df42
IZ
2995 /* In fact, this is tricky. If paren, then the
2996 fact that we did/didnot match may influence
2997 future execution. */
2998 && !(paren && ln == 0))
2999 ln = n;
3280af22 3000 locinput = PL_reginput;
22c35a8c 3001 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3002 c1 = (U8)*STRING(next);
c277df42 3003 if (OP(next) == EXACTF)
22c35a8c 3004 c2 = PL_fold[c1];
c277df42 3005 else if (OP(next) == EXACTFL)
22c35a8c 3006 c2 = PL_fold_locale[c1];
c277df42
IZ
3007 else
3008 c2 = c1;
a0ed51b3
LW
3009 }
3010 else
c277df42 3011 c1 = c2 = -1000;
02db2b7b 3012 REGCP_SET(lastcp);
5f4b28b2 3013 /* This may be improved if l == 0. */
c277df42
IZ
3014 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3015 /* If it could work, try it. */
3016 if (c1 == -1000 ||
3280af22
NIS
3017 UCHARAT(PL_reginput) == c1 ||
3018 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3019 {
3020 if (paren) {
3021 if (n) {
cf93c79d
IZ
3022 PL_regstartp[paren] =
3023 HOPc(PL_reginput, -l) - PL_bostr;
3024 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3025 }
3026 else
cf93c79d 3027 PL_regendp[paren] = -1;
c277df42
IZ
3028 }
3029 if (regmatch(next))
3030 sayYES;
02db2b7b 3031 REGCP_UNWIND(lastcp);
c277df42
IZ
3032 }
3033 /* Couldn't or didn't -- move forward. */
3280af22 3034 PL_reginput = locinput;
c277df42
IZ
3035 if (regrepeat_hard(scan, 1, &l)) {
3036 ln++;
3280af22 3037 locinput = PL_reginput;
c277df42
IZ
3038 }
3039 else
3040 sayNO;
3041 }
a0ed51b3
LW
3042 }
3043 else {
c277df42
IZ
3044 n = regrepeat_hard(scan, n, &l);
3045 if (n != 0 && l == 0
3046 /* In fact, this is tricky. If paren, then the
3047 fact that we did/didnot match may influence
3048 future execution. */
3049 && !(paren && ln == 0))
3050 ln = n;
3280af22 3051 locinput = PL_reginput;
c277df42 3052 DEBUG_r(
5c0ca799 3053 PerlIO_printf(Perl_debug_log,
faccc32b 3054 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3055 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3056 (IV) n, (IV)l)
c277df42
IZ
3057 );
3058 if (n >= ln) {
22c35a8c 3059 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3060 c1 = (U8)*STRING(next);
c277df42 3061 if (OP(next) == EXACTF)
22c35a8c 3062 c2 = PL_fold[c1];
c277df42 3063 else if (OP(next) == EXACTFL)
22c35a8c 3064 c2 = PL_fold_locale[c1];
c277df42
IZ
3065 else
3066 c2 = c1;
a0ed51b3
LW
3067 }
3068 else
c277df42
IZ
3069 c1 = c2 = -1000;
3070 }
02db2b7b 3071 REGCP_SET(lastcp);
c277df42
IZ
3072 while (n >= ln) {
3073 /* If it could work, try it. */
3074 if (c1 == -1000 ||
3280af22
NIS
3075 UCHARAT(PL_reginput) == c1 ||
3076 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3077 {
3078 DEBUG_r(
c3464db5 3079 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3080 "%*s trying tail with n=%"IVdf"...\n",
3081 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3082 );
3083 if (paren) {
3084 if (n) {
cf93c79d
IZ
3085 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3086 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3087 }
a0ed51b3 3088 else
cf93c79d 3089 PL_regendp[paren] = -1;
c277df42 3090 }
a0ed51b3
LW
3091 if (regmatch(next))
3092 sayYES;
02db2b7b 3093 REGCP_UNWIND(lastcp);
a0ed51b3 3094 }
c277df42
IZ
3095 /* Couldn't or didn't -- back up. */
3096 n--;
dfe13c55 3097 locinput = HOPc(locinput, -l);
3280af22 3098 PL_reginput = locinput;
c277df42
IZ
3099 }
3100 }
3101 sayNO;
3102 break;
3103 }
3104 case CURLYN:
3105 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3106 if (paren > PL_regsize)
3107 PL_regsize = paren;
3108 if (paren > *PL_reglastparen)
3109 *PL_reglastparen = paren;
c277df42
IZ
3110 ln = ARG1(scan); /* min to match */
3111 n = ARG2(scan); /* max to match */
dc45a647 3112 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3113 goto repeat;
a0d0e21e 3114 case CURLY:
c277df42 3115 paren = 0;
a0d0e21e
LW
3116 ln = ARG1(scan); /* min to match */
3117 n = ARG2(scan); /* max to match */
dc45a647 3118 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3119 goto repeat;
3120 case STAR:
3121 ln = 0;
c277df42 3122 n = REG_INFTY;
a0d0e21e 3123 scan = NEXTOPER(scan);
c277df42 3124 paren = 0;
a0d0e21e
LW
3125 goto repeat;
3126 case PLUS:
c277df42
IZ
3127 ln = 1;
3128 n = REG_INFTY;
3129 scan = NEXTOPER(scan);
3130 paren = 0;
3131 repeat:
a0d0e21e
LW
3132 /*
3133 * Lookahead to avoid useless match attempts
3134 * when we know what character comes next.
3135 */
22c35a8c 3136 if (PL_regkind[(U8)OP(next)] == EXACT) {
1aa99e6b
IH
3137 U8 *s = (U8*)STRING(next);
3138 if (!UTF) {
3139 c2 = c1 = *s;
3140 if (OP(next) == EXACTF)
3141 c2 = PL_fold[c1];
3142 else if (OP(next) == EXACTFL)
3143 c2 = PL_fold_locale[c1];
3144 }
3145 else { /* UTF */
3146 if (OP(next) == EXACTF) {
3147 c1 = to_utf8_lower(s);
3148 c2 = to_utf8_upper(s);
3149 }
3150 else {
9041c2e3 3151 c2 = c1 = utf8_to_uvchr(s, NULL);
1aa99e6b
IH
3152 }
3153 }
bbce6d69 3154 }
a0d0e21e 3155 else
bbce6d69 3156 c1 = c2 = -1000;
3280af22 3157 PL_reginput = locinput;
a0d0e21e 3158 if (minmod) {
c277df42 3159 CHECKPOINT lastcp;
a0d0e21e
LW
3160 minmod = 0;
3161 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3162 sayNO;
a0ed51b3 3163 locinput = PL_reginput;
02db2b7b 3164 REGCP_SET(lastcp);
0fe9bf95 3165 if (c1 != -1000) {
1aa99e6b 3166 char *e; /* Should not check after this */
0fe9bf95
IZ
3167 char *old = locinput;
3168
1aa99e6b 3169 if (n == REG_INFTY) {
0fe9bf95 3170 e = PL_regeol - 1;
1aa99e6b
IH
3171 if (do_utf8)
3172 while (UTF8_IS_CONTINUATION(*(U8*)e))
3173 e--;
3174 }
3175 else if (do_utf8) {
3176 int m = n - ln;
3177 for (e = locinput;
3178 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3179 e += UTF8SKIP(e);
3180 }
3181 else {
3182 e = locinput + n - ln;
3183 if (e >= PL_regeol)
3184 e = PL_regeol - 1;
3185 }
0fe9bf95 3186 while (1) {
1aa99e6b 3187 int count;
0fe9bf95 3188 /* Find place 'next' could work */
1aa99e6b
IH
3189 if (!do_utf8) {
3190 if (c1 == c2) {
3191 while (locinput <= e && *locinput != c1)
3192 locinput++;
3193 } else {
9041c2e3 3194 while (locinput <= e
1aa99e6b
IH
3195 && *locinput != c1
3196 && *locinput != c2)
3197 locinput++;
3198 }
3199 count = locinput - old;
3200 }
3201 else {
3202 STRLEN len;
3203 if (c1 == c2) {
3204 for (count = 0;
3205 locinput <= e &&
9041c2e3 3206 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3207 count++)
3208 locinput += len;
3209
3210 } else {
3211 for (count = 0; locinput <= e; count++) {
9041c2e3 3212 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3213 if (c == c1 || c == c2)
3214 break;
9041c2e3 3215 locinput += len;
1aa99e6b
IH
3216 }
3217 }
0fe9bf95 3218 }
9041c2e3 3219 if (locinput > e)
0fe9bf95
IZ
3220 sayNO;
3221 /* PL_reginput == old now */
3222 if (locinput != old) {
3223 ln = 1; /* Did some */
1aa99e6b 3224 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3225 sayNO;
3226 }
3227 /* PL_reginput == locinput now */
29d1e993 3228 TRYPAREN(paren, ln, locinput);
0fe9bf95 3229 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3230 REGCP_UNWIND(lastcp);
0fe9bf95 3231 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3232 old = locinput;
3233 if (do_utf8)
3234 locinput += UTF8SKIP(locinput);
3235 else
3236 locinput++;
0fe9bf95
IZ
3237 }
3238 }
3239 else
c277df42 3240 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3241 UV c;
3242 if (c1 != -1000) {
3243 if (do_utf8)
9041c2e3 3244 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3245 else
9041c2e3 3246 c = UCHARAT(PL_reginput);
2390ecbc
PP
3247 /* If it could work, try it. */
3248 if (c == c1 || c == c2)
3249 {
3250 TRYPAREN(paren, n, PL_reginput);
3251 REGCP_UNWIND(lastcp);
3252 }
1aa99e6b 3253 }
a0d0e21e 3254 /* If it could work, try it. */
2390ecbc 3255 else if (c1 == -1000)
bbce6d69 3256 {
29d1e993 3257 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3258 REGCP_UNWIND(lastcp);
bbce6d69 3259 }
c277df42 3260 /* Couldn't or didn't -- move forward. */
a0ed51b3 3261 PL_reginput = locinput;
a0d0e21e
LW
3262 if (regrepeat(scan, 1)) {
3263 ln++;
a0ed51b3
LW
3264 locinput = PL_reginput;
3265 }
3266 else
4633a7c4 3267 sayNO;
a0d0e21e
LW
3268 }
3269 }
3270 else {
c277df42 3271 CHECKPOINT lastcp;
a0d0e21e 3272 n = regrepeat(scan, n);
a0ed51b3 3273 locinput = PL_reginput;
22c35a8c 3274 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3275 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3276 ln = n; /* why back off? */
1aeab75a
GS
3277 /* ...because $ and \Z can match before *and* after
3278 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3279 We should back off by one in this case. */
3280 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3281 ln--;
3282 }
02db2b7b 3283 REGCP_SET(lastcp);
c277df42 3284 if (paren) {
1aa99e6b 3285 UV c;
c277df42 3286 while (n >= ln) {
1aa99e6b
IH
3287 if (c1 != -1000) {
3288 if (do_utf8)
9041c2e3 3289 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3290 else
9041c2e3 3291 c = UCHARAT(PL_reginput);
1aa99e6b 3292 }
c277df42 3293 /* If it could work, try it. */
1aa99e6b 3294 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3295 {
29d1e993 3296 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3297 REGCP_UNWIND(lastcp);
c277df42
IZ
3298 }
3299 /* Couldn't or didn't -- back up. */
3300 n--;
dfe13c55 3301 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3302 }
a0ed51b3
LW
3303 }
3304 else {
1aa99e6b 3305 UV c;
c277df42 3306 while (n >= ln) {
1aa99e6b
IH
3307 if (c1 != -1000) {
3308 if (do_utf8)
9041c2e3 3309 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3310 else
9041c2e3 3311 c = UCHARAT(PL_reginput);
1aa99e6b 3312 }
c277df42 3313 /* If it could work, try it. */
1aa99e6b 3314 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3315 {
29d1e993 3316 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3317 REGCP_UNWIND(lastcp);
c277df42
IZ
3318 }
3319 /* Couldn't or didn't -- back up. */
3320 n--;
dfe13c55 3321 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3322 }
a0d0e21e
LW
3323 }
3324 }
4633a7c4 3325 sayNO;
c277df42 3326 break;
a0d0e21e 3327 case END:
0f5d15d6
IZ
3328 if (PL_reg_call_cc) {
3329 re_cc_state *cur_call_cc = PL_reg_call_cc;
3330 CURCUR *cctmp = PL_regcc;
3331 regexp *re = PL_reg_re;
3332 CHECKPOINT cp, lastcp;
3333
3334 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3335 REGCP_SET(lastcp);
0f5d15d6
IZ
3336 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3337 the caller. */
3338 PL_reginput = locinput; /* Make position available to
3339 the callcc. */
3340 cache_re(PL_reg_call_cc->re);
3341 PL_regcc = PL_reg_call_cc->cc;
3342 PL_reg_call_cc = PL_reg_call_cc->prev;
3343 if (regmatch(cur_call_cc->node)) {
3344 PL_reg_call_cc = cur_call_cc;
3345 regcpblow(cp);
3346 sayYES;
3347 }
02db2b7b 3348 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3349 regcppop();
3350 PL_reg_call_cc = cur_call_cc;
3351 PL_regcc = cctmp;
3352 PL_reg_re = re;
3353 cache_re(re);
3354
3355 DEBUG_r(
3356 PerlIO_printf(Perl_debug_log,
3357 "%*s continuation failed...\n",
3358 REPORT_CODE_OFF+PL_regindent*2, "")
3359 );
7821416a 3360 sayNO_SILENT;
0f5d15d6 3361 }
7821416a
IZ
3362 if (locinput < PL_regtill) {
3363 DEBUG_r(PerlIO_printf(Perl_debug_log,
3364 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3365 PL_colors[4],
3366 (long)(locinput - PL_reg_starttry),
3367 (long)(PL_regtill - PL_reg_starttry),
3368 PL_colors[5]));
3369 sayNO_FINAL; /* Cannot match: too short. */
3370 }
3371 PL_reginput = locinput; /* put where regtry can find it */
3372 sayYES_FINAL; /* Success! */
7e5428c5 3373 case SUCCEED:
3280af22 3374 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3375 sayYES_LOUD; /* Success! */
c277df42
IZ
3376 case SUSPEND:
3377 n = 1;
9fe1d20c 3378 PL_reginput = locinput;
9041c2e3 3379 goto do_ifmatch;
a0d0e21e 3380 case UNLESSM:
c277df42 3381 n = 0;
a0ed51b3 3382 if (scan->flags) {
efb30f32
HS
3383 s = HOPBACKc(locinput, scan->flags);
3384 if (!s)
3385 goto say_yes;
3386 PL_reginput = s;
a0ed51b3
LW
3387 }
3388 else
3389 PL_reginput = locinput;
c277df42
IZ
3390 goto do_ifmatch;
3391 case IFMATCH:
3392 n = 1;
a0ed51b3 3393 if (scan->flags) {
efb30f32
HS
3394 s = HOPBACKc(locinput, scan->flags);
3395 if (!s)
3396 goto say_no;
3397 PL_reginput = s;
a0ed51b3
LW
3398 }
3399 else
3400 PL_reginput = locinput;
3401
c277df42 3402 do_ifmatch:
c277df42
IZ
3403 inner = NEXTOPER(NEXTOPER(scan));
3404 if (regmatch(inner) != n) {
3405 say_no:
3406 if (logical) {
3407 logical = 0;
3408 sw = 0;
3409 goto do_longjump;
a0ed51b3
LW
3410 }
3411 else
c277df42
IZ
3412 sayNO;
3413 }
3414 say_yes:
3415 if (logical) {
3416 logical = 0;
3417 sw = 1;
3418 }
fe44a5e8 3419 if (OP(scan) == SUSPEND) {
3280af22 3420 locinput = PL_reginput;
565764a8 3421 nextchr = UCHARAT(locinput);
fe44a5e8 3422 }
c277df42
IZ
3423 /* FALL THROUGH. */
3424 case LONGJMP:
3425 do_longjump:
3426 next = scan + ARG(scan);
3427 if (next == scan)
3428 next = NULL;
a0d0e21e
LW
3429 break;
3430 default:
b900a521 3431 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3432 PTR2UV(scan), OP(scan));
cea2e8a9 3433 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3434 }
02db2b7b 3435 reenter:
a0d0e21e
LW
3436 scan = next;
3437 }
a687059c 3438
a0d0e21e
LW
3439 /*
3440 * We get here only if there's trouble -- normally "case END" is
3441 * the terminating point.
3442 */
cea2e8a9 3443 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3444 /*NOTREACHED*/
4633a7c4
LW
3445 sayNO;
3446
7821416a
IZ
3447yes_loud:
3448 DEBUG_r(
3449 PerlIO_printf(Perl_debug_log,
3450 "%*s %scould match...%s\n",
3451 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3452 );
3453 goto yes;
3454yes_final:
3455 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3456 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3457yes:
3458#ifdef DEBUGGING
3280af22 3459 PL_regindent--;
4633a7c4 3460#endif
02db2b7b
IZ
3461
3462#if 0 /* Breaks $^R */
3463 if (unwind)
3464 regcpblow(firstcp);
3465#endif
4633a7c4
LW
3466 return 1;
3467
3468no:
7821416a
IZ
3469 DEBUG_r(
3470 PerlIO_printf(Perl_debug_log,
3471 "%*s %sfailed...%s\n",
3472 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3473 );
3474 goto do_no;
3475no_final:
3476do_no:
02db2b7b
IZ
3477 if (unwind) {
3478 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3479
3480 switch (uw->type) {
3481 case RE_UNWIND_BRANCH:
3482 case RE_UNWIND_BRANCHJ:
3483 {
3484 re_unwind_branch_t *uwb = &(uw->branch);
3485 I32 lastparen = uwb->lastparen;
9041c2e3 3486
02db2b7b
IZ
3487 REGCP_UNWIND(uwb->lastcp);
3488 for (n = *PL_reglastparen; n > lastparen; n--)
3489 PL_regendp[n] = -1;
3490 *PL_reglastparen = n;
3491 scan = next = uwb->next;
9041c2e3
NIS
3492 if ( !scan ||
3493 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
3494 ? BRANCH : BRANCHJ) ) { /* Failure */
3495 unwind = uwb->prev;
3496#ifdef DEBUGGING
3497 PL_regindent--;
3498#endif
3499 goto do_no;
3500 }
3501 /* Have more choice yet. Reuse the same uwb. */
3502 /*SUPPRESS 560*/
3503 if ((n = (uwb->type == RE_UNWIND_BRANCH
3504 ? NEXT_OFF(next) : ARG(next))))
3505 next += n;
3506 else
3507 next = NULL; /* XXXX Needn't unwinding in this case... */
3508 uwb->next = next;
3509 next = NEXTOPER(scan);
3510 if (uwb->type == RE_UNWIND_BRANCHJ)
3511 next = NEXTOPER(next);
3512 locinput = uwb->locinput;
3513 nextchr = uwb->nextchr;
3514#ifdef DEBUGGING
3515 PL_regindent = uwb->regindent;
3516#endif
3517
3518 goto reenter;
3519 }
3520 /* NOT REACHED */
3521 default:
3522 Perl_croak(aTHX_ "regexp unwind memory corruption");
3523 }
3524 /* NOT REACHED */
3525 }
4633a7c4 3526#ifdef DEBUGGING
3280af22 3527 PL_regindent--;
4633a7c4 3528#endif
a0d0e21e 3529 return 0;
a687059c
LW
3530}
3531
3532/*
3533 - regrepeat - repeatedly match something simple, report how many
3534 */
3535/*
3536 * [This routine now assumes that it will only match on things of length 1.
3537 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3538 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3539 */
76e3520e 3540STATIC I32
cea2e8a9 3541S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3542{
a0d0e21e 3543 register char *scan;
a0d0e21e 3544 register I32 c;
3280af22 3545 register char *loceol = PL_regeol;
a0ed51b3 3546 register I32 hardcount = 0;
ffc61ed2 3547 register bool do_utf8 = DO_UTF8(PL_reg_sv);
a0d0e21e 3548
3280af22 3549 scan = PL_reginput;
c277df42 3550 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3551 loceol = scan + max;
a0d0e21e 3552 switch (OP(p)) {
22c35a8c 3553 case REG_ANY:
1aa99e6b 3554 if (do_utf8) {
ffc61ed2 3555 loceol = PL_regeol;
1aa99e6b 3556 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3557 scan += UTF8SKIP(scan);
3558 hardcount++;
3559 }
3560 } else {
3561 while (scan < loceol && *scan != '\n')
3562 scan++;
a0ed51b3
LW
3563 }
3564 break;
ffc61ed2 3565 case SANY:
3baa4c62 3566 scan = loceol;
a0ed51b3 3567 break;
bbce6d69 3568 case EXACT: /* length of string is 1 */
cd439c50 3569 c = (U8)*STRING(p);
bbce6d69 3570 while (scan < loceol && UCHARAT(scan) == c)
3571 scan++;
3572 break;
3573 case EXACTF: /* length of string is 1 */
cd439c50 3574 c = (U8)*STRING(p);
bbce6d69 3575 while (scan < loceol &&
22c35a8c 3576 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3577 scan++;
3578 break;
3579 case EXACTFL: /* length of string is 1 */
3280af22 3580 PL_reg_flags |= RF_tainted;
cd439c50 3581 c = (U8)*STRING(p);
bbce6d69 3582 while (scan < loceol &&
22c35a8c 3583 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3584 scan++;
3585 break;
3586 case ANYOF:
ffc61ed2
JH
3587 if (do_utf8) {
3588 loceol = PL_regeol;
1aa99e6b
IH
3589 while (hardcount < max && scan < loceol &&
3590 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3591 scan += UTF8SKIP(scan);
3592 hardcount++;
3593 }
3594 } else {
3595 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3596 scan++;
3597 }
a0d0e21e
LW
3598 break;
3599 case ALNUM:
1aa99e6b 3600 if (do_utf8) {
ffc61ed2 3601 loceol = PL_regeol;
8269fa76 3602 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3603 while (hardcount < max && scan < loceol &&
3568d838 3604 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3605 scan += UTF8SKIP(scan);
3606 hardcount++;
3607 }
3608 } else {
3609 while (scan < loceol && isALNUM(*scan))
3610 scan++;
a0ed51b3
LW
3611 }
3612 break;
bbce6d69 3613 case ALNUML:
3280af22 3614 PL_reg_flags |= RF_tainted;
1aa99e6b 3615 if (do_utf8) {
ffc61ed2 3616 loceol = PL_regeol;
1aa99e6b
IH
3617 while (hardcount < max && scan < loceol &&
3618 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3619 scan += UTF8SKIP(scan);
3620 hardcount++;
3621 }
3622 } else {
3623 while (scan < loceol && isALNUM_LC(*scan))
3624 scan++;
a0ed51b3
LW
3625 }
3626 break;
a0d0e21e 3627 case NALNUM:
1aa99e6b 3628 if (do_utf8) {
ffc61ed2 3629 loceol = PL_regeol;
8269fa76 3630 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3631 while (hardcount < max && scan < loceol &&
3568d838 3632 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3633 scan += UTF8SKIP(scan);
3634 hardcount++;
3635 }
3636 } else {
3637 while (scan < loceol && !isALNUM(*scan))
3638 scan++;
a0ed51b3
LW
3639 }
3640 break;
bbce6d69 3641 case NALNUML:
3280af22 3642 PL_reg_flags |= RF_tainted;
1aa99e6b 3643 if (do_utf8) {
ffc61ed2 3644 loceol = PL_regeol;
1aa99e6b
IH
3645 while (hardcount < max && scan < loceol &&
3646 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3647 scan += UTF8SKIP(scan);
3648 hardcount++;
3649 }
3650 } else {
3651 while (scan < loceol && !isALNUM_LC(*scan))
3652 scan++;
a0ed51b3
LW
3653 }
3654 break;
a0d0e21e 3655 case SPACE:
1aa99e6b 3656 if (do_utf8) {
ffc61ed2 3657 loceol = PL_regeol;
8269fa76 3658 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3659 while (hardcount < max && scan < loceol &&
3568d838
JH
3660 (*scan == ' ' ||
3661 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3662 scan += UTF8SKIP(scan);
3663 hardcount++;
3664 }
3665 } else {
3666 while (scan < loceol && isSPACE(*scan))
3667 scan++;
a0ed51b3
LW
3668 }
3669 break;
bbce6d69 3670 case SPACEL:
3280af22 3671 PL_reg_flags |= RF_tainted;
1aa99e6b 3672 if (do_utf8) {
ffc61ed2 3673 loceol = PL_regeol;
1aa99e6b 3674 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3675 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3676 scan += UTF8SKIP(scan);
3677 hardcount++;
3678 }
3679 } else {
3680 while (scan < loceol && isSPACE_LC(*scan))
3681 scan++;
a0ed51b3
LW
3682 }
3683 break;
a0d0e21e 3684 case NSPACE:
1aa99e6b 3685 if (do_utf8) {
ffc61ed2 3686 loceol = PL_regeol;
8269fa76 3687 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3688 while (hardcount < max && scan < loceol &&
3568d838
JH
3689 !(*scan == ' ' ||
3690 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3691 scan += UTF8SKIP(scan);
3692 hardcount++;
3693 }
3694 } else {
3695 while (scan < loceol && !isSPACE(*scan))
3696 scan++;
3697 break;
a0ed51b3 3698 }
bbce6d69 3699 case NSPACEL:
3280af22 3700 PL_reg_flags |= RF_tainted;
1aa99e6b 3701 if (do_utf8) {
ffc61ed2 3702 loceol = PL_regeol;
1aa99e6b 3703 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3704 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3705 scan += UTF8SKIP(scan);
3706 hardcount++;
3707 }
3708 } else {
3709 while (scan < loceol && !isSPACE_LC(*scan))
3710 scan++;
a0ed51b3
LW
3711 }
3712 break;
a0d0e21e 3713 case DIGIT:
1aa99e6b 3714 if (do_utf8) {
ffc61ed2 3715 loceol = PL_regeol;
8269fa76 3716 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3717 while (hardcount < max && scan < loceol &&
3568d838 3718 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3719 scan += UTF8SKIP(scan);
3720 hardcount++;
3721 }
3722 } else {
3723 while (scan < loceol && isDIGIT(*scan))
3724 scan++;
a0ed51b3
LW
3725 }
3726 break;
a0d0e21e 3727 case NDIGIT:
1aa99e6b 3728 if (do_utf8) {
ffc61ed2 3729 loceol = PL_regeol;
8269fa76 3730 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3731 while (hardcount < max && scan < loceol &&
3568d838 3732 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3733 scan += UTF8SKIP(scan);
3734 hardcount++;
3735 }
3736 } else {
3737 while (scan < loceol && !isDIGIT(*scan))
3738 scan++;
a0ed51b3
LW
3739 }
3740 break;
a0d0e21e
LW
3741 default: /* Called on something of 0 width. */
3742 break; /* So match right here or not at all. */
3743 }
a687059c 3744
a0ed51b3
LW
3745 if (hardcount)
3746 c = hardcount;
3747 else
3748 c = scan - PL_reginput;
3280af22 3749 PL_reginput = scan;
a687059c 3750
9041c2e3 3751 DEBUG_r(
c277df42
IZ
3752 {
3753 SV *prop = sv_newmortal();
3754
3755 regprop(prop, p);
9041c2e3
NIS
3756 PerlIO_printf(Perl_debug_log,
3757 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
7b0972df 3758 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42 3759 });
9041c2e3 3760
a0d0e21e 3761 return(c);
a687059c
LW
3762}
3763
3764/*
c277df42 3765 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 3766 *
c277df42
IZ
3767 * The repeater is supposed to have constant length.
3768 */
3769
76e3520e 3770STATIC I32
cea2e8a9 3771S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42
IZ
3772{
3773 register char *scan;
3774 register char *start;
3280af22 3775 register char *loceol = PL_regeol;
a0ed51b3 3776 I32 l = 0;
708e3b05 3777 I32 count = 0, res = 1;
a0ed51b3
LW
3778
3779 if (!max)
3780 return 0;
c277df42 3781
3280af22 3782 start = PL_reginput;
1aa99e6b 3783 if (DO_UTF8(PL_reg_sv)) {
708e3b05 3784 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3785 if (!count++) {
3786 l = 0;
3787 while (start < PL_reginput) {
3788 l++;
3789 start += UTF8SKIP(start);
3790 }
3791 *lp = l;
3792 if (l == 0)
3793 return max;
3794 }
3795 if (count == max)
3796 return count;
3797 }
3798 }
3799 else {
708e3b05 3800 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3801 if (!count++) {
3802 *lp = l = PL_reginput - start;
3803 if (max != REG_INFTY && l*max < loceol - scan)
3804 loceol = scan + l*max;
3805 if (l == 0)
3806 return max;
c277df42
IZ
3807 }
3808 }
3809 }
708e3b05 3810 if (!res)
3280af22 3811 PL_reginput = scan;
9041c2e3 3812
a0ed51b3 3813 return count;
c277df42
IZ
3814}
3815
3816/*
ffc61ed2
JH
3817- regclass_swash - prepare the utf8 swash
3818*/
3819
3820SV *
3821Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
3822{
3823 SV *sw = NULL;
3824 SV *si = NULL;
3825
3826 if (PL_regdata && PL_regdata->count) {
3827 U32 n = ARG(node);
3828
3829 if (PL_regdata->what[n] == 's') {
3830 SV *rv = (SV*)PL_regdata->data[n];
3831 AV *av = (AV*)SvRV((SV*)rv);
3832 SV **a;
9041c2e3 3833
ffc61ed2
JH
3834 si = *av_fetch(av, 0, FALSE);
3835 a = av_fetch(av, 1, FALSE);
9041c2e3 3836
ffc61ed2
JH
3837 if (a)
3838 sw = *a;
3839 else if (si && doinit) {
3840 sw = swash_init("utf8", "", si, 1, 0);
3841 (void)av_store(av, 1, sw);
3842 }
3843 }
3844 }
3845
3846 if (initsvp)
3847 *initsvp = si;
3848
3849 return sw;
3850}
3851
3852/*
cb8d8820 3853 - reginclass - determine if a character falls into a character class
bbce6d69 3854 */
3855
76e3520e 3856STATIC bool
ffc61ed2 3857S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
bbce6d69 3858{
ffc61ed2 3859 char flags = ANYOF_FLAGS(n);
bbce6d69 3860 bool match = FALSE;
1aa99e6b 3861 UV c;
3568d838 3862 STRLEN len = 0;
1aa99e6b 3863
3568d838 3864 c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
bbce6d69 3865
ffc61ed2
JH
3866 if (do_utf8 || (flags & ANYOF_UNICODE)) {
3867 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
3868 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
3869 match = TRUE;
bbce6d69 3870 }
3568d838 3871 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 3872 match = TRUE;
ffc61ed2
JH
3873 if (!match) {
3874 SV *sw = regclass_swash(n, TRUE, 0);
3875
3876 if (sw) {
3568d838 3877 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
3878 match = TRUE;
3879 else if (flags & ANYOF_FOLD) {
3880 U8 tmpbuf[UTF8_MAXLEN+1];
9041c2e3 3881
ffc61ed2
JH
3882 if (flags & ANYOF_LOCALE) {
3883 PL_reg_flags |= RF_tainted;
9041c2e3 3884 uvchr_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
ffc61ed2 3885 }
1aa99e6b 3886 else
9041c2e3 3887 uvchr_to_utf8(tmpbuf, toLOWER_utf8(p));
3568d838 3888 if (swash_fetch(sw, tmpbuf, do_utf8))
ffc61ed2
JH
3889 match = TRUE;
3890 }
3891 }
bbce6d69 3892 }
3893 }
1aa99e6b 3894 if (!match && c < 256) {
ffc61ed2
JH
3895 if (ANYOF_BITMAP_TEST(n, c))
3896 match = TRUE;
3897 else if (flags & ANYOF_FOLD) {
3568d838 3898 I32 f;
a0ed51b3 3899
ffc61ed2
JH
3900 if (flags & ANYOF_LOCALE) {
3901 PL_reg_flags |= RF_tainted;
3902 f = PL_fold_locale[c];
3903 }
3904 else
3905 f = PL_fold[c];
3906 if (f != c && ANYOF_BITMAP_TEST(n, f))
3907 match = TRUE;
3908 }
3909
3910 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 3911 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
3912 if (
3913 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3914 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3915 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
3916 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3917 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3918 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3919 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3920 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3921 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3922 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3923 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
3924 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
3925 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3926 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3927 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3928 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3929 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
3930 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3931 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
3932 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3933 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3934 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3935 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
3936 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3937 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3938 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3939 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3940 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3941 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
3942 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
3943 ) /* How's that for a conditional? */
3944 {
3945 match = TRUE;
3946 }
a0ed51b3 3947 }
a0ed51b3
LW
3948 }
3949
a0ed51b3
LW
3950 return (flags & ANYOF_INVERT) ? !match : match;
3951}
161b471a 3952
dfe13c55 3953STATIC U8 *
cea2e8a9 3954S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 3955{
1aa99e6b
IH
3956 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
3957}
3958
3959STATIC U8 *
3960S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 3961{
a0ed51b3 3962 if (off >= 0) {
1aa99e6b 3963 while (off-- && s < lim) {
ffc61ed2 3964 /* XXX could check well-formedness here */
a0ed51b3 3965 s += UTF8SKIP(s);
ffc61ed2 3966 }
a0ed51b3
LW
3967 }
3968 else {
3969 while (off++) {
1aa99e6b 3970 if (s > lim) {
a0ed51b3 3971 s--;
ffc61ed2 3972 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 3973 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 3974 s--;
ffc61ed2
JH
3975 }
3976 /* XXX could check well-formedness here */
a0ed51b3
LW
3977 }
3978 }
3979 }
3980 return s;
3981}
161b471a 3982
dfe13c55 3983STATIC U8 *
1aa99e6b 3984S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 3985{
1aa99e6b
IH
3986 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
3987}
3988
3989STATIC U8 *
3990S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
3991{
3992 if (off >= 0) {
1aa99e6b 3993 while (off-- && s < lim) {
ffc61ed2 3994 /* XXX could check well-formedness here */
a0ed51b3 3995 s += UTF8SKIP(s);
ffc61ed2 3996 }
a0ed51b3
LW
3997 if (off >= 0)
3998 return 0;
3999 }
4000 else {
4001 while (off++) {
1aa99e6b 4002 if (s > lim) {
a0ed51b3 4003 s--;
ffc61ed2 4004 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4005 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4006 s--;
ffc61ed2
JH
4007 }
4008 /* XXX could check well-formedness here */
a0ed51b3
LW
4009 }
4010 else
4011 break;
4012 }
4013 if (off <= 0)
4014 return 0;
4015 }
4016 return s;
4017}
51371543
GS
4018
4019#ifdef PERL_OBJECT
51371543
GS
4020#include "XSUB.h"
4021#endif
4022
4023static void
4024restore_pos(pTHXo_ void *arg)
4025{
51371543
GS
4026 if (PL_reg_eval_set) {
4027 if (PL_reg_oldsaved) {
4028 PL_reg_re->subbeg = PL_reg_oldsaved;
4029 PL_reg_re->sublen = PL_reg_oldsavedlen;
4030 RX_MATCH_COPIED_on(PL_reg_re);
4031 }
4032 PL_reg_magic->mg_len = PL_reg_oldpos;
4033 PL_reg_eval_set = 0;
4034 PL_curpm = PL_reg_oldcurpm;
4035 }
4036}