This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Integrate mainline
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
c5be433b
GS
83#ifdef PERL_IN_XSUB_RE
84# if defined(PERL_CAPI) || defined(PERL_OBJECT)
85# include "XSUB.h"
86# endif
87#endif
88
a687059c
LW
89#include "regcomp.h"
90
c277df42
IZ
91#define RF_tainted 1 /* tainted information used? */
92#define RF_warned 2 /* warned about big count? */
ce862d02 93#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
94#define RF_utf8 8 /* String contains multibyte chars? */
95
96#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
97
98#define RS_init 1 /* eval environment created */
99#define RS_set 2 /* replsv value is set */
c277df42 100
a687059c
LW
101#ifndef STATIC
102#define STATIC static
103#endif
104
c277df42
IZ
105/*
106 * Forwards.
107 */
108
a0ed51b3 109#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
1aa99e6b 110#define CHR_DIST(a,b) (DO_UTF8(PL_reg_sv) ? utf8_distance(a,b) : a - b)
a0ed51b3 111
dfe13c55
GS
112#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
113#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
1aa99e6b
IH
114#define HOP(pos,off) (DO_UTF8(PL_reg_sv) ? reghop((U8*)pos, off) : (U8*)(pos + off))
115#define HOPMAYBE(pos,off) (DO_UTF8(PL_reg_sv) ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
116#define HOPc(pos,off) ((char*)HOP(pos,off))
117#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 118
1aa99e6b
IH
119#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
120#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
121#define HOP3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
122#define HOPMAYBE3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
123#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
124#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
125
8269fa76 126#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 127
8269fa76 128static void restore_pos(pTHXo_ void *arg);
51371543 129
76e3520e 130STATIC CHECKPOINT
cea2e8a9 131S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 132{
3280af22 133 int retval = PL_savestack_ix;
b1ce53c5
JH
134#define REGCP_PAREN_ELEMS 4
135 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
136 int p;
137
b1ce53c5
JH
138#define REGCP_OTHER_ELEMS 5
139 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 140 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 141/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
142 SSPUSHINT(PL_regendp[p]);
143 SSPUSHINT(PL_regstartp[p]);
3280af22 144 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
145 SSPUSHINT(p);
146 }
b1ce53c5 147/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
148 SSPUSHINT(PL_regsize);
149 SSPUSHINT(*PL_reglastparen);
150 SSPUSHPTR(PL_reginput);
41123dfd
JH
151#define REGCP_FRAME_ELEMS 2
152/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
153 * are needed for the regexp context stack bookkeeping. */
154 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 155 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 156
a0d0e21e
LW
157 return retval;
158}
159
c277df42 160/* These are needed since we do not localize EVAL nodes: */
02db2b7b 161# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 162 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 163 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 164
02db2b7b 165# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 166 PerlIO_printf(Perl_debug_log, \
faccc32b 167 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 168 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 169
76e3520e 170STATIC char *
cea2e8a9 171S_regcppop(pTHX)
a0d0e21e 172{
b1ce53c5 173 I32 i;
a0d0e21e
LW
174 U32 paren = 0;
175 char *input;
cf93c79d 176 I32 tmps;
b1ce53c5
JH
177
178 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 179 i = SSPOPINT;
b1ce53c5
JH
180 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
181 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 182 input = (char *) SSPOPPTR;
3280af22
NIS
183 *PL_reglastparen = SSPOPINT;
184 PL_regsize = SSPOPINT;
b1ce53c5
JH
185
186 /* Now restore the parentheses context. */
41123dfd
JH
187 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
188 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 189 paren = (U32)SSPOPINT;
3280af22 190 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
191 PL_regstartp[paren] = SSPOPINT;
192 tmps = SSPOPINT;
3280af22
NIS
193 if (paren <= *PL_reglastparen)
194 PL_regendp[paren] = tmps;
c277df42 195 DEBUG_r(
c3464db5 196 PerlIO_printf(Perl_debug_log,
b900a521 197 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 198 (UV)paren, (IV)PL_regstartp[paren],
b900a521 199 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 200 (IV)PL_regendp[paren],
3280af22 201 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 202 );
a0d0e21e 203 }
c277df42 204 DEBUG_r(
3280af22 205 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 206 PerlIO_printf(Perl_debug_log,
faccc32b
JH
207 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
208 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
209 }
210 );
daf18116 211#if 1
dafc8851
JH
212 /* It would seem that the similar code in regtry()
213 * already takes care of this, and in fact it is in
214 * a better location to since this code can #if 0-ed out
215 * but the code in regtry() is needed or otherwise tests
216 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
217 * (as of patchlevel 7877) will fail. Then again,
218 * this code seems to be necessary or otherwise
219 * building DynaLoader will fail:
220 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
221 * --jhi */
3280af22
NIS
222 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
223 if (paren > PL_regsize)
cf93c79d
IZ
224 PL_regstartp[paren] = -1;
225 PL_regendp[paren] = -1;
a0d0e21e 226 }
dafc8851 227#endif
a0d0e21e
LW
228 return input;
229}
230
0f5d15d6 231STATIC char *
cea2e8a9 232S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
233{
234 I32 tmp = PL_savestack_ix;
235
236 PL_savestack_ix = ss;
237 regcppop();
238 PL_savestack_ix = tmp;
942e002e 239 return Nullch;
0f5d15d6
IZ
240}
241
242typedef struct re_cc_state
243{
244 I32 ss;
245 regnode *node;
246 struct re_cc_state *prev;
247 CURCUR *cc;
248 regexp *re;
249} re_cc_state;
250
02db2b7b 251#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 252
29d1e993
HS
253#define TRYPAREN(paren, n, input) { \
254 if (paren) { \
255 if (n) { \
256 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
257 PL_regendp[paren] = input - PL_bostr; \
258 } \
259 else \
260 PL_regendp[paren] = -1; \
261 } \
262 if (regmatch(next)) \
263 sayYES; \
264 if (paren && n) \
265 PL_regendp[paren] = -1; \
266}
267
268
a687059c 269/*
e50aee73 270 * pregexec and friends
a687059c
LW
271 */
272
273/*
c277df42 274 - pregexec - match a regexp against a string
a687059c 275 */
c277df42 276I32
864dbfa3 277Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 278 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
279/* strend: pointer to null at end of string */
280/* strbeg: real beginning of string */
281/* minend: end of match must be >=minend after stringarg. */
282/* nosave: For optimizations. */
283{
284 return
9041c2e3 285 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
286 nosave ? 0 : REXEC_COPY_STR);
287}
0f5d15d6
IZ
288
289STATIC void
cea2e8a9 290S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
291{
292 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
293#ifdef DEBUGGING
294 PL_regprogram = prog->program;
295#endif
296 PL_regnpar = prog->nparens;
9041c2e3
NIS
297 PL_regdata = prog->data;
298 PL_reg_re = prog;
0f5d15d6 299}
22e551b9 300
9041c2e3 301/*
cad2e5aa
JH
302 * Need to implement the following flags for reg_anch:
303 *
304 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
305 * USE_INTUIT_ML
306 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
307 * INTUIT_AUTORITATIVE_ML
308 * INTUIT_ONCE_NOML - Intuit can match in one location only.
309 * INTUIT_ONCE_ML
310 *
311 * Another flag for this function: SECOND_TIME (so that float substrs
312 * with giant delta may be not rechecked).
313 */
314
315/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
316
2c2d71f5 317/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
318 Otherwise, only SvCUR(sv) is used to get strbeg. */
319
320/* XXXX We assume that strpos is strbeg unless sv. */
321
6eb5f6b9
JH
322/* XXXX Some places assume that there is a fixed substring.
323 An update may be needed if optimizer marks as "INTUITable"
324 RExen without fixed substrings. Similarly, it is assumed that
325 lengths of all the strings are no more than minlen, thus they
326 cannot come from lookahead.
327 (Or minlen should take into account lookahead.) */
328
2c2d71f5
JH
329/* A failure to find a constant substring means that there is no need to make
330 an expensive call to REx engine, thus we celebrate a failure. Similarly,
331 finding a substring too deep into the string means that less calls to
30944b6d
IZ
332 regtry() should be needed.
333
334 REx compiler's optimizer found 4 possible hints:
335 a) Anchored substring;
336 b) Fixed substring;
337 c) Whether we are anchored (beginning-of-line or \G);
338 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 339 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
340 string which does not contradict any of them.
341 */
2c2d71f5 342
6eb5f6b9
JH
343/* Most of decisions we do here should have been done at compile time.
344 The nodes of the REx which we used for the search should have been
345 deleted from the finite automaton. */
346
cad2e5aa
JH
347char *
348Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
349 char *strend, U32 flags, re_scream_pos_data *data)
350{
2c2d71f5 351 register I32 start_shift;
cad2e5aa 352 /* Should be nonnegative! */
2c2d71f5
JH
353 register I32 end_shift;
354 register char *s;
355 register SV *check;
a1933d95 356 char *strbeg;
cad2e5aa
JH
357 char *t;
358 I32 ml_anch;
2c2d71f5 359 char *tmp;
6eb5f6b9
JH
360 register char *other_last = Nullch; /* other substr checked before this */
361 char *check_at; /* check substr found at this pos */
30944b6d
IZ
362#ifdef DEBUGGING
363 char *i_strpos = strpos;
364#endif
cad2e5aa
JH
365
366 DEBUG_r( if (!PL_colorset) reginitcolors() );
367 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 368 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
369 PL_colors[4],PL_colors[5],PL_colors[0],
370 prog->precomp,
371 PL_colors[1],
372 (strlen(prog->precomp) > 60 ? "..." : ""),
373 PL_colors[0],
b900a521 374 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
375 strpos, PL_colors[1],
376 (strend - strpos > 60 ? "..." : ""))
377 );
378
1aa99e6b
IH
379 if (prog->reganch & ROPT_UTF8)
380 PL_reg_flags |= RF_utf8;
381
382 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
2c2d71f5 383 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 384 goto fail;
2c2d71f5 385 }
a1933d95 386 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 387 PL_regeol = strend;
653099ff 388 check = prog->check_substr;
2c2d71f5 389 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
390 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
391 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 392 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 393
5675c1a6
IZ
394 if (!ml_anch) {
395 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
396 /* SvCUR is not set on references: SvRV and SvPVX overlap */
397 && sv && !SvROK(sv)
a1933d95 398 && (strpos != strbeg)) {
5675c1a6
IZ
399 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
400 goto fail;
401 }
3baa4c62
JH
402 if (prog->check_offset_min == prog->check_offset_max &&
403 !(prog->reganch & ROPT_SANY_SEEN)) {
2c2d71f5 404 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
405 I32 slen;
406
1aa99e6b 407 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
408 if (SvTAIL(check)) {
409 slen = SvCUR(check); /* >= 1 */
cad2e5aa 410
9041c2e3 411 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
412 || (strend - s == slen && strend[-1] != '\n')) {
413 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
414 goto fail_finish;
cad2e5aa
JH
415 }
416 /* Now should match s[0..slen-2] */
417 slen--;
653099ff 418 if (slen && (*SvPVX(check) != *s
cad2e5aa 419 || (slen > 1
653099ff 420 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
421 report_neq:
422 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
423 goto fail_finish;
424 }
cad2e5aa 425 }
653099ff
GS
426 else if (*SvPVX(check) != *s
427 || ((slen = SvCUR(check)) > 1
428 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
429 goto report_neq;
430 goto success_at_start;
5675c1a6 431 }
cad2e5aa 432 }
2c2d71f5 433 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 434 s = strpos;
2c2d71f5 435 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 436 end_shift = prog->minlen - start_shift -
653099ff 437 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 438 if (!ml_anch) {
653099ff
GS
439 I32 end = prog->check_offset_max + CHR_SVLEN(check)
440 - (SvTAIL(check) != 0);
1aa99e6b 441 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
442
443 if (end_shift < eshift)
444 end_shift = eshift;
445 }
cad2e5aa 446 }
2c2d71f5 447 else { /* Can match at random position */
cad2e5aa
JH
448 ml_anch = 0;
449 s = strpos;
2c2d71f5
JH
450 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
451 /* Should be nonnegative! */
452 end_shift = prog->minlen - start_shift -
653099ff 453 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
454 }
455
2c2d71f5 456#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 457 if (end_shift < 0)
6bbae5e6 458 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
459#endif
460
2c2d71f5
JH
461 restart:
462 /* Find a possible match in the region s..strend by looking for
463 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 464 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
465 I32 p = -1; /* Internal iterator of scream. */
466 I32 *pp = data ? data->scream_pos : &p;
467
2c2d71f5
JH
468 if (PL_screamfirst[BmRARE(check)] >= 0
469 || ( BmRARE(check) == '\n'
470 && (BmPREVIOUS(check) == SvCUR(check) - 1)
471 && SvTAIL(check) ))
9041c2e3 472 s = screaminstr(sv, check,
2c2d71f5 473 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 474 else
2c2d71f5 475 goto fail_finish;
cad2e5aa
JH
476 if (data)
477 *data->scream_olds = s;
478 }
3baa4c62
JH
479 else if (prog->reganch & ROPT_SANY_SEEN)
480 s = fbm_instr((U8*)(s + start_shift),
481 (U8*)(strend - end_shift),
482 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 483 else
1aa99e6b
IH
484 s = fbm_instr(HOP3(s, start_shift, strend),
485 HOP3(strend, -end_shift, strbeg),
2c2d71f5 486 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
487
488 /* Update the count-of-usability, remove useless subpatterns,
489 unshift s. */
2c2d71f5
JH
490
491 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
492 (s ? "Found" : "Did not find"),
493 ((check == prog->anchored_substr) ? "anchored" : "floating"),
494 PL_colors[0],
7b0972df
JH
495 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
496 SvPVX(check),
2c2d71f5
JH
497 PL_colors[1], (SvTAIL(check) ? "$" : ""),
498 (s ? " at offset " : "...\n") ) );
499
500 if (!s)
501 goto fail_finish;
502
6eb5f6b9
JH
503 check_at = s;
504
2c2d71f5 505 /* Finish the diagnostic message */
30944b6d 506 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
507
508 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
509 Start with the other substr.
510 XXXX no SCREAM optimization yet - and a very coarse implementation
511 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
512 *always* match. Probably should be marked during compile...
513 Probably it is right to do no SCREAM here...
514 */
515
516 if (prog->float_substr && prog->anchored_substr) {
30944b6d 517 /* Take into account the "other" substring. */
2c2d71f5
JH
518 /* XXXX May be hopelessly wrong for UTF... */
519 if (!other_last)
6eb5f6b9 520 other_last = strpos;
2c2d71f5 521 if (check == prog->float_substr) {
30944b6d
IZ
522 do_other_anchored:
523 {
1aa99e6b 524 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
525 char *s1 = s;
526
2c2d71f5
JH
527 t = s - prog->check_offset_max;
528 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
529 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 530 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 531 && t > strpos)))
30944b6d 532 /* EMPTY */;
2c2d71f5
JH
533 else
534 t = strpos;
1aa99e6b 535 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
536 if (t < other_last) /* These positions already checked */
537 t = other_last;
1aa99e6b 538 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
539 if (last < last1)
540 last1 = last;
541 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
542 /* On end-of-str: see comment below. */
543 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
544 HOP3(HOP3(last1, prog->anchored_offset, strend)
545 + SvCUR(prog->anchored_substr),
546 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
547 prog->anchored_substr,
548 PL_multiline ? FBMrf_MULTILINE : 0);
549 DEBUG_r(PerlIO_printf(Perl_debug_log,
550 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
551 (s ? "Found" : "Contradicts"),
552 PL_colors[0],
7b0972df
JH
553 (int)(SvCUR(prog->anchored_substr)
554 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
555 SvPVX(prog->anchored_substr),
556 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
557 if (!s) {
558 if (last1 >= last2) {
559 DEBUG_r(PerlIO_printf(Perl_debug_log,
560 ", giving up...\n"));
561 goto fail_finish;
562 }
563 DEBUG_r(PerlIO_printf(Perl_debug_log,
564 ", trying floating at offset %ld...\n",
1aa99e6b
IH
565 (long)(HOP3c(s1, 1, strend) - i_strpos)));
566 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
567 s = HOP3c(last, 1, strend);
2c2d71f5
JH
568 goto restart;
569 }
570 else {
571 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 572 (long)(s - i_strpos)));
1aa99e6b
IH
573 t = HOP3c(s, -prog->anchored_offset, strbeg);
574 other_last = HOP3c(s, 1, strend);
30944b6d 575 s = s1;
2c2d71f5
JH
576 if (t == strpos)
577 goto try_at_start;
2c2d71f5
JH
578 goto try_at_offset;
579 }
30944b6d 580 }
2c2d71f5
JH
581 }
582 else { /* Take into account the floating substring. */
583 char *last, *last1;
584 char *s1 = s;
585
1aa99e6b
IH
586 t = HOP3c(s, -start_shift, strbeg);
587 last1 = last =
588 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
589 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
590 last = HOP3c(t, prog->float_max_offset, strend);
591 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
592 if (s < other_last)
593 s = other_last;
2c2d71f5
JH
594 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
595 /* fbm_instr() takes into account exact value of end-of-str
596 if the check is SvTAIL(ed). Since false positives are OK,
597 and end-of-str is not later than strend we are OK. */
598 s = fbm_instr((unsigned char*)s,
599 (unsigned char*)last + SvCUR(prog->float_substr)
600 - (SvTAIL(prog->float_substr)!=0),
601 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
602 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
603 (s ? "Found" : "Contradicts"),
604 PL_colors[0],
7b0972df
JH
605 (int)(SvCUR(prog->float_substr)
606 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
607 SvPVX(prog->float_substr),
608 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
609 if (!s) {
610 if (last1 == last) {
611 DEBUG_r(PerlIO_printf(Perl_debug_log,
612 ", giving up...\n"));
613 goto fail_finish;
614 }
615 DEBUG_r(PerlIO_printf(Perl_debug_log,
616 ", trying anchored starting at offset %ld...\n",
30944b6d 617 (long)(s1 + 1 - i_strpos)));
803ff556 618 other_last = last;
1aa99e6b 619 s = HOP3c(t, 1, strend);
2c2d71f5
JH
620 goto restart;
621 }
622 else {
623 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 624 (long)(s - i_strpos)));
803ff556 625 other_last = s; /* Fix this later. --Hugo */
30944b6d 626 s = s1;
2c2d71f5
JH
627 if (t == strpos)
628 goto try_at_start;
2c2d71f5
JH
629 goto try_at_offset;
630 }
631 }
cad2e5aa 632 }
2c2d71f5
JH
633
634 t = s - prog->check_offset_max;
2c2d71f5
JH
635 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
636 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
637 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
638 && t > strpos))) {
2c2d71f5
JH
639 /* Fixed substring is found far enough so that the match
640 cannot start at strpos. */
641 try_at_offset:
cad2e5aa 642 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
643 /* Eventually fbm_*() should handle this, but often
644 anchored_offset is not 0, so this check will not be wasted. */
645 /* XXXX In the code below we prefer to look for "^" even in
646 presence of anchored substrings. And we search even
647 beyond the found float position. These pessimizations
648 are historical artefacts only. */
649 find_anchor:
2c2d71f5 650 while (t < strend - prog->minlen) {
cad2e5aa 651 if (*t == '\n') {
4ee3650e 652 if (t < check_at - prog->check_offset_min) {
30944b6d 653 if (prog->anchored_substr) {
4ee3650e
GS
654 /* Since we moved from the found position,
655 we definitely contradict the found anchored
30944b6d
IZ
656 substr. Due to the above check we do not
657 contradict "check" substr.
658 Thus we can arrive here only if check substr
659 is float. Redo checking for "other"=="fixed".
660 */
9041c2e3 661 strpos = t + 1;
30944b6d
IZ
662 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
663 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
664 goto do_other_anchored;
665 }
4ee3650e
GS
666 /* We don't contradict the found floating substring. */
667 /* XXXX Why not check for STCLASS? */
cad2e5aa 668 s = t + 1;
2c2d71f5 669 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 670 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
671 goto set_useful;
672 }
4ee3650e
GS
673 /* Position contradicts check-string */
674 /* XXXX probably better to look for check-string
675 than for "\n", so one should lower the limit for t? */
676 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 677 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 678 other_last = strpos = s = t + 1;
cad2e5aa
JH
679 goto restart;
680 }
681 t++;
682 }
2c2d71f5
JH
683 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
684 PL_colors[0],PL_colors[1]));
685 goto fail_finish;
cad2e5aa 686 }
f5952150
GS
687 else {
688 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
689 PL_colors[0],PL_colors[1]));
690 }
cad2e5aa
JH
691 s = t;
692 set_useful:
2c2d71f5 693 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
694 }
695 else {
f5952150 696 /* The found string does not prohibit matching at strpos,
2c2d71f5 697 - no optimization of calling REx engine can be performed,
f5952150
GS
698 unless it was an MBOL and we are not after MBOL,
699 or a future STCLASS check will fail this. */
2c2d71f5
JH
700 try_at_start:
701 /* Even in this situation we may use MBOL flag if strpos is offset
702 wrt the start of the string. */
05b4157f 703 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 704 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
705 /* May be due to an implicit anchor of m{.*foo} */
706 && !(prog->reganch & ROPT_IMPLICIT))
707 {
cad2e5aa
JH
708 t = strpos;
709 goto find_anchor;
710 }
30944b6d 711 DEBUG_r( if (ml_anch)
f5952150
GS
712 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
713 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 714 );
2c2d71f5 715 success_at_start:
30944b6d 716 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 717 && prog->check_substr /* Could be deleted already */
cad2e5aa 718 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
719 && prog->check_substr == prog->float_substr)
720 {
cad2e5aa 721 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 722 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
723 SvREFCNT_dec(prog->check_substr);
724 prog->check_substr = Nullsv; /* disable */
725 prog->float_substr = Nullsv; /* clear */
5e39e1e5 726 check = Nullsv; /* abort */
cad2e5aa 727 s = strpos;
3cf5c195
IZ
728 /* XXXX This is a remnant of the old implementation. It
729 looks wasteful, since now INTUIT can use many
6eb5f6b9 730 other heuristics. */
cad2e5aa
JH
731 prog->reganch &= ~RE_USE_INTUIT;
732 }
733 else
734 s = strpos;
735 }
736
6eb5f6b9
JH
737 /* Last resort... */
738 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
739 if (prog->regstclass) {
740 /* minlen == 0 is possible if regstclass is \b or \B,
741 and the fixed substr is ''$.
742 Since minlen is already taken into account, s+1 is before strend;
743 accidentally, minlen >= 1 guaranties no false positives at s + 1
744 even for \b or \B. But (minlen? 1 : 0) below assumes that
745 regstclass does not come from lookahead... */
746 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
747 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 748 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 749 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 750 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 751 : 1);
6eb5f6b9 752 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
753 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
754 : (prog->float_substr
755 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
756 cl_l, strend)
757 : strend);
a1933d95 758 char *startpos = strbeg;
6eb5f6b9
JH
759
760 t = s;
76384e4a 761 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 762 PL_regdata = prog->data;
76384e4a
GS
763 PL_bostr = startpos;
764 }
6eb5f6b9
JH
765 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
766 if (!s) {
767#ifdef DEBUGGING
768 char *what;
769#endif
770 if (endpos == strend) {
771 DEBUG_r( PerlIO_printf(Perl_debug_log,
772 "Could not match STCLASS...\n") );
773 goto fail;
774 }
66e933ab
GS
775 DEBUG_r( PerlIO_printf(Perl_debug_log,
776 "This position contradicts STCLASS...\n") );
653099ff
GS
777 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
778 goto fail;
6eb5f6b9
JH
779 /* Contradict one of substrings */
780 if (prog->anchored_substr) {
6eb5f6b9
JH
781 if (prog->anchored_substr == check) {
782 DEBUG_r( what = "anchored" );
783 hop_and_restart:
1aa99e6b 784 s = HOP3c(t, 1, strend);
66e933ab
GS
785 if (s + start_shift + end_shift > strend) {
786 /* XXXX Should be taken into account earlier? */
787 DEBUG_r( PerlIO_printf(Perl_debug_log,
788 "Could not match STCLASS...\n") );
789 goto fail;
790 }
5e39e1e5
HS
791 if (!check)
792 goto giveup;
6eb5f6b9 793 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 794 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
795 what, (long)(s + start_shift - i_strpos)) );
796 goto restart;
797 }
66e933ab 798 /* Have both, check_string is floating */
6eb5f6b9
JH
799 if (t + start_shift >= check_at) /* Contradicts floating=check */
800 goto retry_floating_check;
801 /* Recheck anchored substring, but not floating... */
9041c2e3 802 s = check_at;
5e39e1e5
HS
803 if (!check)
804 goto giveup;
6eb5f6b9 805 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 806 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
807 (long)(other_last - i_strpos)) );
808 goto do_other_anchored;
809 }
60e71179
GS
810 /* Another way we could have checked stclass at the
811 current position only: */
812 if (ml_anch) {
813 s = t = t + 1;
5e39e1e5
HS
814 if (!check)
815 goto giveup;
60e71179 816 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
817 "Looking for /%s^%s/m starting at offset %ld...\n",
818 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 819 goto try_at_offset;
66e933ab 820 }
60e71179
GS
821 if (!prog->float_substr) /* Could have been deleted */
822 goto fail;
6eb5f6b9
JH
823 /* Check is floating subtring. */
824 retry_floating_check:
825 t = check_at - start_shift;
826 DEBUG_r( what = "floating" );
827 goto hop_and_restart;
828 }
829 DEBUG_r( if (t != s)
9041c2e3 830 PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
831 "By STCLASS: moving %ld --> %ld\n",
832 (long)(t - i_strpos), (long)(s - i_strpos));
833 else
9041c2e3 834 PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
835 "Does not contradict STCLASS...\n") );
836 }
5e39e1e5
HS
837 giveup:
838 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
839 PL_colors[4], (check ? "Guessed" : "Giving up"),
840 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 841 return s;
2c2d71f5
JH
842
843 fail_finish: /* Substring not found */
66e933ab
GS
844 if (prog->check_substr) /* could be removed already */
845 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 846 fail:
2c2d71f5 847 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
848 PL_colors[4],PL_colors[5]));
849 return Nullch;
850}
9661b544 851
6eb5f6b9 852/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
853STATIC char *
854S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 855{
6eb5f6b9
JH
856 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
857 char *m;
d8093b23
G
858 STRLEN ln;
859 unsigned int c1;
860 unsigned int c2;
6eb5f6b9
JH
861 char *e;
862 register I32 tmp = 1; /* Scratch variable? */
ffc61ed2 863 register bool do_utf8 = DO_UTF8(PL_reg_sv);
cad2e5aa 864
6eb5f6b9
JH
865 /* We know what class it must start with. */
866 switch (OP(c)) {
6eb5f6b9 867 case ANYOF:
a0ed51b3 868 while (s < strend) {
ffc61ed2 869 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
870 if (tmp && (norun || regtry(prog, s)))
871 goto got_it;
872 else
873 tmp = doevery;
a0ed51b3 874 }
6eb5f6b9
JH
875 else
876 tmp = 1;
ffc61ed2 877 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 878 }
6eb5f6b9
JH
879 break;
880 case EXACTF:
881 m = STRING(c);
882 ln = STR_LEN(c);
1aa99e6b
IH
883 if (UTF) {
884 c1 = to_utf8_lower((U8*)m);
885 c2 = to_utf8_upper((U8*)m);
886 }
887 else {
888 c1 = *(U8*)m;
889 c2 = PL_fold[c1];
890 }
6eb5f6b9
JH
891 goto do_exactf;
892 case EXACTFL:
893 m = STRING(c);
894 ln = STR_LEN(c);
d8093b23 895 c1 = *(U8*)m;
6eb5f6b9
JH
896 c2 = PL_fold_locale[c1];
897 do_exactf:
898 e = strend - ln;
b3c9acc1 899
6eb5f6b9
JH
900 if (norun && e < s)
901 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
902
903 if (do_utf8) {
904 STRLEN len;
905 if (c1 == c2)
906 while (s <= e) {
9041c2e3 907 if ( utf8_to_uvchr((U8*)s, &len) == c1
1aa99e6b
IH
908 && regtry(prog, s) )
909 goto got_it;
910 s += len;
911 }
912 else
913 while (s <= e) {
9041c2e3 914 UV c = utf8_to_uvchr((U8*)s, &len);
1aa99e6b
IH
915 if ( (c == c1 || c == c2) && regtry(prog, s) )
916 goto got_it;
917 s += len;
918 }
919 }
920 else {
921 if (c1 == c2)
922 while (s <= e) {
923 if ( *(U8*)s == c1
924 && (ln == 1 || !(OP(c) == EXACTF
925 ? ibcmp(s, m, ln)
926 : ibcmp_locale(s, m, ln)))
927 && (norun || regtry(prog, s)) )
928 goto got_it;
929 s++;
930 }
931 else
932 while (s <= e) {
933 if ( (*(U8*)s == c1 || *(U8*)s == c2)
934 && (ln == 1 || !(OP(c) == EXACTF
935 ? ibcmp(s, m, ln)
936 : ibcmp_locale(s, m, ln)))
937 && (norun || regtry(prog, s)) )
938 goto got_it;
939 s++;
940 }
b3c9acc1
IZ
941 }
942 break;
bbce6d69 943 case BOUNDL:
3280af22 944 PL_reg_flags |= RF_tainted;
bbce6d69 945 /* FALL THROUGH */
a0d0e21e 946 case BOUND:
ffc61ed2
JH
947 if (do_utf8) {
948 if (s == startpos)
949 tmp = '\n';
950 else {
1aa99e6b 951 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 952
2b9d42f0 953 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
954 }
955 tmp = ((OP(c) == BOUND ?
9041c2e3 956 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 957 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
958 while (s < strend) {
959 if (tmp == !(OP(c) == BOUND ?
960 swash_fetch(PL_utf8_alnum, (U8*)s) :
961 isALNUM_LC_utf8((U8*)s)))
962 {
963 tmp = !tmp;
964 if ((norun || regtry(prog, s)))
965 goto got_it;
966 }
967 s += UTF8SKIP(s);
a687059c 968 }
a0d0e21e 969 }
667bb95a 970 else {
ffc61ed2
JH
971 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
972 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
973 while (s < strend) {
974 if (tmp ==
975 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
976 tmp = !tmp;
977 if ((norun || regtry(prog, s)))
978 goto got_it;
979 }
980 s++;
a0ed51b3 981 }
a0ed51b3 982 }
6eb5f6b9 983 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
984 goto got_it;
985 break;
bbce6d69 986 case NBOUNDL:
3280af22 987 PL_reg_flags |= RF_tainted;
bbce6d69 988 /* FALL THROUGH */
a0d0e21e 989 case NBOUND:
ffc61ed2
JH
990 if (do_utf8) {
991 if (s == startpos)
992 tmp = '\n';
993 else {
1aa99e6b 994 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 995
2b9d42f0 996 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
997 }
998 tmp = ((OP(c) == NBOUND ?
9041c2e3 999 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1000 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1001 while (s < strend) {
1002 if (tmp == !(OP(c) == NBOUND ?
1003 swash_fetch(PL_utf8_alnum, (U8*)s) :
1004 isALNUM_LC_utf8((U8*)s)))
1005 tmp = !tmp;
1006 else if ((norun || regtry(prog, s)))
1007 goto got_it;
1008 s += UTF8SKIP(s);
1009 }
a0d0e21e 1010 }
667bb95a 1011 else {
ffc61ed2
JH
1012 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
1013 tmp = ((OP(c) == NBOUND ?
1014 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1015 while (s < strend) {
1016 if (tmp ==
1017 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1018 tmp = !tmp;
1019 else if ((norun || regtry(prog, s)))
1020 goto got_it;
1021 s++;
1022 }
a0ed51b3 1023 }
6eb5f6b9 1024 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1025 goto got_it;
1026 break;
a0d0e21e 1027 case ALNUM:
ffc61ed2 1028 if (do_utf8) {
8269fa76 1029 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1030 while (s < strend) {
1031 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
1032 if (tmp && (norun || regtry(prog, s)))
1033 goto got_it;
1034 else
1035 tmp = doevery;
1036 }
bbce6d69 1037 else
ffc61ed2
JH
1038 tmp = 1;
1039 s += UTF8SKIP(s);
bbce6d69 1040 }
bbce6d69 1041 }
ffc61ed2
JH
1042 else {
1043 while (s < strend) {
1044 if (isALNUM(*s)) {
1045 if (tmp && (norun || regtry(prog, s)))
1046 goto got_it;
1047 else
1048 tmp = doevery;
1049 }
a0ed51b3 1050 else
ffc61ed2
JH
1051 tmp = 1;
1052 s++;
a0ed51b3 1053 }
a0ed51b3
LW
1054 }
1055 break;
bbce6d69 1056 case ALNUML:
3280af22 1057 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1058 if (do_utf8) {
1059 while (s < strend) {
1060 if (isALNUM_LC_utf8((U8*)s)) {
1061 if (tmp && (norun || regtry(prog, s)))
1062 goto got_it;
1063 else
1064 tmp = doevery;
1065 }
a687059c 1066 else
ffc61ed2
JH
1067 tmp = 1;
1068 s += UTF8SKIP(s);
a0d0e21e 1069 }
a0d0e21e 1070 }
ffc61ed2
JH
1071 else {
1072 while (s < strend) {
1073 if (isALNUM_LC(*s)) {
1074 if (tmp && (norun || regtry(prog, s)))
1075 goto got_it;
1076 else
1077 tmp = doevery;
1078 }
a0ed51b3 1079 else
ffc61ed2
JH
1080 tmp = 1;
1081 s++;
a0ed51b3 1082 }
a0ed51b3
LW
1083 }
1084 break;
a0d0e21e 1085 case NALNUM:
ffc61ed2 1086 if (do_utf8) {
8269fa76 1087 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1088 while (s < strend) {
1089 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
1090 if (tmp && (norun || regtry(prog, s)))
1091 goto got_it;
1092 else
1093 tmp = doevery;
1094 }
bbce6d69 1095 else
ffc61ed2
JH
1096 tmp = 1;
1097 s += UTF8SKIP(s);
bbce6d69 1098 }
bbce6d69 1099 }
ffc61ed2
JH
1100 else {
1101 while (s < strend) {
1102 if (!isALNUM(*s)) {
1103 if (tmp && (norun || regtry(prog, s)))
1104 goto got_it;
1105 else
1106 tmp = doevery;
1107 }
a0ed51b3 1108 else
ffc61ed2
JH
1109 tmp = 1;
1110 s++;
a0ed51b3 1111 }
a0ed51b3
LW
1112 }
1113 break;
bbce6d69 1114 case NALNUML:
3280af22 1115 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1116 if (do_utf8) {
1117 while (s < strend) {
1118 if (!isALNUM_LC_utf8((U8*)s)) {
1119 if (tmp && (norun || regtry(prog, s)))
1120 goto got_it;
1121 else
1122 tmp = doevery;
1123 }
a687059c 1124 else
ffc61ed2
JH
1125 tmp = 1;
1126 s += UTF8SKIP(s);
a687059c 1127 }
a0d0e21e 1128 }
ffc61ed2
JH
1129 else {
1130 while (s < strend) {
1131 if (!isALNUM_LC(*s)) {
1132 if (tmp && (norun || regtry(prog, s)))
1133 goto got_it;
1134 else
1135 tmp = doevery;
1136 }
a0ed51b3 1137 else
ffc61ed2
JH
1138 tmp = 1;
1139 s++;
a0ed51b3 1140 }
a0ed51b3
LW
1141 }
1142 break;
a0d0e21e 1143 case SPACE:
ffc61ed2 1144 if (do_utf8) {
8269fa76 1145 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2
JH
1146 while (s < strend) {
1147 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
1148 if (tmp && (norun || regtry(prog, s)))
1149 goto got_it;
1150 else
1151 tmp = doevery;
1152 }
a0d0e21e 1153 else
ffc61ed2
JH
1154 tmp = 1;
1155 s += UTF8SKIP(s);
2304df62 1156 }
a0d0e21e 1157 }
ffc61ed2
JH
1158 else {
1159 while (s < strend) {
1160 if (isSPACE(*s)) {
1161 if (tmp && (norun || regtry(prog, s)))
1162 goto got_it;
1163 else
1164 tmp = doevery;
1165 }
a0ed51b3 1166 else
ffc61ed2
JH
1167 tmp = 1;
1168 s++;
a0ed51b3 1169 }
a0ed51b3
LW
1170 }
1171 break;
bbce6d69 1172 case SPACEL:
3280af22 1173 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1174 if (do_utf8) {
1175 while (s < strend) {
1176 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1177 if (tmp && (norun || regtry(prog, s)))
1178 goto got_it;
1179 else
1180 tmp = doevery;
1181 }
bbce6d69 1182 else
ffc61ed2
JH
1183 tmp = 1;
1184 s += UTF8SKIP(s);
bbce6d69 1185 }
bbce6d69 1186 }
ffc61ed2
JH
1187 else {
1188 while (s < strend) {
1189 if (isSPACE_LC(*s)) {
1190 if (tmp && (norun || regtry(prog, s)))
1191 goto got_it;
1192 else
1193 tmp = doevery;
1194 }
a0ed51b3 1195 else
ffc61ed2
JH
1196 tmp = 1;
1197 s++;
a0ed51b3 1198 }
a0ed51b3
LW
1199 }
1200 break;
a0d0e21e 1201 case NSPACE:
ffc61ed2 1202 if (do_utf8) {
8269fa76 1203 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2
JH
1204 while (s < strend) {
1205 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
1206 if (tmp && (norun || regtry(prog, s)))
1207 goto got_it;
1208 else
1209 tmp = doevery;
1210 }
a0d0e21e 1211 else
ffc61ed2
JH
1212 tmp = 1;
1213 s += UTF8SKIP(s);
a687059c 1214 }
a0d0e21e 1215 }
ffc61ed2
JH
1216 else {
1217 while (s < strend) {
1218 if (!isSPACE(*s)) {
1219 if (tmp && (norun || regtry(prog, s)))
1220 goto got_it;
1221 else
1222 tmp = doevery;
1223 }
a0ed51b3 1224 else
ffc61ed2
JH
1225 tmp = 1;
1226 s++;
a0ed51b3 1227 }
a0ed51b3
LW
1228 }
1229 break;
bbce6d69 1230 case NSPACEL:
3280af22 1231 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1232 if (do_utf8) {
1233 while (s < strend) {
1234 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1235 if (tmp && (norun || regtry(prog, s)))
1236 goto got_it;
1237 else
1238 tmp = doevery;
1239 }
bbce6d69 1240 else
ffc61ed2
JH
1241 tmp = 1;
1242 s += UTF8SKIP(s);
bbce6d69 1243 }
bbce6d69 1244 }
ffc61ed2
JH
1245 else {
1246 while (s < strend) {
1247 if (!isSPACE_LC(*s)) {
1248 if (tmp && (norun || regtry(prog, s)))
1249 goto got_it;
1250 else
1251 tmp = doevery;
1252 }
a0ed51b3 1253 else
ffc61ed2
JH
1254 tmp = 1;
1255 s++;
a0ed51b3 1256 }
a0ed51b3
LW
1257 }
1258 break;
a0d0e21e 1259 case DIGIT:
ffc61ed2 1260 if (do_utf8) {
8269fa76 1261 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2
JH
1262 while (s < strend) {
1263 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
1264 if (tmp && (norun || regtry(prog, s)))
1265 goto got_it;
1266 else
1267 tmp = doevery;
1268 }
a0d0e21e 1269 else
ffc61ed2
JH
1270 tmp = 1;
1271 s += UTF8SKIP(s);
2b69d0c2 1272 }
a0d0e21e 1273 }
ffc61ed2
JH
1274 else {
1275 while (s < strend) {
1276 if (isDIGIT(*s)) {
1277 if (tmp && (norun || regtry(prog, s)))
1278 goto got_it;
1279 else
1280 tmp = doevery;
1281 }
a0ed51b3 1282 else
ffc61ed2
JH
1283 tmp = 1;
1284 s++;
a0ed51b3 1285 }
a0ed51b3
LW
1286 }
1287 break;
b8c5462f
JH
1288 case DIGITL:
1289 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1290 if (do_utf8) {
1291 while (s < strend) {
1292 if (isDIGIT_LC_utf8((U8*)s)) {
1293 if (tmp && (norun || regtry(prog, s)))
1294 goto got_it;
1295 else
1296 tmp = doevery;
1297 }
b8c5462f 1298 else
ffc61ed2
JH
1299 tmp = 1;
1300 s += UTF8SKIP(s);
b8c5462f 1301 }
b8c5462f 1302 }
ffc61ed2
JH
1303 else {
1304 while (s < strend) {
1305 if (isDIGIT_LC(*s)) {
1306 if (tmp && (norun || regtry(prog, s)))
1307 goto got_it;
1308 else
1309 tmp = doevery;
1310 }
b8c5462f 1311 else
ffc61ed2
JH
1312 tmp = 1;
1313 s++;
b8c5462f 1314 }
b8c5462f
JH
1315 }
1316 break;
a0d0e21e 1317 case NDIGIT:
ffc61ed2 1318 if (do_utf8) {
8269fa76 1319 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2
JH
1320 while (s < strend) {
1321 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
1322 if (tmp && (norun || regtry(prog, s)))
1323 goto got_it;
1324 else
1325 tmp = doevery;
1326 }
a0d0e21e 1327 else
ffc61ed2
JH
1328 tmp = 1;
1329 s += UTF8SKIP(s);
a687059c 1330 }
a0d0e21e 1331 }
ffc61ed2
JH
1332 else {
1333 while (s < strend) {
1334 if (!isDIGIT(*s)) {
1335 if (tmp && (norun || regtry(prog, s)))
1336 goto got_it;
1337 else
1338 tmp = doevery;
1339 }
a0ed51b3 1340 else
ffc61ed2
JH
1341 tmp = 1;
1342 s++;
a0ed51b3 1343 }
a0ed51b3
LW
1344 }
1345 break;
b8c5462f
JH
1346 case NDIGITL:
1347 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1348 if (do_utf8) {
1349 while (s < strend) {
1350 if (!isDIGIT_LC_utf8((U8*)s)) {
1351 if (tmp && (norun || regtry(prog, s)))
1352 goto got_it;
1353 else
1354 tmp = doevery;
1355 }
b8c5462f 1356 else
ffc61ed2
JH
1357 tmp = 1;
1358 s += UTF8SKIP(s);
b8c5462f 1359 }
a0ed51b3 1360 }
ffc61ed2
JH
1361 else {
1362 while (s < strend) {
1363 if (!isDIGIT_LC(*s)) {
1364 if (tmp && (norun || regtry(prog, s)))
1365 goto got_it;
1366 else
1367 tmp = doevery;
1368 }
cf93c79d 1369 else
ffc61ed2
JH
1370 tmp = 1;
1371 s++;
b8c5462f 1372 }
b8c5462f
JH
1373 }
1374 break;
b3c9acc1 1375 default:
3c3eec57
GS
1376 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1377 break;
d6a28714 1378 }
6eb5f6b9
JH
1379 return 0;
1380 got_it:
1381 return s;
1382}
1383
1384/*
1385 - regexec_flags - match a regexp against a string
1386 */
1387I32
1388Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1389 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1390/* strend: pointer to null at end of string */
1391/* strbeg: real beginning of string */
1392/* minend: end of match must be >=minend after stringarg. */
1393/* data: May be used for some additional optimizations. */
1394/* nosave: For optimizations. */
1395{
6eb5f6b9
JH
1396 register char *s;
1397 register regnode *c;
1398 register char *startpos = stringarg;
6eb5f6b9
JH
1399 I32 minlen; /* must match at least this many chars */
1400 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1401 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1402 constant substr. */ /* CC */
1403 I32 end_shift = 0; /* Same for the end. */ /* CC */
1404 I32 scream_pos = -1; /* Internal iterator of scream. */
1405 char *scream_olds;
1406 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1407 bool do_utf8 = DO_UTF8(sv);
6eb5f6b9
JH
1408
1409 PL_regcc = 0;
1410
1411 cache_re(prog);
1412#ifdef DEBUGGING
aea4f609 1413 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1414#endif
1415
1416 /* Be paranoid... */
1417 if (prog == NULL || startpos == NULL) {
1418 Perl_croak(aTHX_ "NULL regexp parameter");
1419 return 0;
1420 }
1421
1422 minlen = prog->minlen;
1aa99e6b 1423 if (do_utf8) {
3baa4c62
JH
1424 if (!(prog->reganch & ROPT_SANY_SEEN))
1425 if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
1aa99e6b
IH
1426 }
1427 else {
1428 if (strend - startpos < minlen) goto phooey;
1429 }
6eb5f6b9
JH
1430
1431 if (startpos == strbeg) /* is ^ valid at stringarg? */
1432 PL_regprev = '\n';
1433 else {
1aa99e6b
IH
1434 if (prog->reganch & ROPT_UTF8 && do_utf8) {
1435 U8 *s = reghop3((U8*)stringarg, -1, (U8*)strbeg);
2b9d42f0 1436 PL_regprev = utf8n_to_uvchr(s, (U8*)stringarg - s, NULL, 0);
1aa99e6b
IH
1437 }
1438 else
1439 PL_regprev = (U32)stringarg[-1];
6eb5f6b9
JH
1440 if (!PL_multiline && PL_regprev == '\n')
1441 PL_regprev = '\0'; /* force ^ to NOT match */
1442 }
1443
1444 /* Check validity of program. */
1445 if (UCHARAT(prog->program) != REG_MAGIC) {
1446 Perl_croak(aTHX_ "corrupted regexp program");
1447 }
1448
1449 PL_reg_flags = 0;
1450 PL_reg_eval_set = 0;
1451 PL_reg_maxiter = 0;
1452
1453 if (prog->reganch & ROPT_UTF8)
1454 PL_reg_flags |= RF_utf8;
1455
1456 /* Mark beginning of line for ^ and lookbehind. */
1457 PL_regbol = startpos;
1458 PL_bostr = strbeg;
1459 PL_reg_sv = sv;
1460
1461 /* Mark end of line for $ (and such) */
1462 PL_regeol = strend;
1463
1464 /* see how far we have to get to not match where we matched before */
1465 PL_regtill = startpos+minend;
1466
1467 /* We start without call_cc context. */
1468 PL_reg_call_cc = 0;
1469
1470 /* If there is a "must appear" string, look for it. */
1471 s = startpos;
1472
1473 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1474 MAGIC *mg;
1475
1476 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1477 PL_reg_ganch = startpos;
1478 else if (sv && SvTYPE(sv) >= SVt_PVMG
1479 && SvMAGIC(sv)
1480 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1481 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1482 if (prog->reganch & ROPT_ANCH_GPOS) {
1483 if (s > PL_reg_ganch)
1484 goto phooey;
1485 s = PL_reg_ganch;
1486 }
1487 }
1488 else /* pos() not defined */
1489 PL_reg_ganch = strbeg;
1490 }
1491
699c3c34
JH
1492 if (do_utf8 == (UTF!=0) &&
1493 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1494 re_scream_pos_data d;
1495
1496 d.scream_olds = &scream_olds;
1497 d.scream_pos = &scream_pos;
1498 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1499 if (!s)
1500 goto phooey; /* not present */
1501 }
1502
1503 DEBUG_r( if (!PL_colorset) reginitcolors() );
1504 DEBUG_r(PerlIO_printf(Perl_debug_log,
1505 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1506 PL_colors[4],PL_colors[5],PL_colors[0],
1507 prog->precomp,
1508 PL_colors[1],
1509 (strlen(prog->precomp) > 60 ? "..." : ""),
1510 PL_colors[0],
1511 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1512 startpos, PL_colors[1],
1513 (strend - startpos > 60 ? "..." : ""))
1514 );
1515
1516 /* Simplest case: anchored match need be tried only once. */
1517 /* [unless only anchor is BOL and multiline is set] */
1518 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1519 if (s == startpos && regtry(prog, startpos))
1520 goto got_it;
1521 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1522 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1523 {
1524 char *end;
1525
1526 if (minlen)
1527 dontbother = minlen - 1;
1aa99e6b 1528 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1529 /* for multiline we only have to try after newlines */
1530 if (prog->check_substr) {
1531 if (s == startpos)
1532 goto after_try;
1533 while (1) {
1534 if (regtry(prog, s))
1535 goto got_it;
1536 after_try:
1537 if (s >= end)
1538 goto phooey;
1539 if (prog->reganch & RE_USE_INTUIT) {
1540 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1541 if (!s)
1542 goto phooey;
1543 }
1544 else
1545 s++;
1546 }
1547 } else {
1548 if (s > startpos)
1549 s--;
1550 while (s < end) {
1551 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1552 if (regtry(prog, s))
1553 goto got_it;
1554 }
1555 }
1556 }
1557 }
1558 goto phooey;
1559 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1560 if (regtry(prog, PL_reg_ganch))
1561 goto got_it;
1562 goto phooey;
1563 }
1564
1565 /* Messy cases: unanchored match. */
9041c2e3 1566 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1567 /* we have /x+whatever/ */
1568 /* it must be a one character string (XXXX Except UTF?) */
1569 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1570#ifdef DEBUGGING
1571 int did_match = 0;
1572#endif
1573
1aa99e6b 1574 if (do_utf8) {
6eb5f6b9
JH
1575 while (s < strend) {
1576 if (*s == ch) {
bf93d4cc 1577 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1578 if (regtry(prog, s)) goto got_it;
1579 s += UTF8SKIP(s);
1580 while (s < strend && *s == ch)
1581 s += UTF8SKIP(s);
1582 }
1583 s += UTF8SKIP(s);
1584 }
1585 }
1586 else {
1587 while (s < strend) {
1588 if (*s == ch) {
bf93d4cc 1589 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1590 if (regtry(prog, s)) goto got_it;
1591 s++;
1592 while (s < strend && *s == ch)
1593 s++;
1594 }
1595 s++;
1596 }
1597 }
bf93d4cc
GS
1598 DEBUG_r(did_match ||
1599 PerlIO_printf(Perl_debug_log,
1600 "Did not find anchored character...\n"));
6eb5f6b9
JH
1601 }
1602 /*SUPPRESS 560*/
1aa99e6b
IH
1603 else if (do_utf8 == (UTF!=0) &&
1604 (prog->anchored_substr != Nullsv
9041c2e3 1605 || (prog->float_substr != Nullsv
1aa99e6b 1606 && prog->float_max_offset < strend - s))) {
9041c2e3 1607 SV *must = prog->anchored_substr
6eb5f6b9 1608 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1609 I32 back_max =
6eb5f6b9 1610 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1611 I32 back_min =
6eb5f6b9 1612 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1613 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1614 -(I32)(CHR_SVLEN(must)
1aa99e6b 1615 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1616 char *last1; /* Last position checked before */
bf93d4cc
GS
1617#ifdef DEBUGGING
1618 int did_match = 0;
1619#endif
6eb5f6b9
JH
1620
1621 if (s > PL_bostr)
1622 last1 = HOPc(s, -1);
1623 else
1624 last1 = s - 1; /* bogus */
1625
1626 /* XXXX check_substr already used to find `s', can optimize if
1627 check_substr==must. */
1628 scream_pos = -1;
1629 dontbother = end_shift;
1630 strend = HOPc(strend, -dontbother);
1631 while ( (s <= last) &&
9041c2e3 1632 ((flags & REXEC_SCREAM)
1aa99e6b 1633 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1634 end_shift, &scream_pos, 0))
1aa99e6b 1635 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1636 (unsigned char*)strend, must,
6eb5f6b9 1637 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1638 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1639 if (HOPc(s, -back_max) > last1) {
1640 last1 = HOPc(s, -back_min);
1641 s = HOPc(s, -back_max);
1642 }
1643 else {
1644 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1645
1646 last1 = HOPc(s, -back_min);
1647 s = t;
1648 }
1aa99e6b 1649 if (do_utf8) {
6eb5f6b9
JH
1650 while (s <= last1) {
1651 if (regtry(prog, s))
1652 goto got_it;
1653 s += UTF8SKIP(s);
1654 }
1655 }
1656 else {
1657 while (s <= last1) {
1658 if (regtry(prog, s))
1659 goto got_it;
1660 s++;
1661 }
1662 }
1663 }
bf93d4cc
GS
1664 DEBUG_r(did_match ||
1665 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1666 ((must == prog->anchored_substr)
1667 ? "anchored" : "floating"),
1668 PL_colors[0],
1669 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1670 SvPVX(must),
1671 PL_colors[1], (SvTAIL(must) ? "$" : "")));
6eb5f6b9
JH
1672 goto phooey;
1673 }
155aba94 1674 else if ((c = prog->regstclass)) {
66e933ab
GS
1675 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1676 /* don't bother with what can't match */
6eb5f6b9 1677 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1678 DEBUG_r({
1679 SV *prop = sv_newmortal();
1680 regprop(prop, c);
1681 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s);
1682 });
6eb5f6b9
JH
1683 if (find_byclass(prog, c, s, strend, startpos, 0))
1684 goto got_it;
bf93d4cc 1685 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1686 }
1687 else {
1688 dontbother = 0;
1689 if (prog->float_substr != Nullsv) { /* Trim the end. */
1690 char *last;
d6a28714
JH
1691
1692 if (flags & REXEC_SCREAM) {
1693 last = screaminstr(sv, prog->float_substr, s - strbeg,
1694 end_shift, &scream_pos, 1); /* last one */
1695 if (!last)
ffc61ed2 1696 last = scream_olds; /* Only one occurrence. */
b8c5462f 1697 }
d6a28714
JH
1698 else {
1699 STRLEN len;
1700 char *little = SvPV(prog->float_substr, len);
1701
1702 if (SvTAIL(prog->float_substr)) {
1703 if (memEQ(strend - len + 1, little, len - 1))
1704 last = strend - len + 1;
1705 else if (!PL_multiline)
9041c2e3 1706 last = memEQ(strend - len, little, len)
d6a28714 1707 ? strend - len : Nullch;
b8c5462f 1708 else
d6a28714
JH
1709 goto find_last;
1710 } else {
1711 find_last:
9041c2e3 1712 if (len)
d6a28714 1713 last = rninstr(s, strend, little, little + len);
b8c5462f 1714 else
d6a28714 1715 last = strend; /* matching `$' */
b8c5462f 1716 }
b8c5462f 1717 }
bf93d4cc
GS
1718 if (last == NULL) {
1719 DEBUG_r(PerlIO_printf(Perl_debug_log,
1720 "%sCan't trim the tail, match fails (should not happen)%s\n",
1721 PL_colors[4],PL_colors[5]));
1722 goto phooey; /* Should not happen! */
1723 }
d6a28714
JH
1724 dontbother = strend - last + prog->float_min_offset;
1725 }
1726 if (minlen && (dontbother < minlen))
1727 dontbother = minlen - 1;
1728 strend -= dontbother; /* this one's always in bytes! */
1729 /* We don't know much -- general case. */
1aa99e6b 1730 if (do_utf8) {
d6a28714
JH
1731 for (;;) {
1732 if (regtry(prog, s))
1733 goto got_it;
1734 if (s >= strend)
1735 break;
b8c5462f 1736 s += UTF8SKIP(s);
d6a28714
JH
1737 };
1738 }
1739 else {
1740 do {
1741 if (regtry(prog, s))
1742 goto got_it;
1743 } while (s++ < strend);
1744 }
1745 }
1746
1747 /* Failure. */
1748 goto phooey;
1749
1750got_it:
1751 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1752
1753 if (PL_reg_eval_set) {
1754 /* Preserve the current value of $^R */
1755 if (oreplsv != GvSV(PL_replgv))
1756 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1757 restored, the value remains
1758 the same. */
1759 restore_pos(aTHXo_ 0);
1760 }
1761
1762 /* make sure $`, $&, $', and $digit will work later */
1763 if ( !(flags & REXEC_NOT_FIRST) ) {
1764 if (RX_MATCH_COPIED(prog)) {
1765 Safefree(prog->subbeg);
1766 RX_MATCH_COPIED_off(prog);
1767 }
1768 if (flags & REXEC_COPY_STR) {
1769 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1770
1771 s = savepvn(strbeg, i);
1772 prog->subbeg = s;
1773 prog->sublen = i;
1774 RX_MATCH_COPIED_on(prog);
1775 }
1776 else {
1777 prog->subbeg = strbeg;
1778 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1779 }
1780 }
9041c2e3 1781
d6a28714
JH
1782 return 1;
1783
1784phooey:
bf93d4cc
GS
1785 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1786 PL_colors[4],PL_colors[5]));
d6a28714
JH
1787 if (PL_reg_eval_set)
1788 restore_pos(aTHXo_ 0);
1789 return 0;
1790}
1791
1792/*
1793 - regtry - try match at specific point
1794 */
1795STATIC I32 /* 0 failure, 1 success */
1796S_regtry(pTHX_ regexp *prog, char *startpos)
1797{
d6a28714
JH
1798 register I32 i;
1799 register I32 *sp;
1800 register I32 *ep;
1801 CHECKPOINT lastcp;
1802
02db2b7b
IZ
1803#ifdef DEBUGGING
1804 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1805#endif
d6a28714
JH
1806 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1807 MAGIC *mg;
1808
1809 PL_reg_eval_set = RS_init;
1810 DEBUG_r(DEBUG_s(
b900a521
JH
1811 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1812 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1813 ));
e8347627 1814 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1815 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1816 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1817 SAVETMPS;
1818 /* Apparently this is not needed, judging by wantarray. */
e8347627 1819 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1820 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1821
1822 if (PL_reg_sv) {
1823 /* Make $_ available to executed code. */
1824 if (PL_reg_sv != DEFSV) {
1825 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1826 SAVESPTR(DEFSV);
1827 DEFSV = PL_reg_sv;
b8c5462f 1828 }
d6a28714 1829
9041c2e3 1830 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
d6a28714
JH
1831 && (mg = mg_find(PL_reg_sv, 'g')))) {
1832 /* prepare for quick setting of pos */
1833 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1834 mg = mg_find(PL_reg_sv, 'g');
1835 mg->mg_len = -1;
b8c5462f 1836 }
d6a28714
JH
1837 PL_reg_magic = mg;
1838 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1839 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1840 }
1841 if (!PL_reg_curpm)
0f79a09d 1842 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1843 PL_reg_curpm->op_pmregexp = prog;
1844 PL_reg_oldcurpm = PL_curpm;
1845 PL_curpm = PL_reg_curpm;
1846 if (RX_MATCH_COPIED(prog)) {
1847 /* Here is a serious problem: we cannot rewrite subbeg,
1848 since it may be needed if this match fails. Thus
1849 $` inside (?{}) could fail... */
1850 PL_reg_oldsaved = prog->subbeg;
1851 PL_reg_oldsavedlen = prog->sublen;
1852 RX_MATCH_COPIED_off(prog);
1853 }
1854 else
1855 PL_reg_oldsaved = Nullch;
1856 prog->subbeg = PL_bostr;
1857 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1858 }
1859 prog->startp[0] = startpos - PL_bostr;
1860 PL_reginput = startpos;
1861 PL_regstartp = prog->startp;
1862 PL_regendp = prog->endp;
1863 PL_reglastparen = &prog->lastparen;
1864 prog->lastparen = 0;
1865 PL_regsize = 0;
1866 DEBUG_r(PL_reg_starttry = startpos);
1867 if (PL_reg_start_tmpl <= prog->nparens) {
1868 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1869 if(PL_reg_start_tmp)
1870 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1871 else
1872 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1873 }
1874
1875 /* XXXX What this code is doing here?!!! There should be no need
1876 to do this again and again, PL_reglastparen should take care of
3dd2943c 1877 this! --ilya*/
dafc8851
JH
1878
1879 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1880 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1881 * PL_reglastparen), is not needed at all by the test suite
1882 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1883 * enough, for building DynaLoader, or otherwise this
1884 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1885 * will happen. Meanwhile, this code *is* needed for the
1886 * above-mentioned test suite tests to succeed. The common theme
1887 * on those tests seems to be returning null fields from matches.
1888 * --jhi */
dafc8851 1889#if 1
d6a28714
JH
1890 sp = prog->startp;
1891 ep = prog->endp;
1892 if (prog->nparens) {
09e8ae3b 1893 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1894 *++sp = -1;
1895 *++ep = -1;
1896 }
1897 }
dafc8851 1898#endif
02db2b7b 1899 REGCP_SET(lastcp);
d6a28714
JH
1900 if (regmatch(prog->program + 1)) {
1901 prog->endp[0] = PL_reginput - PL_bostr;
1902 return 1;
1903 }
02db2b7b 1904 REGCP_UNWIND(lastcp);
d6a28714
JH
1905 return 0;
1906}
1907
02db2b7b
IZ
1908#define RE_UNWIND_BRANCH 1
1909#define RE_UNWIND_BRANCHJ 2
1910
1911union re_unwind_t;
1912
1913typedef struct { /* XX: makes sense to enlarge it... */
1914 I32 type;
1915 I32 prev;
1916 CHECKPOINT lastcp;
1917} re_unwind_generic_t;
1918
1919typedef struct {
1920 I32 type;
1921 I32 prev;
1922 CHECKPOINT lastcp;
1923 I32 lastparen;
1924 regnode *next;
1925 char *locinput;
1926 I32 nextchr;
1927#ifdef DEBUGGING
1928 int regindent;
1929#endif
1930} re_unwind_branch_t;
1931
1932typedef union re_unwind_t {
1933 I32 type;
1934 re_unwind_generic_t generic;
1935 re_unwind_branch_t branch;
1936} re_unwind_t;
1937
d6a28714
JH
1938/*
1939 - regmatch - main matching routine
1940 *
1941 * Conceptually the strategy is simple: check to see whether the current
1942 * node matches, call self recursively to see whether the rest matches,
1943 * and then act accordingly. In practice we make some effort to avoid
1944 * recursion, in particular by going through "ordinary" nodes (that don't
1945 * need to know whether the rest of the match failed) by a loop instead of
1946 * by recursion.
1947 */
1948/* [lwall] I've hoisted the register declarations to the outer block in order to
1949 * maybe save a little bit of pushing and popping on the stack. It also takes
1950 * advantage of machines that use a register save mask on subroutine entry.
1951 */
1952STATIC I32 /* 0 failure, 1 success */
1953S_regmatch(pTHX_ regnode *prog)
1954{
d6a28714
JH
1955 register regnode *scan; /* Current node. */
1956 regnode *next; /* Next node. */
1957 regnode *inner; /* Next node in internal branch. */
1958 register I32 nextchr; /* renamed nextchr - nextchar colides with
1959 function of same name */
1960 register I32 n; /* no or next */
1961 register I32 ln; /* len or last */
1962 register char *s; /* operand or save */
1963 register char *locinput = PL_reginput;
1964 register I32 c1, c2, paren; /* case fold search, parenth */
1965 int minmod = 0, sw = 0, logical = 0;
02db2b7b
IZ
1966 I32 unwind = 0;
1967 I32 firstcp = PL_savestack_ix;
ffc61ed2 1968 register bool do_utf8 = DO_UTF8(PL_reg_sv);
02db2b7b 1969
d6a28714
JH
1970#ifdef DEBUGGING
1971 PL_regindent++;
1972#endif
1973
1974 /* Note that nextchr is a byte even in UTF */
1975 nextchr = UCHARAT(locinput);
1976 scan = prog;
1977 while (scan != NULL) {
1978#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
02db2b7b 1979#if 1
d6a28714
JH
1980# define sayYES goto yes
1981# define sayNO goto no
7821416a
IZ
1982# define sayYES_FINAL goto yes_final
1983# define sayYES_LOUD goto yes_loud
1984# define sayNO_FINAL goto no_final
1985# define sayNO_SILENT goto do_no
d6a28714
JH
1986# define saySAME(x) if (x) goto yes; else goto no
1987# define REPORT_CODE_OFF 24
1988#else
1989# define sayYES return 1
1990# define sayNO return 0
7821416a
IZ
1991# define sayYES_FINAL return 1
1992# define sayYES_LOUD return 1
1993# define sayNO_FINAL return 0
1994# define sayNO_SILENT return 0
d6a28714
JH
1995# define saySAME(x) return x
1996#endif
1997 DEBUG_r( {
1998 SV *prop = sv_newmortal();
1999 int docolor = *PL_colors[0];
2000 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 2001 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2002 /* The part of the string before starttry has one color
2003 (pref0_len chars), between starttry and current
2004 position another one (pref_len - pref0_len chars),
2005 after the current position the third one.
2006 We assume that pref0_len <= pref_len, otherwise we
2007 decrease pref0_len. */
9041c2e3 2008 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2009 ? (5 + taill) - l : locinput - PL_bostr;
2010 int pref0_len;
d6a28714 2011
1aa99e6b
IH
2012 while (UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2013 pref_len++;
2014 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2015 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2016 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2017 ? (5 + taill) - pref_len : PL_regeol - locinput);
1aa99e6b
IH
2018 while (UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2019 l--;
d6a28714
JH
2020 if (pref0_len < 0)
2021 pref0_len = 0;
2022 if (pref0_len > pref_len)
2023 pref0_len = pref_len;
2024 regprop(prop, scan);
9041c2e3 2025 PerlIO_printf(Perl_debug_log,
b900a521 2026 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
9041c2e3
NIS
2027 (IV)(locinput - PL_bostr),
2028 PL_colors[4], pref0_len,
d6a28714 2029 locinput - pref_len, PL_colors[5],
9041c2e3 2030 PL_colors[2], pref_len - pref0_len,
d6a28714
JH
2031 locinput - pref_len + pref0_len, PL_colors[3],
2032 (docolor ? "" : "> <"),
2033 PL_colors[0], l, locinput, PL_colors[1],
2034 15 - l - pref_len + 1,
2035 "",
b900a521 2036 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
2037 SvPVX(prop));
2038 } );
2039
2040 next = scan + NEXT_OFF(scan);
2041 if (next == scan)
2042 next = NULL;
2043
2044 switch (OP(scan)) {
2045 case BOL:
2046 if (locinput == PL_bostr
2047 ? PL_regprev == '\n'
9041c2e3 2048 : (PL_multiline &&
d6a28714
JH
2049 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2050 {
2051 /* regtill = regbol; */
b8c5462f
JH
2052 break;
2053 }
d6a28714
JH
2054 sayNO;
2055 case MBOL:
2056 if (locinput == PL_bostr
2057 ? PL_regprev == '\n'
2058 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2059 {
b8c5462f
JH
2060 break;
2061 }
d6a28714
JH
2062 sayNO;
2063 case SBOL:
c2a73568 2064 if (locinput == PL_bostr)
b8c5462f 2065 break;
d6a28714
JH
2066 sayNO;
2067 case GPOS:
2068 if (locinput == PL_reg_ganch)
2069 break;
2070 sayNO;
2071 case EOL:
2072 if (PL_multiline)
2073 goto meol;
2074 else
2075 goto seol;
2076 case MEOL:
2077 meol:
2078 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2079 sayNO;
b8c5462f 2080 break;
d6a28714
JH
2081 case SEOL:
2082 seol:
2083 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2084 sayNO;
d6a28714 2085 if (PL_regeol - locinput > 1)
b8c5462f 2086 sayNO;
b8c5462f 2087 break;
d6a28714
JH
2088 case EOS:
2089 if (PL_regeol != locinput)
b8c5462f 2090 sayNO;
d6a28714 2091 break;
ffc61ed2 2092 case SANY:
d6a28714 2093 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2094 sayNO;
b8c5462f 2095 nextchr = UCHARAT(++locinput);
a0d0e21e 2096 break;
ffc61ed2 2097 case REG_ANY:
1aa99e6b
IH
2098 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2099 sayNO;
2100 if (do_utf8) {
b8c5462f 2101 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2102 if (locinput > PL_regeol)
2103 sayNO;
a0ed51b3 2104 nextchr = UCHARAT(locinput);
a0ed51b3 2105 }
1aa99e6b
IH
2106 else
2107 nextchr = UCHARAT(++locinput);
a0ed51b3 2108 break;
d6a28714 2109 case EXACT:
cd439c50
IZ
2110 s = STRING(scan);
2111 ln = STR_LEN(scan);
1aa99e6b
IH
2112 if (do_utf8 != (UTF!=0)) {
2113 char *l = locinput;
2114 char *e = s + ln;
2115 STRLEN len;
2116 if (do_utf8)
2117 while (s < e) {
2118 if (l >= PL_regeol)
2119 sayNO;
9041c2e3 2120 if (*((U8*)s) != utf8_to_uvchr((U8*)l, &len))
1aa99e6b
IH
2121 sayNO;
2122 s++;
2123 l += len;
2124 }
2125 else
2126 while (s < e) {
2127 if (l >= PL_regeol)
2128 sayNO;
9041c2e3 2129 if (*((U8*)l) != utf8_to_uvchr((U8*)s, &len))
1aa99e6b
IH
2130 sayNO;
2131 s += len;
2132 l++;
2133 }
2134 locinput = l;
2135 nextchr = UCHARAT(locinput);
2136 break;
2137 }
d6a28714
JH
2138 /* Inline the first character, for speed. */
2139 if (UCHARAT(s) != nextchr)
2140 sayNO;
2141 if (PL_regeol - locinput < ln)
2142 sayNO;
2143 if (ln > 1 && memNE(s, locinput, ln))
2144 sayNO;
2145 locinput += ln;
2146 nextchr = UCHARAT(locinput);
2147 break;
2148 case EXACTFL:
b8c5462f
JH
2149 PL_reg_flags |= RF_tainted;
2150 /* FALL THROUGH */
d6a28714 2151 case EXACTF:
cd439c50
IZ
2152 s = STRING(scan);
2153 ln = STR_LEN(scan);
d6a28714 2154
1aa99e6b 2155 if (do_utf8) {
d6a28714 2156 char *l = locinput;
1aa99e6b
IH
2157 char *e;
2158 e = s + ln;
d6a28714
JH
2159 c1 = OP(scan) == EXACTF;
2160 while (s < e) {
1aa99e6b 2161 if (l >= PL_regeol) {
d6a28714
JH
2162 sayNO;
2163 }
2b9d42f0 2164 if ((UTF ? utf8n_to_uvchr((U8*)s, e - s, 0, 0) : *((U8*)s)) !=
1aa99e6b
IH
2165 (c1 ? toLOWER_utf8((U8*)l) : toLOWER_LC_utf8((U8*)l)))
2166 sayNO;
2167 s += UTF ? UTF8SKIP(s) : 1;
d6a28714 2168 l += UTF8SKIP(l);
b8c5462f 2169 }
d6a28714 2170 locinput = l;
a0ed51b3
LW
2171 nextchr = UCHARAT(locinput);
2172 break;
2173 }
d6a28714
JH
2174
2175 /* Inline the first character, for speed. */
2176 if (UCHARAT(s) != nextchr &&
2177 UCHARAT(s) != ((OP(scan) == EXACTF)
2178 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2179 sayNO;
d6a28714 2180 if (PL_regeol - locinput < ln)
b8c5462f 2181 sayNO;
d6a28714
JH
2182 if (ln > 1 && (OP(scan) == EXACTF
2183 ? ibcmp(s, locinput, ln)
2184 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2185 sayNO;
d6a28714
JH
2186 locinput += ln;
2187 nextchr = UCHARAT(locinput);
a0d0e21e 2188 break;
d6a28714 2189 case ANYOF:
ffc61ed2
JH
2190 if (do_utf8) {
2191 if (!reginclass(scan, (U8*)locinput, do_utf8))
2192 sayNO;
2193 if (locinput >= PL_regeol)
2194 sayNO;
2195 locinput += PL_utf8skip[nextchr];
b8c5462f 2196 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2197 }
2198 else {
2199 if (nextchr < 0)
2200 nextchr = UCHARAT(locinput);
2201 if (!reginclass(scan, (U8*)locinput, do_utf8))
2202 sayNO;
2203 if (!nextchr && locinput >= PL_regeol)
2204 sayNO;
2205 nextchr = UCHARAT(++locinput);
2206 }
b8c5462f 2207 break;
d6a28714 2208 case ALNUML:
b8c5462f
JH
2209 PL_reg_flags |= RF_tainted;
2210 /* FALL THROUGH */
d6a28714 2211 case ALNUM:
b8c5462f 2212 if (!nextchr)
4633a7c4 2213 sayNO;
ffc61ed2
JH
2214 if (do_utf8) {
2215 if (!(OP(scan) == ALNUM
d6a28714
JH
2216 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2217 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2218 {
2219 sayNO;
a0ed51b3 2220 }
b8c5462f 2221 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2222 nextchr = UCHARAT(locinput);
2223 break;
2224 }
ffc61ed2 2225 if (!(OP(scan) == ALNUM
d6a28714 2226 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2227 sayNO;
b8c5462f 2228 nextchr = UCHARAT(++locinput);
a0d0e21e 2229 break;
d6a28714 2230 case NALNUML:
b8c5462f
JH
2231 PL_reg_flags |= RF_tainted;
2232 /* FALL THROUGH */
d6a28714
JH
2233 case NALNUM:
2234 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2235 sayNO;
ffc61ed2 2236 if (do_utf8) {
8269fa76 2237 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2238 if (OP(scan) == NALNUM
d6a28714
JH
2239 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2240 : isALNUM_LC_utf8((U8*)locinput))
2241 {
b8c5462f 2242 sayNO;
d6a28714 2243 }
b8c5462f
JH
2244 locinput += PL_utf8skip[nextchr];
2245 nextchr = UCHARAT(locinput);
2246 break;
2247 }
ffc61ed2 2248 if (OP(scan) == NALNUM
d6a28714 2249 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2250 sayNO;
76e3520e 2251 nextchr = UCHARAT(++locinput);
a0d0e21e 2252 break;
d6a28714
JH
2253 case BOUNDL:
2254 case NBOUNDL:
3280af22 2255 PL_reg_flags |= RF_tainted;
bbce6d69 2256 /* FALL THROUGH */
d6a28714
JH
2257 case BOUND:
2258 case NBOUND:
2259 /* was last char in word? */
ffc61ed2
JH
2260 if (do_utf8) {
2261 if (locinput == PL_regbol)
2262 ln = PL_regprev;
2263 else {
2264 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2265
2b9d42f0 2266 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2267 }
2268 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2269 ln = isALNUM_uni(ln);
8269fa76 2270 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
2271 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
2272 }
2273 else {
9041c2e3 2274 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2275 n = isALNUM_LC_utf8((U8*)locinput);
2276 }
a0ed51b3 2277 }
d6a28714 2278 else {
ffc61ed2
JH
2279 ln = (locinput != PL_regbol) ?
2280 UCHARAT(locinput - 1) : PL_regprev;
2281 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2282 ln = isALNUM(ln);
2283 n = isALNUM(nextchr);
2284 }
2285 else {
2286 ln = isALNUM_LC(ln);
2287 n = isALNUM_LC(nextchr);
2288 }
d6a28714 2289 }
ffc61ed2
JH
2290 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2291 OP(scan) == BOUNDL))
2292 sayNO;
a0ed51b3 2293 break;
d6a28714 2294 case SPACEL:
3280af22 2295 PL_reg_flags |= RF_tainted;
bbce6d69 2296 /* FALL THROUGH */
d6a28714 2297 case SPACE:
9442cb0e 2298 if (!nextchr)
4633a7c4 2299 sayNO;
1aa99e6b 2300 if (do_utf8) {
fd400ab9 2301 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2302 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2
JH
2303 if (!(OP(scan) == SPACE
2304 ? swash_fetch(PL_utf8_space, (U8*)locinput)
2305 : isSPACE_LC_utf8((U8*)locinput)))
2306 {
2307 sayNO;
2308 }
2309 locinput += PL_utf8skip[nextchr];
2310 nextchr = UCHARAT(locinput);
2311 break;
d6a28714 2312 }
ffc61ed2
JH
2313 if (!(OP(scan) == SPACE
2314 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2315 sayNO;
2316 nextchr = UCHARAT(++locinput);
2317 }
2318 else {
2319 if (!(OP(scan) == SPACE
2320 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2321 sayNO;
2322 nextchr = UCHARAT(++locinput);
a0ed51b3 2323 }
a0ed51b3 2324 break;
d6a28714 2325 case NSPACEL:
3280af22 2326 PL_reg_flags |= RF_tainted;
bbce6d69 2327 /* FALL THROUGH */
d6a28714 2328 case NSPACE:
9442cb0e 2329 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2330 sayNO;
1aa99e6b 2331 if (do_utf8) {
8269fa76 2332 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2333 if (OP(scan) == NSPACE
9442cb0e 2334 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714 2335 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2336 {
2337 sayNO;
2338 }
2339 locinput += PL_utf8skip[nextchr];
2340 nextchr = UCHARAT(locinput);
2341 break;
a0ed51b3 2342 }
ffc61ed2 2343 if (OP(scan) == NSPACE
d6a28714 2344 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2345 sayNO;
76e3520e 2346 nextchr = UCHARAT(++locinput);
a0d0e21e 2347 break;
d6a28714 2348 case DIGITL:
a0ed51b3
LW
2349 PL_reg_flags |= RF_tainted;
2350 /* FALL THROUGH */
d6a28714 2351 case DIGIT:
9442cb0e 2352 if (!nextchr)
a0ed51b3 2353 sayNO;
1aa99e6b 2354 if (do_utf8) {
8269fa76 2355 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2356 if (!(OP(scan) == DIGIT
9442cb0e
GS
2357 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2358 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2359 {
a0ed51b3 2360 sayNO;
dfe13c55 2361 }
6f06b55f 2362 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2363 nextchr = UCHARAT(locinput);
2364 break;
2365 }
ffc61ed2 2366 if (!(OP(scan) == DIGIT
9442cb0e 2367 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2368 sayNO;
2369 nextchr = UCHARAT(++locinput);
2370 break;
d6a28714 2371 case NDIGITL:
b8c5462f
JH
2372 PL_reg_flags |= RF_tainted;
2373 /* FALL THROUGH */
d6a28714 2374 case NDIGIT:
9442cb0e 2375 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2376 sayNO;
1aa99e6b 2377 if (do_utf8) {
8269fa76 2378 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2379 if (OP(scan) == NDIGIT
9442cb0e
GS
2380 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2381 : isDIGIT_LC_utf8((U8*)locinput))
2382 {
a0ed51b3 2383 sayNO;
9442cb0e 2384 }
6f06b55f 2385 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2386 nextchr = UCHARAT(locinput);
2387 break;
2388 }
ffc61ed2 2389 if (OP(scan) == NDIGIT
9442cb0e 2390 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2391 sayNO;
2392 nextchr = UCHARAT(++locinput);
2393 break;
2394 case CLUMP:
8269fa76 2395 LOAD_UTF8_CHARCLASS(mark,"~");
dfe13c55 2396 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2397 sayNO;
6f06b55f 2398 locinput += PL_utf8skip[nextchr];
dfe13c55 2399 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2400 locinput += UTF8SKIP(locinput);
2401 if (locinput > PL_regeol)
2402 sayNO;
2403 nextchr = UCHARAT(locinput);
2404 break;
c8756f30 2405 case REFFL:
3280af22 2406 PL_reg_flags |= RF_tainted;
c8756f30 2407 /* FALL THROUGH */
c277df42 2408 case REF:
c8756f30 2409 case REFF:
c277df42 2410 n = ARG(scan); /* which paren pair */
cf93c79d 2411 ln = PL_regstartp[n];
2c2d71f5 2412 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2413 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2414 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2415 if (ln == PL_regendp[n])
a0d0e21e 2416 break;
a0ed51b3 2417
cf93c79d 2418 s = PL_bostr + ln;
1aa99e6b 2419 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2420 char *l = locinput;
cf93c79d 2421 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2422 /*
2423 * Note that we can't do the "other character" lookup trick as
2424 * in the 8-bit case (no pun intended) because in Unicode we
2425 * have to map both upper and title case to lower case.
2426 */
2427 if (OP(scan) == REFF) {
2428 while (s < e) {
2429 if (l >= PL_regeol)
2430 sayNO;
dfe13c55 2431 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2432 sayNO;
2433 s += UTF8SKIP(s);
2434 l += UTF8SKIP(l);
2435 }
2436 }
2437 else {
2438 while (s < e) {
2439 if (l >= PL_regeol)
2440 sayNO;
dfe13c55 2441 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2442 sayNO;
2443 s += UTF8SKIP(s);
2444 l += UTF8SKIP(l);
2445 }
2446 }
2447 locinput = l;
2448 nextchr = UCHARAT(locinput);
2449 break;
2450 }
2451
a0d0e21e 2452 /* Inline the first character, for speed. */
76e3520e 2453 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2454 (OP(scan) == REF ||
2455 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2456 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2457 sayNO;
cf93c79d 2458 ln = PL_regendp[n] - ln;
3280af22 2459 if (locinput + ln > PL_regeol)
4633a7c4 2460 sayNO;
c8756f30
AK
2461 if (ln > 1 && (OP(scan) == REF
2462 ? memNE(s, locinput, ln)
2463 : (OP(scan) == REFF
2464 ? ibcmp(s, locinput, ln)
2465 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2466 sayNO;
a0d0e21e 2467 locinput += ln;
76e3520e 2468 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2469 break;
2470
2471 case NOTHING:
c277df42 2472 case TAIL:
a0d0e21e
LW
2473 break;
2474 case BACK:
2475 break;
c277df42
IZ
2476 case EVAL:
2477 {
2478 dSP;
533c011a 2479 OP_4tree *oop = PL_op;
3280af22
NIS
2480 COP *ocurcop = PL_curcop;
2481 SV **ocurpad = PL_curpad;
c277df42 2482 SV *ret;
9041c2e3 2483
c277df42 2484 n = ARG(scan);
533c011a 2485 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2486 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2487 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2488 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2489
cea2e8a9 2490 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2491 SPAGAIN;
2492 ret = POPs;
2493 PUTBACK;
9041c2e3 2494
0f5d15d6
IZ
2495 PL_op = oop;
2496 PL_curpad = ocurpad;
2497 PL_curcop = ocurcop;
c277df42 2498 if (logical) {
0f5d15d6
IZ
2499 if (logical == 2) { /* Postponed subexpression. */
2500 regexp *re;
22c35a8c 2501 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2502 re_cc_state state;
0f5d15d6
IZ
2503 CHECKPOINT cp, lastcp;
2504
2505 if(SvROK(ret) || SvRMAGICAL(ret)) {
2506 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2507
2508 if(SvMAGICAL(sv))
2509 mg = mg_find(sv, 'r');
2510 }
2511 if (mg) {
2512 re = (regexp *)mg->mg_obj;
df0003d4 2513 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2514 }
2515 else {
2516 STRLEN len;
2517 char *t = SvPV(ret, len);
2518 PMOP pm;
2519 char *oprecomp = PL_regprecomp;
2520 I32 osize = PL_regsize;
2521 I32 onpar = PL_regnpar;
2522
5fcd1c1b 2523 Zero(&pm, 1, PMOP);
cea2e8a9 2524 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2525 if (!(SvFLAGS(ret)
0f5d15d6
IZ
2526 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2527 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2528 PL_regprecomp = oprecomp;
2529 PL_regsize = osize;
2530 PL_regnpar = onpar;
2531 }
2532 DEBUG_r(
9041c2e3 2533 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2534 "Entering embedded `%s%.60s%s%s'\n",
2535 PL_colors[0],
2536 re->precomp,
2537 PL_colors[1],
2538 (strlen(re->precomp) > 60 ? "..." : ""))
2539 );
2540 state.node = next;
2541 state.prev = PL_reg_call_cc;
2542 state.cc = PL_regcc;
2543 state.re = PL_reg_re;
2544
2ab05381 2545 PL_regcc = 0;
9041c2e3 2546
0f5d15d6 2547 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2548 REGCP_SET(lastcp);
0f5d15d6
IZ
2549 cache_re(re);
2550 state.ss = PL_savestack_ix;
2551 *PL_reglastparen = 0;
2552 PL_reg_call_cc = &state;
2553 PL_reginput = locinput;
2c2d71f5
JH
2554
2555 /* XXXX This is too dramatic a measure... */
2556 PL_reg_maxiter = 0;
2557
0f5d15d6 2558 if (regmatch(re->program + 1)) {
2c914db6
IZ
2559 /* Even though we succeeded, we need to restore
2560 global variables, since we may be wrapped inside
2561 SUSPEND, thus the match may be not finished yet. */
2562
2563 /* XXXX Do this only if SUSPENDed? */
2564 PL_reg_call_cc = state.prev;
2565 PL_regcc = state.cc;
2566 PL_reg_re = state.re;
2567 cache_re(PL_reg_re);
2568
2569 /* XXXX This is too dramatic a measure... */
2570 PL_reg_maxiter = 0;
2571
2572 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2573 ReREFCNT_dec(re);
2574 regcpblow(cp);
2575 sayYES;
2576 }
0f5d15d6 2577 ReREFCNT_dec(re);
02db2b7b 2578 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2579 regcppop();
2580 PL_reg_call_cc = state.prev;
2581 PL_regcc = state.cc;
2582 PL_reg_re = state.re;
d3790889 2583 cache_re(PL_reg_re);
2c2d71f5
JH
2584
2585 /* XXXX This is too dramatic a measure... */
2586 PL_reg_maxiter = 0;
2587
0f5d15d6
IZ
2588 sayNO;
2589 }
c277df42 2590 sw = SvTRUE(ret);
0f5d15d6 2591 logical = 0;
a0ed51b3
LW
2592 }
2593 else
3280af22 2594 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2595 break;
2596 }
a0d0e21e 2597 case OPEN:
c277df42 2598 n = ARG(scan); /* which paren pair */
3280af22
NIS
2599 PL_reg_start_tmp[n] = locinput;
2600 if (n > PL_regsize)
2601 PL_regsize = n;
a0d0e21e
LW
2602 break;
2603 case CLOSE:
c277df42 2604 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2605 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2606 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2607 if (n > *PL_reglastparen)
2608 *PL_reglastparen = n;
a0d0e21e 2609 break;
c277df42
IZ
2610 case GROUPP:
2611 n = ARG(scan); /* which paren pair */
cf93c79d 2612 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2613 break;
2614 case IFTHEN:
2c2d71f5 2615 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2616 if (sw)
2617 next = NEXTOPER(NEXTOPER(scan));
2618 else {
2619 next = scan + ARG(scan);
2620 if (OP(next) == IFTHEN) /* Fake one. */
2621 next = NEXTOPER(NEXTOPER(next));
2622 }
2623 break;
2624 case LOGICAL:
0f5d15d6 2625 logical = scan->flags;
c277df42 2626 break;
2ab05381
IZ
2627/*******************************************************************
2628 PL_regcc contains infoblock about the innermost (...)* loop, and
2629 a pointer to the next outer infoblock.
2630
2631 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2632
2633 1) After matching X, regnode for CURLYX is processed;
2634
9041c2e3 2635 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2636 regmatch() recursively with the starting point at WHILEM node;
2637
2638 3) Each hit of WHILEM node tries to match A and Z (in the order
2639 depending on the current iteration, min/max of {min,max} and
2640 greediness). The information about where are nodes for "A"
2641 and "Z" is read from the infoblock, as is info on how many times "A"
2642 was already matched, and greediness.
2643
2644 4) After A matches, the same WHILEM node is hit again.
2645
2646 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2647 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2648 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2649 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2650 of the external loop.
2651
2652 Currently present infoblocks form a tree with a stem formed by PL_curcc
2653 and whatever it mentions via ->next, and additional attached trees
2654 corresponding to temporarily unset infoblocks as in "5" above.
2655
9041c2e3 2656 In the following picture infoblocks for outer loop of
2ab05381
IZ
2657 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2658 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2659 infoblocks are drawn below the "reset" infoblock.
2660
2661 In fact in the picture below we do not show failed matches for Z and T
2662 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2663 more obvious *why* one needs to *temporary* unset infoblocks.]
2664
2665 Matched REx position InfoBlocks Comment
2666 (Y(A)*?Z)*?T x
2667 Y(A)*?Z)*?T x <- O
2668 Y (A)*?Z)*?T x <- O
2669 Y A)*?Z)*?T x <- O <- I
2670 YA )*?Z)*?T x <- O <- I
2671 YA A)*?Z)*?T x <- O <- I
2672 YAA )*?Z)*?T x <- O <- I
2673 YAA Z)*?T x <- O # Temporary unset I
2674 I
2675
2676 YAAZ Y(A)*?Z)*?T x <- O
2677 I
2678
2679 YAAZY (A)*?Z)*?T x <- O
2680 I
2681
2682 YAAZY A)*?Z)*?T x <- O <- I
2683 I
2684
2685 YAAZYA )*?Z)*?T x <- O <- I
2686 I
2687
2688 YAAZYA Z)*?T x <- O # Temporary unset I
2689 I,I
2690
2691 YAAZYAZ )*?T x <- O
2692 I,I
2693
2694 YAAZYAZ T x # Temporary unset O
2695 O
2696 I,I
2697
2698 YAAZYAZT x
2699 O
2700 I,I
2701 *******************************************************************/
a0d0e21e
LW
2702 case CURLYX: {
2703 CURCUR cc;
3280af22 2704 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2705 /* No need to save/restore up to this paren */
2706 I32 parenfloor = scan->flags;
c277df42
IZ
2707
2708 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2709 next += ARG(next);
3280af22
NIS
2710 cc.oldcc = PL_regcc;
2711 PL_regcc = &cc;
cb434fcc
IZ
2712 /* XXXX Probably it is better to teach regpush to support
2713 parenfloor > PL_regsize... */
2714 if (parenfloor > *PL_reglastparen)
2715 parenfloor = *PL_reglastparen; /* Pessimization... */
2716 cc.parenfloor = parenfloor;
a0d0e21e
LW
2717 cc.cur = -1;
2718 cc.min = ARG1(scan);
2719 cc.max = ARG2(scan);
c277df42 2720 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2721 cc.next = next;
2722 cc.minmod = minmod;
2723 cc.lastloc = 0;
3280af22 2724 PL_reginput = locinput;
a0d0e21e
LW
2725 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2726 regcpblow(cp);
3280af22 2727 PL_regcc = cc.oldcc;
4633a7c4 2728 saySAME(n);
a0d0e21e
LW
2729 }
2730 /* NOT REACHED */
2731 case WHILEM: {
2732 /*
2733 * This is really hard to understand, because after we match
2734 * what we're trying to match, we must make sure the rest of
2c2d71f5 2735 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2736 * to go back UP the parse tree by recursing ever deeper. And
2737 * if it fails, we have to reset our parent's current state
2738 * that we can try again after backing off.
2739 */
2740
c277df42 2741 CHECKPOINT cp, lastcp;
3280af22 2742 CURCUR* cc = PL_regcc;
c277df42
IZ
2743 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2744
4633a7c4 2745 n = cc->cur + 1; /* how many we know we matched */
3280af22 2746 PL_reginput = locinput;
a0d0e21e 2747
c277df42 2748 DEBUG_r(
9041c2e3
NIS
2749 PerlIO_printf(Perl_debug_log,
2750 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2751 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2752 (long)n, (long)cc->min,
c277df42
IZ
2753 (long)cc->max, (long)cc)
2754 );
4633a7c4 2755
a0d0e21e
LW
2756 /* If degenerate scan matches "", assume scan done. */
2757
579cf2c3 2758 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2759 PL_regcc = cc->oldcc;
2ab05381
IZ
2760 if (PL_regcc)
2761 ln = PL_regcc->cur;
c277df42 2762 DEBUG_r(
c3464db5
DD
2763 PerlIO_printf(Perl_debug_log,
2764 "%*s empty match detected, try continuation...\n",
3280af22 2765 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2766 );
a0d0e21e 2767 if (regmatch(cc->next))
4633a7c4 2768 sayYES;
2ab05381
IZ
2769 if (PL_regcc)
2770 PL_regcc->cur = ln;
3280af22 2771 PL_regcc = cc;
4633a7c4 2772 sayNO;
a0d0e21e
LW
2773 }
2774
2775 /* First just match a string of min scans. */
2776
2777 if (n < cc->min) {
2778 cc->cur = n;
2779 cc->lastloc = locinput;
4633a7c4
LW
2780 if (regmatch(cc->scan))
2781 sayYES;
2782 cc->cur = n - 1;
c277df42 2783 cc->lastloc = lastloc;
4633a7c4 2784 sayNO;
a0d0e21e
LW
2785 }
2786
2c2d71f5
JH
2787 if (scan->flags) {
2788 /* Check whether we already were at this position.
2789 Postpone detection until we know the match is not
2790 *that* much linear. */
2791 if (!PL_reg_maxiter) {
2792 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2793 PL_reg_leftiter = PL_reg_maxiter;
2794 }
2795 if (PL_reg_leftiter-- == 0) {
2796 I32 size = (PL_reg_maxiter + 7)/8;
2797 if (PL_reg_poscache) {
2798 if (PL_reg_poscache_size < size) {
2799 Renew(PL_reg_poscache, size, char);
2800 PL_reg_poscache_size = size;
2801 }
2802 Zero(PL_reg_poscache, size, char);
2803 }
2804 else {
2805 PL_reg_poscache_size = size;
2806 Newz(29, PL_reg_poscache, size, char);
2807 }
2808 DEBUG_r(
2809 PerlIO_printf(Perl_debug_log,
2810 "%sDetected a super-linear match, switching on caching%s...\n",
2811 PL_colors[4], PL_colors[5])
2812 );
2813 }
2814 if (PL_reg_leftiter < 0) {
2815 I32 o = locinput - PL_bostr, b;
2816
2817 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2818 b = o % 8;
2819 o /= 8;
2820 if (PL_reg_poscache[o] & (1<<b)) {
2821 DEBUG_r(
2822 PerlIO_printf(Perl_debug_log,
2823 "%*s already tried at this position...\n",
2824 REPORT_CODE_OFF+PL_regindent*2, "")
2825 );
7821416a 2826 sayNO_SILENT;
2c2d71f5
JH
2827 }
2828 PL_reg_poscache[o] |= (1<<b);
2829 }
2830 }
2831
a0d0e21e
LW
2832 /* Prefer next over scan for minimal matching. */
2833
2834 if (cc->minmod) {
3280af22 2835 PL_regcc = cc->oldcc;
2ab05381
IZ
2836 if (PL_regcc)
2837 ln = PL_regcc->cur;
5f05dabc 2838 cp = regcppush(cc->parenfloor);
02db2b7b 2839 REGCP_SET(lastcp);
5f05dabc 2840 if (regmatch(cc->next)) {
c277df42 2841 regcpblow(cp);
4633a7c4 2842 sayYES; /* All done. */
5f05dabc 2843 }
02db2b7b 2844 REGCP_UNWIND(lastcp);
5f05dabc 2845 regcppop();
2ab05381
IZ
2846 if (PL_regcc)
2847 PL_regcc->cur = ln;
3280af22 2848 PL_regcc = cc;
a0d0e21e 2849
c277df42 2850 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 2851 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2852 && !(PL_reg_flags & RF_warned)) {
2853 PL_reg_flags |= RF_warned;
e476b1b5 2854 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2855 "Complex regular subexpression recursion",
2856 REG_INFTY - 1);
c277df42 2857 }
4633a7c4 2858 sayNO;
c277df42 2859 }
a687059c 2860
c277df42 2861 DEBUG_r(
c3464db5
DD
2862 PerlIO_printf(Perl_debug_log,
2863 "%*s trying longer...\n",
3280af22 2864 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2865 );
a0d0e21e 2866 /* Try scanning more and see if it helps. */
3280af22 2867 PL_reginput = locinput;
a0d0e21e
LW
2868 cc->cur = n;
2869 cc->lastloc = locinput;
5f05dabc 2870 cp = regcppush(cc->parenfloor);
02db2b7b 2871 REGCP_SET(lastcp);
5f05dabc 2872 if (regmatch(cc->scan)) {
c277df42 2873 regcpblow(cp);
4633a7c4 2874 sayYES;
5f05dabc 2875 }
02db2b7b 2876 REGCP_UNWIND(lastcp);
5f05dabc 2877 regcppop();
4633a7c4 2878 cc->cur = n - 1;
c277df42 2879 cc->lastloc = lastloc;
4633a7c4 2880 sayNO;
a0d0e21e
LW
2881 }
2882
2883 /* Prefer scan over next for maximal matching. */
2884
2885 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2886 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2887 cc->cur = n;
2888 cc->lastloc = locinput;
02db2b7b 2889 REGCP_SET(lastcp);
5f05dabc 2890 if (regmatch(cc->scan)) {
c277df42 2891 regcpblow(cp);
4633a7c4 2892 sayYES;
5f05dabc 2893 }
02db2b7b 2894 REGCP_UNWIND(lastcp);
a0d0e21e 2895 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2896 PL_reginput = locinput;
c277df42 2897 DEBUG_r(
c3464db5
DD
2898 PerlIO_printf(Perl_debug_log,
2899 "%*s failed, try continuation...\n",
3280af22 2900 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2901 );
2902 }
9041c2e3 2903 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2904 && !(PL_reg_flags & RF_warned)) {
3280af22 2905 PL_reg_flags |= RF_warned;
e476b1b5 2906 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2907 "Complex regular subexpression recursion",
2908 REG_INFTY - 1);
a0d0e21e
LW
2909 }
2910
2911 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2912 PL_regcc = cc->oldcc;
2ab05381
IZ
2913 if (PL_regcc)
2914 ln = PL_regcc->cur;
a0d0e21e 2915 if (regmatch(cc->next))
4633a7c4 2916 sayYES;
2ab05381
IZ
2917 if (PL_regcc)
2918 PL_regcc->cur = ln;
3280af22 2919 PL_regcc = cc;
4633a7c4 2920 cc->cur = n - 1;
c277df42 2921 cc->lastloc = lastloc;
4633a7c4 2922 sayNO;
a0d0e21e
LW
2923 }
2924 /* NOT REACHED */
9041c2e3 2925 case BRANCHJ:
c277df42
IZ
2926 next = scan + ARG(scan);
2927 if (next == scan)
2928 next = NULL;
2929 inner = NEXTOPER(NEXTOPER(scan));
2930 goto do_branch;
9041c2e3 2931 case BRANCH:
c277df42
IZ
2932 inner = NEXTOPER(scan);
2933 do_branch:
2934 {
2935 CHECKPOINT lastcp;
2936 c1 = OP(scan);
2937 if (OP(next) != c1) /* No choice. */
2938 next = inner; /* Avoid recursion. */
a0d0e21e 2939 else {
02db2b7b
IZ
2940 I32 lastparen = *PL_reglastparen;
2941 I32 unwind1;
2942 re_unwind_branch_t *uw;
2943
2944 /* Put unwinding data on stack */
2945 unwind1 = SSNEWt(1,re_unwind_branch_t);
2946 uw = SSPTRt(unwind1,re_unwind_branch_t);
2947 uw->prev = unwind;
2948 unwind = unwind1;
2949 uw->type = ((c1 == BRANCH)
2950 ? RE_UNWIND_BRANCH
2951 : RE_UNWIND_BRANCHJ);
2952 uw->lastparen = lastparen;
2953 uw->next = next;
2954 uw->locinput = locinput;
2955 uw->nextchr = nextchr;
2956#ifdef DEBUGGING
2957 uw->regindent = ++PL_regindent;
2958#endif
c277df42 2959
02db2b7b
IZ
2960 REGCP_SET(uw->lastcp);
2961
2962 /* Now go into the first branch */
2963 next = inner;
a687059c 2964 }
a0d0e21e
LW
2965 }
2966 break;
2967 case MINMOD:
2968 minmod = 1;
2969 break;
c277df42
IZ
2970 case CURLYM:
2971 {
00db4c45 2972 I32 l = 0;
c277df42 2973 CHECKPOINT lastcp;
9041c2e3 2974
c277df42
IZ
2975 /* We suppose that the next guy does not need
2976 backtracking: in particular, it is of constant length,
2977 and has no parenths to influence future backrefs. */
2978 ln = ARG1(scan); /* min to match */
2979 n = ARG2(scan); /* max to match */
c277df42
IZ
2980 paren = scan->flags;
2981 if (paren) {
3280af22
NIS
2982 if (paren > PL_regsize)
2983 PL_regsize = paren;
2984 if (paren > *PL_reglastparen)
2985 *PL_reglastparen = paren;
c277df42 2986 }
dc45a647 2987 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2988 if (paren)
2989 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2990 PL_reginput = locinput;
c277df42
IZ
2991 if (minmod) {
2992 minmod = 0;
2993 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2994 sayNO;
5f4b28b2 2995 if (ln && l == 0 && n >= ln
c277df42
IZ
2996 /* In fact, this is tricky. If paren, then the
2997 fact that we did/didnot match may influence
2998 future execution. */
2999 && !(paren && ln == 0))
3000 ln = n;
3280af22 3001 locinput = PL_reginput;
22c35a8c 3002 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3003 c1 = (U8)*STRING(next);
c277df42 3004 if (OP(next) == EXACTF)
22c35a8c 3005 c2 = PL_fold[c1];
c277df42 3006 else if (OP(next) == EXACTFL)
22c35a8c 3007 c2 = PL_fold_locale[c1];
c277df42
IZ
3008 else
3009 c2 = c1;
a0ed51b3
LW
3010 }
3011 else
c277df42 3012 c1 = c2 = -1000;
02db2b7b 3013 REGCP_SET(lastcp);
5f4b28b2 3014 /* This may be improved if l == 0. */
c277df42
IZ
3015 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3016 /* If it could work, try it. */
3017 if (c1 == -1000 ||
3280af22
NIS
3018 UCHARAT(PL_reginput) == c1 ||
3019 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3020 {
3021 if (paren) {
3022 if (n) {
cf93c79d
IZ
3023 PL_regstartp[paren] =
3024 HOPc(PL_reginput, -l) - PL_bostr;
3025 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3026 }
3027 else
cf93c79d 3028 PL_regendp[paren] = -1;
c277df42
IZ
3029 }
3030 if (regmatch(next))
3031 sayYES;
02db2b7b 3032 REGCP_UNWIND(lastcp);
c277df42
IZ
3033 }
3034 /* Couldn't or didn't -- move forward. */
3280af22 3035 PL_reginput = locinput;
c277df42
IZ
3036 if (regrepeat_hard(scan, 1, &l)) {
3037 ln++;
3280af22 3038 locinput = PL_reginput;
c277df42
IZ
3039 }
3040 else
3041 sayNO;
3042 }
a0ed51b3
LW
3043 }
3044 else {
c277df42
IZ
3045 n = regrepeat_hard(scan, n, &l);
3046 if (n != 0 && l == 0
3047 /* In fact, this is tricky. If paren, then the
3048 fact that we did/didnot match may influence
3049 future execution. */
3050 && !(paren && ln == 0))
3051 ln = n;
3280af22 3052 locinput = PL_reginput;
c277df42 3053 DEBUG_r(
5c0ca799 3054 PerlIO_printf(Perl_debug_log,
faccc32b 3055 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3056 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3057 (IV) n, (IV)l)
c277df42
IZ
3058 );
3059 if (n >= ln) {
22c35a8c 3060 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3061 c1 = (U8)*STRING(next);
c277df42 3062 if (OP(next) == EXACTF)
22c35a8c 3063 c2 = PL_fold[c1];
c277df42 3064 else if (OP(next) == EXACTFL)
22c35a8c 3065 c2 = PL_fold_locale[c1];
c277df42
IZ
3066 else
3067 c2 = c1;
a0ed51b3
LW
3068 }
3069 else
c277df42
IZ
3070 c1 = c2 = -1000;
3071 }
02db2b7b 3072 REGCP_SET(lastcp);
c277df42
IZ
3073 while (n >= ln) {
3074 /* If it could work, try it. */
3075 if (c1 == -1000 ||
3280af22
NIS
3076 UCHARAT(PL_reginput) == c1 ||
3077 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3078 {
3079 DEBUG_r(
c3464db5 3080 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3081 "%*s trying tail with n=%"IVdf"...\n",
3082 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3083 );
3084 if (paren) {
3085 if (n) {
cf93c79d
IZ
3086 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3087 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3088 }
a0ed51b3 3089 else
cf93c79d 3090 PL_regendp[paren] = -1;
c277df42 3091 }
a0ed51b3
LW
3092 if (regmatch(next))
3093 sayYES;
02db2b7b 3094 REGCP_UNWIND(lastcp);
a0ed51b3 3095 }
c277df42
IZ
3096 /* Couldn't or didn't -- back up. */
3097 n--;
dfe13c55 3098 locinput = HOPc(locinput, -l);
3280af22 3099 PL_reginput = locinput;
c277df42
IZ
3100 }
3101 }
3102 sayNO;
3103 break;
3104 }
3105 case CURLYN:
3106 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3107 if (paren > PL_regsize)
3108 PL_regsize = paren;
3109 if (paren > *PL_reglastparen)
3110 *PL_reglastparen = paren;
c277df42
IZ
3111 ln = ARG1(scan); /* min to match */
3112 n = ARG2(scan); /* max to match */
dc45a647 3113 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3114 goto repeat;
a0d0e21e 3115 case CURLY:
c277df42 3116 paren = 0;
a0d0e21e
LW
3117 ln = ARG1(scan); /* min to match */
3118 n = ARG2(scan); /* max to match */
dc45a647 3119 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3120 goto repeat;
3121 case STAR:
3122 ln = 0;
c277df42 3123 n = REG_INFTY;
a0d0e21e 3124 scan = NEXTOPER(scan);
c277df42 3125 paren = 0;
a0d0e21e
LW
3126 goto repeat;
3127 case PLUS:
c277df42
IZ
3128 ln = 1;
3129 n = REG_INFTY;
3130 scan = NEXTOPER(scan);
3131 paren = 0;
3132 repeat:
a0d0e21e
LW
3133 /*
3134 * Lookahead to avoid useless match attempts
3135 * when we know what character comes next.
3136 */
22c35a8c 3137 if (PL_regkind[(U8)OP(next)] == EXACT) {
1aa99e6b
IH
3138 U8 *s = (U8*)STRING(next);
3139 if (!UTF) {
3140 c2 = c1 = *s;
3141 if (OP(next) == EXACTF)
3142 c2 = PL_fold[c1];
3143 else if (OP(next) == EXACTFL)
3144 c2 = PL_fold_locale[c1];
3145 }
3146 else { /* UTF */
3147 if (OP(next) == EXACTF) {
3148 c1 = to_utf8_lower(s);
3149 c2 = to_utf8_upper(s);
3150 }
3151 else {
9041c2e3 3152 c2 = c1 = utf8_to_uvchr(s, NULL);
1aa99e6b
IH
3153 }
3154 }
bbce6d69 3155 }
a0d0e21e 3156 else
bbce6d69 3157 c1 = c2 = -1000;
3280af22 3158 PL_reginput = locinput;
a0d0e21e 3159 if (minmod) {
c277df42 3160 CHECKPOINT lastcp;
a0d0e21e
LW
3161 minmod = 0;
3162 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3163 sayNO;
a0ed51b3 3164 locinput = PL_reginput;
02db2b7b 3165 REGCP_SET(lastcp);
0fe9bf95 3166 if (c1 != -1000) {
1aa99e6b 3167 char *e; /* Should not check after this */
0fe9bf95
IZ
3168 char *old = locinput;
3169
1aa99e6b 3170 if (n == REG_INFTY) {
0fe9bf95 3171 e = PL_regeol - 1;
1aa99e6b
IH
3172 if (do_utf8)
3173 while (UTF8_IS_CONTINUATION(*(U8*)e))
3174 e--;
3175 }
3176 else if (do_utf8) {
3177 int m = n - ln;
3178 for (e = locinput;
3179 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3180 e += UTF8SKIP(e);
3181 }
3182 else {
3183 e = locinput + n - ln;
3184 if (e >= PL_regeol)
3185 e = PL_regeol - 1;
3186 }
0fe9bf95 3187 while (1) {
1aa99e6b 3188 int count;
0fe9bf95 3189 /* Find place 'next' could work */
1aa99e6b
IH
3190 if (!do_utf8) {
3191 if (c1 == c2) {
3192 while (locinput <= e && *locinput != c1)
3193 locinput++;
3194 } else {
9041c2e3 3195 while (locinput <= e
1aa99e6b
IH
3196 && *locinput != c1
3197 && *locinput != c2)
3198 locinput++;
3199 }
3200 count = locinput - old;
3201 }
3202 else {
3203 STRLEN len;
3204 if (c1 == c2) {
3205 for (count = 0;
3206 locinput <= e &&
9041c2e3 3207 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3208 count++)
3209 locinput += len;
3210
3211 } else {
3212 for (count = 0; locinput <= e; count++) {
9041c2e3 3213 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3214 if (c == c1 || c == c2)
3215 break;
9041c2e3 3216 locinput += len;
1aa99e6b
IH
3217 }
3218 }
0fe9bf95 3219 }
9041c2e3 3220 if (locinput > e)
0fe9bf95
IZ
3221 sayNO;
3222 /* PL_reginput == old now */
3223 if (locinput != old) {
3224 ln = 1; /* Did some */
1aa99e6b 3225 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3226 sayNO;
3227 }
3228 /* PL_reginput == locinput now */
29d1e993 3229 TRYPAREN(paren, ln, locinput);
0fe9bf95 3230 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3231 REGCP_UNWIND(lastcp);
0fe9bf95 3232 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3233 old = locinput;
3234 if (do_utf8)
3235 locinput += UTF8SKIP(locinput);
3236 else
3237 locinput++;
0fe9bf95
IZ
3238 }
3239 }
3240 else
c277df42 3241 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3242 UV c;
3243 if (c1 != -1000) {
3244 if (do_utf8)
9041c2e3 3245 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3246 else
9041c2e3 3247 c = UCHARAT(PL_reginput);
2390ecbc
PP
3248 /* If it could work, try it. */
3249 if (c == c1 || c == c2)
3250 {
3251 TRYPAREN(paren, n, PL_reginput);
3252 REGCP_UNWIND(lastcp);
3253 }
1aa99e6b 3254 }
a0d0e21e 3255 /* If it could work, try it. */
2390ecbc 3256 else if (c1 == -1000)
bbce6d69 3257 {
29d1e993 3258 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3259 REGCP_UNWIND(lastcp);
bbce6d69 3260 }
c277df42 3261 /* Couldn't or didn't -- move forward. */
a0ed51b3 3262 PL_reginput = locinput;
a0d0e21e
LW
3263 if (regrepeat(scan, 1)) {
3264 ln++;
a0ed51b3
LW
3265 locinput = PL_reginput;
3266 }
3267 else
4633a7c4 3268 sayNO;
a0d0e21e
LW
3269 }
3270 }
3271 else {
c277df42 3272 CHECKPOINT lastcp;
a0d0e21e 3273 n = regrepeat(scan, n);
a0ed51b3 3274 locinput = PL_reginput;
22c35a8c 3275 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3276 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3277 ln = n; /* why back off? */
1aeab75a
GS
3278 /* ...because $ and \Z can match before *and* after
3279 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3280 We should back off by one in this case. */
3281 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3282 ln--;
3283 }
02db2b7b 3284 REGCP_SET(lastcp);
c277df42 3285 if (paren) {
1aa99e6b 3286 UV c;
c277df42 3287 while (n >= ln) {
1aa99e6b
IH
3288 if (c1 != -1000) {
3289 if (do_utf8)
9041c2e3 3290 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3291 else
9041c2e3 3292 c = UCHARAT(PL_reginput);
1aa99e6b 3293 }
c277df42 3294 /* If it could work, try it. */
1aa99e6b 3295 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3296 {
29d1e993 3297 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3298 REGCP_UNWIND(lastcp);
c277df42
IZ
3299 }
3300 /* Couldn't or didn't -- back up. */
3301 n--;
dfe13c55 3302 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3303 }
a0ed51b3
LW
3304 }
3305 else {
1aa99e6b 3306 UV c;
c277df42 3307 while (n >= ln) {
1aa99e6b
IH
3308 if (c1 != -1000) {
3309 if (do_utf8)
9041c2e3 3310 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3311 else
9041c2e3 3312 c = UCHARAT(PL_reginput);
1aa99e6b 3313 }
c277df42 3314 /* If it could work, try it. */
1aa99e6b 3315 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3316 {
29d1e993 3317 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3318 REGCP_UNWIND(lastcp);
c277df42
IZ
3319 }
3320 /* Couldn't or didn't -- back up. */
3321 n--;
dfe13c55 3322 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3323 }
a0d0e21e
LW
3324 }
3325 }
4633a7c4 3326 sayNO;
c277df42 3327 break;
a0d0e21e 3328 case END:
0f5d15d6
IZ
3329 if (PL_reg_call_cc) {
3330 re_cc_state *cur_call_cc = PL_reg_call_cc;
3331 CURCUR *cctmp = PL_regcc;
3332 regexp *re = PL_reg_re;
3333 CHECKPOINT cp, lastcp;
3334
3335 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3336 REGCP_SET(lastcp);
0f5d15d6
IZ
3337 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3338 the caller. */
3339 PL_reginput = locinput; /* Make position available to
3340 the callcc. */
3341 cache_re(PL_reg_call_cc->re);
3342 PL_regcc = PL_reg_call_cc->cc;
3343 PL_reg_call_cc = PL_reg_call_cc->prev;
3344 if (regmatch(cur_call_cc->node)) {
3345 PL_reg_call_cc = cur_call_cc;
3346 regcpblow(cp);
3347 sayYES;
3348 }
02db2b7b 3349 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3350 regcppop();
3351 PL_reg_call_cc = cur_call_cc;
3352 PL_regcc = cctmp;
3353 PL_reg_re = re;
3354 cache_re(re);
3355
3356 DEBUG_r(
3357 PerlIO_printf(Perl_debug_log,
3358 "%*s continuation failed...\n",
3359 REPORT_CODE_OFF+PL_regindent*2, "")
3360 );
7821416a 3361 sayNO_SILENT;
0f5d15d6 3362 }
7821416a
IZ
3363 if (locinput < PL_regtill) {
3364 DEBUG_r(PerlIO_printf(Perl_debug_log,
3365 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3366 PL_colors[4],
3367 (long)(locinput - PL_reg_starttry),
3368 (long)(PL_regtill - PL_reg_starttry),
3369 PL_colors[5]));
3370 sayNO_FINAL; /* Cannot match: too short. */
3371 }
3372 PL_reginput = locinput; /* put where regtry can find it */
3373 sayYES_FINAL; /* Success! */
7e5428c5 3374 case SUCCEED:
3280af22 3375 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3376 sayYES_LOUD; /* Success! */
c277df42
IZ
3377 case SUSPEND:
3378 n = 1;
9fe1d20c 3379 PL_reginput = locinput;
9041c2e3 3380 goto do_ifmatch;
a0d0e21e 3381 case UNLESSM:
c277df42 3382 n = 0;
a0ed51b3 3383 if (scan->flags) {
0fe9bf95
IZ
3384 if (UTF) { /* XXXX This is absolutely
3385 broken, we read before
3386 start of string. */
3387 s = HOPMAYBEc(locinput, -scan->flags);
3388 if (!s)
3389 goto say_yes;
3390 PL_reginput = s;
3391 }
3392 else {
9041c2e3 3393 if (locinput < PL_bostr + scan->flags)
0fe9bf95
IZ
3394 goto say_yes;
3395 PL_reginput = locinput - scan->flags;
3396 goto do_ifmatch;
3397 }
a0ed51b3
LW
3398 }
3399 else
3400 PL_reginput = locinput;
c277df42
IZ
3401 goto do_ifmatch;
3402 case IFMATCH:
3403 n = 1;
a0ed51b3 3404 if (scan->flags) {
0fe9bf95
IZ
3405 if (UTF) { /* XXXX This is absolutely
3406 broken, we read before
3407 start of string. */
3408 s = HOPMAYBEc(locinput, -scan->flags);
3409 if (!s || s < PL_bostr)
3410 goto say_no;
3411 PL_reginput = s;
3412 }
3413 else {
9041c2e3 3414 if (locinput < PL_bostr + scan->flags)
0fe9bf95
IZ
3415 goto say_no;
3416 PL_reginput = locinput - scan->flags;
3417 goto do_ifmatch;
3418 }
a0ed51b3
LW
3419 }
3420 else
3421 PL_reginput = locinput;
3422
c277df42 3423 do_ifmatch:
c277df42
IZ
3424 inner = NEXTOPER(NEXTOPER(scan));
3425 if (regmatch(inner) != n) {
3426 say_no:
3427 if (logical) {
3428 logical = 0;
3429 sw = 0;
3430 goto do_longjump;
a0ed51b3
LW
3431 }
3432 else
c277df42
IZ
3433 sayNO;
3434 }
3435 say_yes:
3436 if (logical) {
3437 logical = 0;
3438 sw = 1;
3439 }
fe44a5e8 3440 if (OP(scan) == SUSPEND) {
3280af22 3441 locinput = PL_reginput;
565764a8 3442 nextchr = UCHARAT(locinput);
fe44a5e8 3443 }
c277df42
IZ
3444 /* FALL THROUGH. */
3445 case LONGJMP:
3446 do_longjump:
3447 next = scan + ARG(scan);
3448 if (next == scan)
3449 next = NULL;
a0d0e21e
LW
3450 break;
3451 default:
b900a521 3452 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3453 PTR2UV(scan), OP(scan));
cea2e8a9 3454 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3455 }
02db2b7b 3456 reenter:
a0d0e21e
LW
3457 scan = next;
3458 }
a687059c 3459
a0d0e21e
LW
3460 /*
3461 * We get here only if there's trouble -- normally "case END" is
3462 * the terminating point.
3463 */
cea2e8a9 3464 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3465 /*NOTREACHED*/
4633a7c4
LW
3466 sayNO;
3467
7821416a
IZ
3468yes_loud:
3469 DEBUG_r(
3470 PerlIO_printf(Perl_debug_log,
3471 "%*s %scould match...%s\n",
3472 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3473 );
3474 goto yes;
3475yes_final:
3476 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3477 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3478yes:
3479#ifdef DEBUGGING
3280af22 3480 PL_regindent--;
4633a7c4 3481#endif
02db2b7b
IZ
3482
3483#if 0 /* Breaks $^R */
3484 if (unwind)
3485 regcpblow(firstcp);
3486#endif
4633a7c4
LW
3487 return 1;
3488
3489no:
7821416a
IZ
3490 DEBUG_r(
3491 PerlIO_printf(Perl_debug_log,
3492 "%*s %sfailed...%s\n",
3493 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3494 );
3495 goto do_no;
3496no_final:
3497do_no:
02db2b7b
IZ
3498 if (unwind) {
3499 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3500
3501 switch (uw->type) {
3502 case RE_UNWIND_BRANCH:
3503 case RE_UNWIND_BRANCHJ:
3504 {
3505 re_unwind_branch_t *uwb = &(uw->branch);
3506 I32 lastparen = uwb->lastparen;
9041c2e3 3507
02db2b7b
IZ
3508 REGCP_UNWIND(uwb->lastcp);
3509 for (n = *PL_reglastparen; n > lastparen; n--)
3510 PL_regendp[n] = -1;
3511 *PL_reglastparen = n;
3512 scan = next = uwb->next;
9041c2e3
NIS
3513 if ( !scan ||
3514 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
3515 ? BRANCH : BRANCHJ) ) { /* Failure */
3516 unwind = uwb->prev;
3517#ifdef DEBUGGING
3518 PL_regindent--;
3519#endif
3520 goto do_no;
3521 }
3522 /* Have more choice yet. Reuse the same uwb. */
3523 /*SUPPRESS 560*/
3524 if ((n = (uwb->type == RE_UNWIND_BRANCH
3525 ? NEXT_OFF(next) : ARG(next))))
3526 next += n;
3527 else
3528 next = NULL; /* XXXX Needn't unwinding in this case... */
3529 uwb->next = next;
3530 next = NEXTOPER(scan);
3531 if (uwb->type == RE_UNWIND_BRANCHJ)
3532 next = NEXTOPER(next);
3533 locinput = uwb->locinput;
3534 nextchr = uwb->nextchr;
3535#ifdef DEBUGGING
3536 PL_regindent = uwb->regindent;
3537#endif
3538
3539 goto reenter;
3540 }
3541 /* NOT REACHED */
3542 default:
3543 Perl_croak(aTHX_ "regexp unwind memory corruption");
3544 }
3545 /* NOT REACHED */
3546 }
4633a7c4 3547#ifdef DEBUGGING
3280af22 3548 PL_regindent--;
4633a7c4 3549#endif
a0d0e21e 3550 return 0;
a687059c
LW
3551}
3552
3553/*
3554 - regrepeat - repeatedly match something simple, report how many
3555 */
3556/*
3557 * [This routine now assumes that it will only match on things of length 1.
3558 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3559 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3560 */
76e3520e 3561STATIC I32
cea2e8a9 3562S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3563{
a0d0e21e 3564 register char *scan;
a0d0e21e 3565 register I32 c;
3280af22 3566 register char *loceol = PL_regeol;
a0ed51b3 3567 register I32 hardcount = 0;
ffc61ed2 3568 register bool do_utf8 = DO_UTF8(PL_reg_sv);
a0d0e21e 3569
3280af22 3570 scan = PL_reginput;
c277df42 3571 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3572 loceol = scan + max;
a0d0e21e 3573 switch (OP(p)) {
22c35a8c 3574 case REG_ANY:
1aa99e6b 3575 if (do_utf8) {
ffc61ed2 3576 loceol = PL_regeol;
1aa99e6b 3577 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3578 scan += UTF8SKIP(scan);
3579 hardcount++;
3580 }
3581 } else {
3582 while (scan < loceol && *scan != '\n')
3583 scan++;
a0ed51b3
LW
3584 }
3585 break;
ffc61ed2 3586 case SANY:
3baa4c62 3587 scan = loceol;
a0ed51b3 3588 break;
bbce6d69 3589 case EXACT: /* length of string is 1 */
cd439c50 3590 c = (U8)*STRING(p);
bbce6d69 3591 while (scan < loceol && UCHARAT(scan) == c)
3592 scan++;
3593 break;
3594 case EXACTF: /* length of string is 1 */
cd439c50 3595 c = (U8)*STRING(p);
bbce6d69 3596 while (scan < loceol &&
22c35a8c 3597 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3598 scan++;
3599 break;
3600 case EXACTFL: /* length of string is 1 */
3280af22 3601 PL_reg_flags |= RF_tainted;
cd439c50 3602 c = (U8)*STRING(p);
bbce6d69 3603 while (scan < loceol &&
22c35a8c 3604 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3605 scan++;
3606 break;
3607 case ANYOF:
ffc61ed2
JH
3608 if (do_utf8) {
3609 loceol = PL_regeol;
1aa99e6b
IH
3610 while (hardcount < max && scan < loceol &&
3611 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3612 scan += UTF8SKIP(scan);
3613 hardcount++;
3614 }
3615 } else {
3616 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3617 scan++;
3618 }
a0d0e21e
LW
3619 break;
3620 case ALNUM:
1aa99e6b 3621 if (do_utf8) {
ffc61ed2 3622 loceol = PL_regeol;
8269fa76 3623 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b
IH
3624 while (hardcount < max && scan < loceol &&
3625 swash_fetch(PL_utf8_alnum, (U8*)scan)) {
ffc61ed2
JH
3626 scan += UTF8SKIP(scan);
3627 hardcount++;
3628 }
3629 } else {
3630 while (scan < loceol && isALNUM(*scan))
3631 scan++;
a0ed51b3
LW
3632 }
3633 break;
bbce6d69 3634 case ALNUML:
3280af22 3635 PL_reg_flags |= RF_tainted;
1aa99e6b 3636 if (do_utf8) {
ffc61ed2 3637 loceol = PL_regeol;
1aa99e6b
IH
3638 while (hardcount < max && scan < loceol &&
3639 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3640 scan += UTF8SKIP(scan);
3641 hardcount++;
3642 }
3643 } else {
3644 while (scan < loceol && isALNUM_LC(*scan))
3645 scan++;
a0ed51b3
LW
3646 }
3647 break;
a0d0e21e 3648 case NALNUM:
1aa99e6b 3649 if (do_utf8) {
ffc61ed2 3650 loceol = PL_regeol;
8269fa76 3651 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b
IH
3652 while (hardcount < max && scan < loceol &&
3653 !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
ffc61ed2
JH
3654 scan += UTF8SKIP(scan);
3655 hardcount++;
3656 }
3657 } else {
3658 while (scan < loceol && !isALNUM(*scan))
3659 scan++;
a0ed51b3
LW
3660 }
3661 break;
bbce6d69 3662 case NALNUML:
3280af22 3663 PL_reg_flags |= RF_tainted;
1aa99e6b 3664 if (do_utf8) {
ffc61ed2 3665 loceol = PL_regeol;
1aa99e6b
IH
3666 while (hardcount < max && scan < loceol &&
3667 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3668 scan += UTF8SKIP(scan);
3669 hardcount++;
3670 }
3671 } else {
3672 while (scan < loceol && !isALNUM_LC(*scan))
3673 scan++;
a0ed51b3
LW
3674 }
3675 break;
a0d0e21e 3676 case SPACE:
1aa99e6b 3677 if (do_utf8) {
ffc61ed2 3678 loceol = PL_regeol;
8269fa76 3679 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3680 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3681 (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3682 scan += UTF8SKIP(scan);
3683 hardcount++;
3684 }
3685 } else {
3686 while (scan < loceol && isSPACE(*scan))
3687 scan++;
a0ed51b3
LW
3688 }
3689 break;
bbce6d69 3690 case SPACEL:
3280af22 3691 PL_reg_flags |= RF_tainted;
1aa99e6b 3692 if (do_utf8) {
ffc61ed2 3693 loceol = PL_regeol;
1aa99e6b 3694 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3695 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3696 scan += UTF8SKIP(scan);
3697 hardcount++;
3698 }
3699 } else {
3700 while (scan < loceol && isSPACE_LC(*scan))
3701 scan++;
a0ed51b3
LW
3702 }
3703 break;
a0d0e21e 3704 case NSPACE:
1aa99e6b 3705 if (do_utf8) {
ffc61ed2 3706 loceol = PL_regeol;
8269fa76 3707 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3708 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3709 !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3710 scan += UTF8SKIP(scan);
3711 hardcount++;
3712 }
3713 } else {
3714 while (scan < loceol && !isSPACE(*scan))
3715 scan++;
3716 break;
a0ed51b3 3717 }
bbce6d69 3718 case NSPACEL:
3280af22 3719 PL_reg_flags |= RF_tainted;
1aa99e6b 3720 if (do_utf8) {
ffc61ed2 3721 loceol = PL_regeol;
1aa99e6b 3722 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3723 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3724 scan += UTF8SKIP(scan);
3725 hardcount++;
3726 }
3727 } else {
3728 while (scan < loceol && !isSPACE_LC(*scan))
3729 scan++;
a0ed51b3
LW
3730 }
3731 break;
a0d0e21e 3732 case DIGIT:
1aa99e6b 3733 if (do_utf8) {
ffc61ed2 3734 loceol = PL_regeol;
8269fa76 3735 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b
IH
3736 while (hardcount < max && scan < loceol &&
3737 swash_fetch(PL_utf8_digit,(U8*)scan)) {
ffc61ed2
JH
3738 scan += UTF8SKIP(scan);
3739 hardcount++;
3740 }
3741 } else {
3742 while (scan < loceol && isDIGIT(*scan))
3743 scan++;
a0ed51b3
LW
3744 }
3745 break;
a0d0e21e 3746 case NDIGIT:
1aa99e6b 3747 if (do_utf8) {
ffc61ed2 3748 loceol = PL_regeol;
8269fa76 3749 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b
IH
3750 while (hardcount < max && scan < loceol &&
3751 !swash_fetch(PL_utf8_digit,(U8*)scan)) {
ffc61ed2
JH
3752 scan += UTF8SKIP(scan);
3753 hardcount++;
3754 }
3755 } else {
3756 while (scan < loceol && !isDIGIT(*scan))
3757 scan++;
a0ed51b3
LW
3758 }
3759 break;
a0d0e21e
LW
3760 default: /* Called on something of 0 width. */
3761 break; /* So match right here or not at all. */
3762 }
a687059c 3763
a0ed51b3
LW
3764 if (hardcount)
3765 c = hardcount;
3766 else
3767 c = scan - PL_reginput;
3280af22 3768 PL_reginput = scan;
a687059c 3769
9041c2e3 3770 DEBUG_r(
c277df42
IZ
3771 {
3772 SV *prop = sv_newmortal();
3773
3774 regprop(prop, p);
9041c2e3
NIS
3775 PerlIO_printf(Perl_debug_log,
3776 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
7b0972df 3777 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42 3778 });
9041c2e3 3779
a0d0e21e 3780 return(c);
a687059c
LW
3781}
3782
3783/*
c277df42 3784 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 3785 *
c277df42
IZ
3786 * The repeater is supposed to have constant length.
3787 */
3788
76e3520e 3789STATIC I32
cea2e8a9 3790S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42
IZ
3791{
3792 register char *scan;
3793 register char *start;
3280af22 3794 register char *loceol = PL_regeol;
a0ed51b3 3795 I32 l = 0;
708e3b05 3796 I32 count = 0, res = 1;
a0ed51b3
LW
3797
3798 if (!max)
3799 return 0;
c277df42 3800
3280af22 3801 start = PL_reginput;
1aa99e6b 3802 if (DO_UTF8(PL_reg_sv)) {
708e3b05 3803 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3804 if (!count++) {
3805 l = 0;
3806 while (start < PL_reginput) {
3807 l++;
3808 start += UTF8SKIP(start);
3809 }
3810 *lp = l;
3811 if (l == 0)
3812 return max;
3813 }
3814 if (count == max)
3815 return count;
3816 }
3817 }
3818 else {
708e3b05 3819 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3820 if (!count++) {
3821 *lp = l = PL_reginput - start;
3822 if (max != REG_INFTY && l*max < loceol - scan)
3823 loceol = scan + l*max;
3824 if (l == 0)
3825 return max;
c277df42
IZ
3826 }
3827 }
3828 }
708e3b05 3829 if (!res)
3280af22 3830 PL_reginput = scan;
9041c2e3 3831
a0ed51b3 3832 return count;
c277df42
IZ
3833}
3834
3835/*
ffc61ed2
JH
3836- regclass_swash - prepare the utf8 swash
3837*/
3838
3839SV *
3840Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
3841{
3842 SV *sw = NULL;
3843 SV *si = NULL;
3844
3845 if (PL_regdata && PL_regdata->count) {
3846 U32 n = ARG(node);
3847
3848 if (PL_regdata->what[n] == 's') {
3849 SV *rv = (SV*)PL_regdata->data[n];
3850 AV *av = (AV*)SvRV((SV*)rv);
3851 SV **a;
9041c2e3 3852
ffc61ed2
JH
3853 si = *av_fetch(av, 0, FALSE);
3854 a = av_fetch(av, 1, FALSE);
9041c2e3 3855
ffc61ed2
JH
3856 if (a)
3857 sw = *a;
3858 else if (si && doinit) {
3859 sw = swash_init("utf8", "", si, 1, 0);
3860 (void)av_store(av, 1, sw);
3861 }
3862 }
3863 }
3864
3865 if (initsvp)
3866 *initsvp = si;
3867
3868 return sw;
3869}
3870
3871/*
cb8d8820 3872 - reginclass - determine if a character falls into a character class
bbce6d69 3873 */
3874
76e3520e 3875STATIC bool
ffc61ed2 3876S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
bbce6d69 3877{
ffc61ed2 3878 char flags = ANYOF_FLAGS(n);
bbce6d69 3879 bool match = FALSE;
1aa99e6b
IH
3880 UV c;
3881 STRLEN len;
3882
3883 if (do_utf8)
9041c2e3 3884 c = utf8_to_uvchr(p, &len);
1aa99e6b
IH
3885 else
3886 c = *p;
bbce6d69 3887
ffc61ed2
JH
3888 if (do_utf8 || (flags & ANYOF_UNICODE)) {
3889 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
3890 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
3891 match = TRUE;
bbce6d69 3892 }
1aa99e6b
IH
3893 if (do_utf8 && flags & ANYOF_UNICODE_ALL && c >= 256)
3894 match = TRUE;
ffc61ed2
JH
3895 if (!match) {
3896 SV *sw = regclass_swash(n, TRUE, 0);
3897
3898 if (sw) {
3899 if (swash_fetch(sw, p))
3900 match = TRUE;
3901 else if (flags & ANYOF_FOLD) {
3902 U8 tmpbuf[UTF8_MAXLEN+1];
9041c2e3 3903
ffc61ed2
JH
3904 if (flags & ANYOF_LOCALE) {
3905 PL_reg_flags |= RF_tainted;
9041c2e3 3906 uvchr_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
ffc61ed2 3907 }
1aa99e6b 3908 else
9041c2e3 3909 uvchr_to_utf8(tmpbuf, toLOWER_utf8(p));
ffc61ed2
JH
3910 if (swash_fetch(sw, tmpbuf))
3911 match = TRUE;
3912 }
3913 }
bbce6d69 3914 }
3915 }
1aa99e6b 3916 if (!match && c < 256) {
ffc61ed2
JH
3917 if (ANYOF_BITMAP_TEST(n, c))
3918 match = TRUE;
3919 else if (flags & ANYOF_FOLD) {
3920 I32 f;
a0ed51b3 3921
ffc61ed2
JH
3922 if (flags & ANYOF_LOCALE) {
3923 PL_reg_flags |= RF_tainted;
3924 f = PL_fold_locale[c];
3925 }
3926 else
3927 f = PL_fold[c];
3928 if (f != c && ANYOF_BITMAP_TEST(n, f))
3929 match = TRUE;
3930 }
3931
3932 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 3933 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
3934 if (
3935 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3936 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3937 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
3938 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3939 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3940 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3941 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3942 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3943 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3944 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3945 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
3946 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
3947 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3948 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3949 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3950 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3951 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
3952 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3953 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
3954 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3955 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3956 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3957 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
3958 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3959 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3960 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3961 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3962 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3963 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
3964 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
3965 ) /* How's that for a conditional? */
3966 {
3967 match = TRUE;
3968 }
a0ed51b3 3969 }
a0ed51b3
LW
3970 }
3971
a0ed51b3
LW
3972 return (flags & ANYOF_INVERT) ? !match : match;
3973}
161b471a 3974
dfe13c55 3975STATIC U8 *
cea2e8a9 3976S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 3977{
1aa99e6b
IH
3978 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
3979}
3980
3981STATIC U8 *
3982S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 3983{
a0ed51b3 3984 if (off >= 0) {
1aa99e6b 3985 while (off-- && s < lim) {
ffc61ed2 3986 /* XXX could check well-formedness here */
a0ed51b3 3987 s += UTF8SKIP(s);
ffc61ed2 3988 }
a0ed51b3
LW
3989 }
3990 else {
3991 while (off++) {
1aa99e6b 3992 if (s > lim) {
a0ed51b3 3993 s--;
ffc61ed2 3994 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 3995 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 3996 s--;
ffc61ed2
JH
3997 }
3998 /* XXX could check well-formedness here */
a0ed51b3
LW
3999 }
4000 }
4001 }
4002 return s;
4003}
161b471a 4004
dfe13c55 4005STATIC U8 *
1aa99e6b 4006S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 4007{
1aa99e6b
IH
4008 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4009}
4010
4011STATIC U8 *
4012S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
4013{
4014 if (off >= 0) {
1aa99e6b 4015 while (off-- && s < lim) {
ffc61ed2 4016 /* XXX could check well-formedness here */
a0ed51b3 4017 s += UTF8SKIP(s);
ffc61ed2 4018 }
a0ed51b3
LW
4019 if (off >= 0)
4020 return 0;
4021 }
4022 else {
4023 while (off++) {
1aa99e6b 4024 if (s > lim) {
a0ed51b3 4025 s--;
ffc61ed2 4026 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4027 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4028 s--;
ffc61ed2
JH
4029 }
4030 /* XXX could check well-formedness here */
a0ed51b3
LW
4031 }
4032 else
4033 break;
4034 }
4035 if (off <= 0)
4036 return 0;
4037 }
4038 return s;
4039}
51371543
GS
4040
4041#ifdef PERL_OBJECT
51371543
GS
4042#include "XSUB.h"
4043#endif
4044
4045static void
4046restore_pos(pTHXo_ void *arg)
4047{
51371543
GS
4048 if (PL_reg_eval_set) {
4049 if (PL_reg_oldsaved) {
4050 PL_reg_re->subbeg = PL_reg_oldsaved;
4051 PL_reg_re->sublen = PL_reg_oldsavedlen;
4052 RX_MATCH_COPIED_on(PL_reg_re);
4053 }
4054 PL_reg_magic->mg_len = PL_reg_oldpos;
4055 PL_reg_eval_set = 0;
4056 PL_curpm = PL_reg_oldcurpm;
4057 }
4058}