This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Detypos.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
c5be433b
GS
83#ifdef PERL_IN_XSUB_RE
84# if defined(PERL_CAPI) || defined(PERL_OBJECT)
85# include "XSUB.h"
86# endif
87#endif
88
a687059c
LW
89#include "regcomp.h"
90
c277df42
IZ
91#define RF_tainted 1 /* tainted information used? */
92#define RF_warned 2 /* warned about big count? */
ce862d02 93#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
94#define RF_utf8 8 /* String contains multibyte chars? */
95
96#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
97
98#define RS_init 1 /* eval environment created */
99#define RS_set 2 /* replsv value is set */
c277df42 100
a687059c
LW
101#ifndef STATIC
102#define STATIC static
103#endif
104
c277df42
IZ
105/*
106 * Forwards.
107 */
108
a0ed51b3 109#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
1aa99e6b 110#define CHR_DIST(a,b) (DO_UTF8(PL_reg_sv) ? utf8_distance(a,b) : a - b)
a0ed51b3 111
dfe13c55
GS
112#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
113#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
1aa99e6b
IH
114#define HOP(pos,off) (DO_UTF8(PL_reg_sv) ? reghop((U8*)pos, off) : (U8*)(pos + off))
115#define HOPMAYBE(pos,off) (DO_UTF8(PL_reg_sv) ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
116#define HOPc(pos,off) ((char*)HOP(pos,off))
117#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 118
1aa99e6b
IH
119#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
120#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
121#define HOP3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
122#define HOPMAYBE3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
123#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
124#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
125
8269fa76 126#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 127
8269fa76 128static void restore_pos(pTHXo_ void *arg);
51371543 129
76e3520e 130STATIC CHECKPOINT
cea2e8a9 131S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 132{
3280af22 133 int retval = PL_savestack_ix;
b1ce53c5
JH
134#define REGCP_PAREN_ELEMS 4
135 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
136 int p;
137
b1ce53c5
JH
138#define REGCP_OTHER_ELEMS 5
139 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 140 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 141/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
142 SSPUSHINT(PL_regendp[p]);
143 SSPUSHINT(PL_regstartp[p]);
3280af22 144 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
145 SSPUSHINT(p);
146 }
b1ce53c5 147/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
148 SSPUSHINT(PL_regsize);
149 SSPUSHINT(*PL_reglastparen);
150 SSPUSHPTR(PL_reginput);
41123dfd
JH
151#define REGCP_FRAME_ELEMS 2
152/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
153 * are needed for the regexp context stack bookkeeping. */
154 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 155 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 156
a0d0e21e
LW
157 return retval;
158}
159
c277df42 160/* These are needed since we do not localize EVAL nodes: */
02db2b7b 161# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 162 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 163 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 164
02db2b7b 165# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 166 PerlIO_printf(Perl_debug_log, \
faccc32b 167 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 168 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 169
76e3520e 170STATIC char *
cea2e8a9 171S_regcppop(pTHX)
a0d0e21e 172{
b1ce53c5 173 I32 i;
a0d0e21e
LW
174 U32 paren = 0;
175 char *input;
cf93c79d 176 I32 tmps;
b1ce53c5
JH
177
178 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 179 i = SSPOPINT;
b1ce53c5
JH
180 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
181 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 182 input = (char *) SSPOPPTR;
3280af22
NIS
183 *PL_reglastparen = SSPOPINT;
184 PL_regsize = SSPOPINT;
b1ce53c5
JH
185
186 /* Now restore the parentheses context. */
41123dfd
JH
187 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
188 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 189 paren = (U32)SSPOPINT;
3280af22 190 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
191 PL_regstartp[paren] = SSPOPINT;
192 tmps = SSPOPINT;
3280af22
NIS
193 if (paren <= *PL_reglastparen)
194 PL_regendp[paren] = tmps;
c277df42 195 DEBUG_r(
c3464db5 196 PerlIO_printf(Perl_debug_log,
b900a521 197 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 198 (UV)paren, (IV)PL_regstartp[paren],
b900a521 199 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 200 (IV)PL_regendp[paren],
3280af22 201 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 202 );
a0d0e21e 203 }
c277df42 204 DEBUG_r(
3280af22 205 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 206 PerlIO_printf(Perl_debug_log,
faccc32b
JH
207 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
208 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
209 }
210 );
daf18116 211#if 1
dafc8851
JH
212 /* It would seem that the similar code in regtry()
213 * already takes care of this, and in fact it is in
214 * a better location to since this code can #if 0-ed out
215 * but the code in regtry() is needed or otherwise tests
216 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
217 * (as of patchlevel 7877) will fail. Then again,
218 * this code seems to be necessary or otherwise
219 * building DynaLoader will fail:
220 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
221 * --jhi */
3280af22
NIS
222 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
223 if (paren > PL_regsize)
cf93c79d
IZ
224 PL_regstartp[paren] = -1;
225 PL_regendp[paren] = -1;
a0d0e21e 226 }
dafc8851 227#endif
a0d0e21e
LW
228 return input;
229}
230
0f5d15d6 231STATIC char *
cea2e8a9 232S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
233{
234 I32 tmp = PL_savestack_ix;
235
236 PL_savestack_ix = ss;
237 regcppop();
238 PL_savestack_ix = tmp;
942e002e 239 return Nullch;
0f5d15d6
IZ
240}
241
242typedef struct re_cc_state
243{
244 I32 ss;
245 regnode *node;
246 struct re_cc_state *prev;
247 CURCUR *cc;
248 regexp *re;
249} re_cc_state;
250
02db2b7b 251#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 252
29d1e993
HS
253#define TRYPAREN(paren, n, input) { \
254 if (paren) { \
255 if (n) { \
256 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
257 PL_regendp[paren] = input - PL_bostr; \
258 } \
259 else \
260 PL_regendp[paren] = -1; \
261 } \
262 if (regmatch(next)) \
263 sayYES; \
264 if (paren && n) \
265 PL_regendp[paren] = -1; \
266}
267
268
a687059c 269/*
e50aee73 270 * pregexec and friends
a687059c
LW
271 */
272
273/*
c277df42 274 - pregexec - match a regexp against a string
a687059c 275 */
c277df42 276I32
864dbfa3 277Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 278 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
279/* strend: pointer to null at end of string */
280/* strbeg: real beginning of string */
281/* minend: end of match must be >=minend after stringarg. */
282/* nosave: For optimizations. */
283{
284 return
9041c2e3 285 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
286 nosave ? 0 : REXEC_COPY_STR);
287}
0f5d15d6
IZ
288
289STATIC void
cea2e8a9 290S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
291{
292 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
293#ifdef DEBUGGING
294 PL_regprogram = prog->program;
295#endif
296 PL_regnpar = prog->nparens;
9041c2e3
NIS
297 PL_regdata = prog->data;
298 PL_reg_re = prog;
0f5d15d6 299}
22e551b9 300
9041c2e3 301/*
cad2e5aa
JH
302 * Need to implement the following flags for reg_anch:
303 *
304 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
305 * USE_INTUIT_ML
306 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
307 * INTUIT_AUTORITATIVE_ML
308 * INTUIT_ONCE_NOML - Intuit can match in one location only.
309 * INTUIT_ONCE_ML
310 *
311 * Another flag for this function: SECOND_TIME (so that float substrs
312 * with giant delta may be not rechecked).
313 */
314
315/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
316
2c2d71f5 317/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
318 Otherwise, only SvCUR(sv) is used to get strbeg. */
319
320/* XXXX We assume that strpos is strbeg unless sv. */
321
6eb5f6b9
JH
322/* XXXX Some places assume that there is a fixed substring.
323 An update may be needed if optimizer marks as "INTUITable"
324 RExen without fixed substrings. Similarly, it is assumed that
325 lengths of all the strings are no more than minlen, thus they
326 cannot come from lookahead.
327 (Or minlen should take into account lookahead.) */
328
2c2d71f5
JH
329/* A failure to find a constant substring means that there is no need to make
330 an expensive call to REx engine, thus we celebrate a failure. Similarly,
331 finding a substring too deep into the string means that less calls to
30944b6d
IZ
332 regtry() should be needed.
333
334 REx compiler's optimizer found 4 possible hints:
335 a) Anchored substring;
336 b) Fixed substring;
337 c) Whether we are anchored (beginning-of-line or \G);
338 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 339 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
340 string which does not contradict any of them.
341 */
2c2d71f5 342
6eb5f6b9
JH
343/* Most of decisions we do here should have been done at compile time.
344 The nodes of the REx which we used for the search should have been
345 deleted from the finite automaton. */
346
cad2e5aa
JH
347char *
348Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
349 char *strend, U32 flags, re_scream_pos_data *data)
350{
2c2d71f5 351 register I32 start_shift;
cad2e5aa 352 /* Should be nonnegative! */
2c2d71f5
JH
353 register I32 end_shift;
354 register char *s;
355 register SV *check;
a1933d95 356 char *strbeg;
cad2e5aa
JH
357 char *t;
358 I32 ml_anch;
2c2d71f5 359 char *tmp;
6eb5f6b9
JH
360 register char *other_last = Nullch; /* other substr checked before this */
361 char *check_at; /* check substr found at this pos */
30944b6d
IZ
362#ifdef DEBUGGING
363 char *i_strpos = strpos;
364#endif
cad2e5aa
JH
365
366 DEBUG_r( if (!PL_colorset) reginitcolors() );
367 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 368 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
369 PL_colors[4],PL_colors[5],PL_colors[0],
370 prog->precomp,
371 PL_colors[1],
372 (strlen(prog->precomp) > 60 ? "..." : ""),
373 PL_colors[0],
b900a521 374 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
375 strpos, PL_colors[1],
376 (strend - strpos > 60 ? "..." : ""))
377 );
378
1aa99e6b
IH
379 if (prog->reganch & ROPT_UTF8)
380 PL_reg_flags |= RF_utf8;
381
382 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
2c2d71f5 383 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 384 goto fail;
2c2d71f5 385 }
a1933d95 386 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 387 PL_regeol = strend;
653099ff 388 check = prog->check_substr;
2c2d71f5 389 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
390 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
391 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 392 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 393
7e25d62c
JH
394 if (!ml_anch) {
395 if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */
396 | ROPT_IMPLICIT)) /* not a real BOL */
397 /* SvCUR is not set on references: SvRV and SvPVX overlap */
398 && sv && !SvROK(sv)
399 && (strpos != strbeg)) {
400 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
401 goto fail;
402 }
403 if (prog->check_offset_min == prog->check_offset_max &&
404 !(prog->reganch & ROPT_SANY_SEEN)) {
2c2d71f5 405 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
406 I32 slen;
407
1aa99e6b 408 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
409 if (SvTAIL(check)) {
410 slen = SvCUR(check); /* >= 1 */
cad2e5aa 411
9041c2e3 412 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
413 || (strend - s == slen && strend[-1] != '\n')) {
414 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
415 goto fail_finish;
cad2e5aa
JH
416 }
417 /* Now should match s[0..slen-2] */
418 slen--;
653099ff 419 if (slen && (*SvPVX(check) != *s
cad2e5aa 420 || (slen > 1
653099ff 421 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
422 report_neq:
423 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
424 goto fail_finish;
425 }
cad2e5aa 426 }
653099ff
GS
427 else if (*SvPVX(check) != *s
428 || ((slen = SvCUR(check)) > 1
429 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
430 goto report_neq;
431 goto success_at_start;
7e25d62c 432 }
cad2e5aa 433 }
2c2d71f5 434 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 435 s = strpos;
2c2d71f5 436 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 437 end_shift = prog->minlen - start_shift -
653099ff 438 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 439 if (!ml_anch) {
653099ff
GS
440 I32 end = prog->check_offset_max + CHR_SVLEN(check)
441 - (SvTAIL(check) != 0);
1aa99e6b 442 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
443
444 if (end_shift < eshift)
445 end_shift = eshift;
446 }
cad2e5aa 447 }
2c2d71f5 448 else { /* Can match at random position */
cad2e5aa
JH
449 ml_anch = 0;
450 s = strpos;
2c2d71f5
JH
451 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
452 /* Should be nonnegative! */
453 end_shift = prog->minlen - start_shift -
653099ff 454 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
455 }
456
2c2d71f5 457#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 458 if (end_shift < 0)
6bbae5e6 459 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
460#endif
461
2c2d71f5
JH
462 restart:
463 /* Find a possible match in the region s..strend by looking for
464 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 465 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
466 I32 p = -1; /* Internal iterator of scream. */
467 I32 *pp = data ? data->scream_pos : &p;
468
2c2d71f5
JH
469 if (PL_screamfirst[BmRARE(check)] >= 0
470 || ( BmRARE(check) == '\n'
471 && (BmPREVIOUS(check) == SvCUR(check) - 1)
472 && SvTAIL(check) ))
9041c2e3 473 s = screaminstr(sv, check,
2c2d71f5 474 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 475 else
2c2d71f5 476 goto fail_finish;
cad2e5aa
JH
477 if (data)
478 *data->scream_olds = s;
479 }
3baa4c62
JH
480 else if (prog->reganch & ROPT_SANY_SEEN)
481 s = fbm_instr((U8*)(s + start_shift),
482 (U8*)(strend - end_shift),
483 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 484 else
1aa99e6b
IH
485 s = fbm_instr(HOP3(s, start_shift, strend),
486 HOP3(strend, -end_shift, strbeg),
2c2d71f5 487 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
488
489 /* Update the count-of-usability, remove useless subpatterns,
490 unshift s. */
2c2d71f5
JH
491
492 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
493 (s ? "Found" : "Did not find"),
494 ((check == prog->anchored_substr) ? "anchored" : "floating"),
495 PL_colors[0],
7b0972df
JH
496 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
497 SvPVX(check),
2c2d71f5
JH
498 PL_colors[1], (SvTAIL(check) ? "$" : ""),
499 (s ? " at offset " : "...\n") ) );
500
501 if (!s)
502 goto fail_finish;
503
6eb5f6b9
JH
504 check_at = s;
505
2c2d71f5 506 /* Finish the diagnostic message */
30944b6d 507 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
508
509 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
510 Start with the other substr.
511 XXXX no SCREAM optimization yet - and a very coarse implementation
512 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
513 *always* match. Probably should be marked during compile...
514 Probably it is right to do no SCREAM here...
515 */
516
517 if (prog->float_substr && prog->anchored_substr) {
30944b6d 518 /* Take into account the "other" substring. */
2c2d71f5
JH
519 /* XXXX May be hopelessly wrong for UTF... */
520 if (!other_last)
6eb5f6b9 521 other_last = strpos;
2c2d71f5 522 if (check == prog->float_substr) {
30944b6d
IZ
523 do_other_anchored:
524 {
1aa99e6b 525 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
526 char *s1 = s;
527
2c2d71f5
JH
528 t = s - prog->check_offset_max;
529 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
530 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 531 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 532 && t > strpos)))
30944b6d 533 /* EMPTY */;
2c2d71f5
JH
534 else
535 t = strpos;
1aa99e6b 536 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
537 if (t < other_last) /* These positions already checked */
538 t = other_last;
1aa99e6b 539 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
540 if (last < last1)
541 last1 = last;
542 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
543 /* On end-of-str: see comment below. */
544 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
545 HOP3(HOP3(last1, prog->anchored_offset, strend)
546 + SvCUR(prog->anchored_substr),
547 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
548 prog->anchored_substr,
549 PL_multiline ? FBMrf_MULTILINE : 0);
550 DEBUG_r(PerlIO_printf(Perl_debug_log,
551 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
552 (s ? "Found" : "Contradicts"),
553 PL_colors[0],
7b0972df
JH
554 (int)(SvCUR(prog->anchored_substr)
555 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
556 SvPVX(prog->anchored_substr),
557 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
558 if (!s) {
559 if (last1 >= last2) {
560 DEBUG_r(PerlIO_printf(Perl_debug_log,
561 ", giving up...\n"));
562 goto fail_finish;
563 }
564 DEBUG_r(PerlIO_printf(Perl_debug_log,
565 ", trying floating at offset %ld...\n",
1aa99e6b
IH
566 (long)(HOP3c(s1, 1, strend) - i_strpos)));
567 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
568 s = HOP3c(last, 1, strend);
2c2d71f5
JH
569 goto restart;
570 }
571 else {
572 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 573 (long)(s - i_strpos)));
1aa99e6b
IH
574 t = HOP3c(s, -prog->anchored_offset, strbeg);
575 other_last = HOP3c(s, 1, strend);
30944b6d 576 s = s1;
2c2d71f5
JH
577 if (t == strpos)
578 goto try_at_start;
2c2d71f5
JH
579 goto try_at_offset;
580 }
30944b6d 581 }
2c2d71f5
JH
582 }
583 else { /* Take into account the floating substring. */
584 char *last, *last1;
585 char *s1 = s;
586
1aa99e6b
IH
587 t = HOP3c(s, -start_shift, strbeg);
588 last1 = last =
589 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
590 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
591 last = HOP3c(t, prog->float_max_offset, strend);
592 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
593 if (s < other_last)
594 s = other_last;
2c2d71f5
JH
595 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
596 /* fbm_instr() takes into account exact value of end-of-str
597 if the check is SvTAIL(ed). Since false positives are OK,
598 and end-of-str is not later than strend we are OK. */
599 s = fbm_instr((unsigned char*)s,
600 (unsigned char*)last + SvCUR(prog->float_substr)
601 - (SvTAIL(prog->float_substr)!=0),
602 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
603 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
604 (s ? "Found" : "Contradicts"),
605 PL_colors[0],
7b0972df
JH
606 (int)(SvCUR(prog->float_substr)
607 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
608 SvPVX(prog->float_substr),
609 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
610 if (!s) {
611 if (last1 == last) {
612 DEBUG_r(PerlIO_printf(Perl_debug_log,
613 ", giving up...\n"));
614 goto fail_finish;
615 }
616 DEBUG_r(PerlIO_printf(Perl_debug_log,
617 ", trying anchored starting at offset %ld...\n",
30944b6d 618 (long)(s1 + 1 - i_strpos)));
803ff556 619 other_last = last;
1aa99e6b 620 s = HOP3c(t, 1, strend);
2c2d71f5
JH
621 goto restart;
622 }
623 else {
624 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 625 (long)(s - i_strpos)));
803ff556 626 other_last = s; /* Fix this later. --Hugo */
30944b6d 627 s = s1;
2c2d71f5
JH
628 if (t == strpos)
629 goto try_at_start;
2c2d71f5
JH
630 goto try_at_offset;
631 }
632 }
cad2e5aa 633 }
2c2d71f5
JH
634
635 t = s - prog->check_offset_max;
2c2d71f5
JH
636 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
637 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
638 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
639 && t > strpos))) {
2c2d71f5
JH
640 /* Fixed substring is found far enough so that the match
641 cannot start at strpos. */
642 try_at_offset:
cad2e5aa 643 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
644 /* Eventually fbm_*() should handle this, but often
645 anchored_offset is not 0, so this check will not be wasted. */
646 /* XXXX In the code below we prefer to look for "^" even in
647 presence of anchored substrings. And we search even
648 beyond the found float position. These pessimizations
649 are historical artefacts only. */
650 find_anchor:
2c2d71f5 651 while (t < strend - prog->minlen) {
cad2e5aa 652 if (*t == '\n') {
4ee3650e 653 if (t < check_at - prog->check_offset_min) {
30944b6d 654 if (prog->anchored_substr) {
4ee3650e
GS
655 /* Since we moved from the found position,
656 we definitely contradict the found anchored
30944b6d
IZ
657 substr. Due to the above check we do not
658 contradict "check" substr.
659 Thus we can arrive here only if check substr
660 is float. Redo checking for "other"=="fixed".
661 */
9041c2e3 662 strpos = t + 1;
30944b6d
IZ
663 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
664 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
665 goto do_other_anchored;
666 }
4ee3650e
GS
667 /* We don't contradict the found floating substring. */
668 /* XXXX Why not check for STCLASS? */
cad2e5aa 669 s = t + 1;
2c2d71f5 670 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 671 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
672 goto set_useful;
673 }
4ee3650e
GS
674 /* Position contradicts check-string */
675 /* XXXX probably better to look for check-string
676 than for "\n", so one should lower the limit for t? */
677 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 678 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 679 other_last = strpos = s = t + 1;
cad2e5aa
JH
680 goto restart;
681 }
682 t++;
683 }
2c2d71f5
JH
684 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
685 PL_colors[0],PL_colors[1]));
686 goto fail_finish;
cad2e5aa 687 }
f5952150
GS
688 else {
689 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
690 PL_colors[0],PL_colors[1]));
691 }
cad2e5aa
JH
692 s = t;
693 set_useful:
2c2d71f5 694 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
695 }
696 else {
f5952150 697 /* The found string does not prohibit matching at strpos,
2c2d71f5 698 - no optimization of calling REx engine can be performed,
f5952150
GS
699 unless it was an MBOL and we are not after MBOL,
700 or a future STCLASS check will fail this. */
2c2d71f5
JH
701 try_at_start:
702 /* Even in this situation we may use MBOL flag if strpos is offset
703 wrt the start of the string. */
05b4157f 704 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 705 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
706 /* May be due to an implicit anchor of m{.*foo} */
707 && !(prog->reganch & ROPT_IMPLICIT))
708 {
cad2e5aa
JH
709 t = strpos;
710 goto find_anchor;
711 }
30944b6d 712 DEBUG_r( if (ml_anch)
f5952150
GS
713 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
714 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 715 );
2c2d71f5 716 success_at_start:
30944b6d 717 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 718 && prog->check_substr /* Could be deleted already */
cad2e5aa 719 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
720 && prog->check_substr == prog->float_substr)
721 {
cad2e5aa 722 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 723 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
724 SvREFCNT_dec(prog->check_substr);
725 prog->check_substr = Nullsv; /* disable */
726 prog->float_substr = Nullsv; /* clear */
5e39e1e5 727 check = Nullsv; /* abort */
cad2e5aa 728 s = strpos;
3cf5c195
IZ
729 /* XXXX This is a remnant of the old implementation. It
730 looks wasteful, since now INTUIT can use many
6eb5f6b9 731 other heuristics. */
cad2e5aa
JH
732 prog->reganch &= ~RE_USE_INTUIT;
733 }
734 else
735 s = strpos;
736 }
737
6eb5f6b9
JH
738 /* Last resort... */
739 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
740 if (prog->regstclass) {
741 /* minlen == 0 is possible if regstclass is \b or \B,
742 and the fixed substr is ''$.
743 Since minlen is already taken into account, s+1 is before strend;
744 accidentally, minlen >= 1 guaranties no false positives at s + 1
745 even for \b or \B. But (minlen? 1 : 0) below assumes that
746 regstclass does not come from lookahead... */
747 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
748 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 749 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 750 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 751 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 752 : 1);
6eb5f6b9 753 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
754 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
755 : (prog->float_substr
756 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
757 cl_l, strend)
758 : strend);
a1933d95 759 char *startpos = strbeg;
6eb5f6b9
JH
760
761 t = s;
76384e4a 762 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 763 PL_regdata = prog->data;
76384e4a
GS
764 PL_bostr = startpos;
765 }
6eb5f6b9
JH
766 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
767 if (!s) {
768#ifdef DEBUGGING
769 char *what;
770#endif
771 if (endpos == strend) {
772 DEBUG_r( PerlIO_printf(Perl_debug_log,
773 "Could not match STCLASS...\n") );
774 goto fail;
775 }
66e933ab
GS
776 DEBUG_r( PerlIO_printf(Perl_debug_log,
777 "This position contradicts STCLASS...\n") );
653099ff
GS
778 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
779 goto fail;
6eb5f6b9
JH
780 /* Contradict one of substrings */
781 if (prog->anchored_substr) {
6eb5f6b9
JH
782 if (prog->anchored_substr == check) {
783 DEBUG_r( what = "anchored" );
784 hop_and_restart:
1aa99e6b 785 s = HOP3c(t, 1, strend);
66e933ab
GS
786 if (s + start_shift + end_shift > strend) {
787 /* XXXX Should be taken into account earlier? */
788 DEBUG_r( PerlIO_printf(Perl_debug_log,
789 "Could not match STCLASS...\n") );
790 goto fail;
791 }
5e39e1e5
HS
792 if (!check)
793 goto giveup;
6eb5f6b9 794 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 795 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
796 what, (long)(s + start_shift - i_strpos)) );
797 goto restart;
798 }
66e933ab 799 /* Have both, check_string is floating */
6eb5f6b9
JH
800 if (t + start_shift >= check_at) /* Contradicts floating=check */
801 goto retry_floating_check;
802 /* Recheck anchored substring, but not floating... */
9041c2e3 803 s = check_at;
5e39e1e5
HS
804 if (!check)
805 goto giveup;
6eb5f6b9 806 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 807 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
808 (long)(other_last - i_strpos)) );
809 goto do_other_anchored;
810 }
60e71179
GS
811 /* Another way we could have checked stclass at the
812 current position only: */
813 if (ml_anch) {
814 s = t = t + 1;
5e39e1e5
HS
815 if (!check)
816 goto giveup;
60e71179 817 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
818 "Looking for /%s^%s/m starting at offset %ld...\n",
819 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 820 goto try_at_offset;
66e933ab 821 }
60e71179
GS
822 if (!prog->float_substr) /* Could have been deleted */
823 goto fail;
6eb5f6b9
JH
824 /* Check is floating subtring. */
825 retry_floating_check:
826 t = check_at - start_shift;
827 DEBUG_r( what = "floating" );
828 goto hop_and_restart;
829 }
830 DEBUG_r( if (t != s)
9041c2e3 831 PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
832 "By STCLASS: moving %ld --> %ld\n",
833 (long)(t - i_strpos), (long)(s - i_strpos));
834 else
9041c2e3 835 PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
836 "Does not contradict STCLASS...\n") );
837 }
5e39e1e5
HS
838 giveup:
839 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
840 PL_colors[4], (check ? "Guessed" : "Giving up"),
841 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 842 return s;
2c2d71f5
JH
843
844 fail_finish: /* Substring not found */
66e933ab
GS
845 if (prog->check_substr) /* could be removed already */
846 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 847 fail:
2c2d71f5 848 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
849 PL_colors[4],PL_colors[5]));
850 return Nullch;
851}
9661b544 852
6eb5f6b9 853/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
854STATIC char *
855S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 856{
6eb5f6b9
JH
857 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
858 char *m;
d8093b23
G
859 STRLEN ln;
860 unsigned int c1;
861 unsigned int c2;
6eb5f6b9
JH
862 char *e;
863 register I32 tmp = 1; /* Scratch variable? */
ffc61ed2 864 register bool do_utf8 = DO_UTF8(PL_reg_sv);
cad2e5aa 865
6eb5f6b9
JH
866 /* We know what class it must start with. */
867 switch (OP(c)) {
6eb5f6b9 868 case ANYOF:
a0ed51b3 869 while (s < strend) {
ffc61ed2 870 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
871 if (tmp && (norun || regtry(prog, s)))
872 goto got_it;
873 else
874 tmp = doevery;
a0ed51b3 875 }
6eb5f6b9
JH
876 else
877 tmp = 1;
ffc61ed2 878 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 879 }
6eb5f6b9
JH
880 break;
881 case EXACTF:
882 m = STRING(c);
883 ln = STR_LEN(c);
1aa99e6b
IH
884 if (UTF) {
885 c1 = to_utf8_lower((U8*)m);
886 c2 = to_utf8_upper((U8*)m);
887 }
888 else {
889 c1 = *(U8*)m;
890 c2 = PL_fold[c1];
891 }
6eb5f6b9
JH
892 goto do_exactf;
893 case EXACTFL:
894 m = STRING(c);
895 ln = STR_LEN(c);
d8093b23 896 c1 = *(U8*)m;
6eb5f6b9
JH
897 c2 = PL_fold_locale[c1];
898 do_exactf:
899 e = strend - ln;
b3c9acc1 900
6eb5f6b9
JH
901 if (norun && e < s)
902 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
903
904 if (do_utf8) {
905 STRLEN len;
906 if (c1 == c2)
907 while (s <= e) {
9041c2e3 908 if ( utf8_to_uvchr((U8*)s, &len) == c1
1aa99e6b
IH
909 && regtry(prog, s) )
910 goto got_it;
911 s += len;
912 }
913 else
914 while (s <= e) {
9041c2e3 915 UV c = utf8_to_uvchr((U8*)s, &len);
1aa99e6b
IH
916 if ( (c == c1 || c == c2) && regtry(prog, s) )
917 goto got_it;
918 s += len;
919 }
920 }
921 else {
922 if (c1 == c2)
923 while (s <= e) {
924 if ( *(U8*)s == c1
925 && (ln == 1 || !(OP(c) == EXACTF
926 ? ibcmp(s, m, ln)
927 : ibcmp_locale(s, m, ln)))
928 && (norun || regtry(prog, s)) )
929 goto got_it;
930 s++;
931 }
932 else
933 while (s <= e) {
934 if ( (*(U8*)s == c1 || *(U8*)s == c2)
935 && (ln == 1 || !(OP(c) == EXACTF
936 ? ibcmp(s, m, ln)
937 : ibcmp_locale(s, m, ln)))
938 && (norun || regtry(prog, s)) )
939 goto got_it;
940 s++;
941 }
b3c9acc1
IZ
942 }
943 break;
bbce6d69 944 case BOUNDL:
3280af22 945 PL_reg_flags |= RF_tainted;
bbce6d69 946 /* FALL THROUGH */
a0d0e21e 947 case BOUND:
ffc61ed2 948 if (do_utf8) {
12d33761 949 if (s == PL_bostr)
ffc61ed2
JH
950 tmp = '\n';
951 else {
1aa99e6b 952 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 953
2b9d42f0 954 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
955 }
956 tmp = ((OP(c) == BOUND ?
9041c2e3 957 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 958 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
959 while (s < strend) {
960 if (tmp == !(OP(c) == BOUND ?
3568d838 961 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
962 isALNUM_LC_utf8((U8*)s)))
963 {
964 tmp = !tmp;
965 if ((norun || regtry(prog, s)))
966 goto got_it;
967 }
968 s += UTF8SKIP(s);
a687059c 969 }
a0d0e21e 970 }
667bb95a 971 else {
12d33761 972 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
973 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
974 while (s < strend) {
975 if (tmp ==
976 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
977 tmp = !tmp;
978 if ((norun || regtry(prog, s)))
979 goto got_it;
980 }
981 s++;
a0ed51b3 982 }
a0ed51b3 983 }
6eb5f6b9 984 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
985 goto got_it;
986 break;
bbce6d69 987 case NBOUNDL:
3280af22 988 PL_reg_flags |= RF_tainted;
bbce6d69 989 /* FALL THROUGH */
a0d0e21e 990 case NBOUND:
ffc61ed2 991 if (do_utf8) {
12d33761 992 if (s == PL_bostr)
ffc61ed2
JH
993 tmp = '\n';
994 else {
1aa99e6b 995 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 996
2b9d42f0 997 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
998 }
999 tmp = ((OP(c) == NBOUND ?
9041c2e3 1000 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 1001 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1002 while (s < strend) {
1003 if (tmp == !(OP(c) == NBOUND ?
3568d838 1004 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
ffc61ed2
JH
1005 isALNUM_LC_utf8((U8*)s)))
1006 tmp = !tmp;
1007 else if ((norun || regtry(prog, s)))
1008 goto got_it;
1009 s += UTF8SKIP(s);
1010 }
a0d0e21e 1011 }
667bb95a 1012 else {
12d33761 1013 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
ffc61ed2
JH
1014 tmp = ((OP(c) == NBOUND ?
1015 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1016 while (s < strend) {
1017 if (tmp ==
1018 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1019 tmp = !tmp;
1020 else if ((norun || regtry(prog, s)))
1021 goto got_it;
1022 s++;
1023 }
a0ed51b3 1024 }
6eb5f6b9 1025 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1026 goto got_it;
1027 break;
a0d0e21e 1028 case ALNUM:
ffc61ed2 1029 if (do_utf8) {
8269fa76 1030 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1031 while (s < strend) {
3568d838 1032 if (swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1033 if (tmp && (norun || regtry(prog, s)))
1034 goto got_it;
1035 else
1036 tmp = doevery;
1037 }
bbce6d69 1038 else
ffc61ed2
JH
1039 tmp = 1;
1040 s += UTF8SKIP(s);
bbce6d69 1041 }
bbce6d69 1042 }
ffc61ed2
JH
1043 else {
1044 while (s < strend) {
1045 if (isALNUM(*s)) {
1046 if (tmp && (norun || regtry(prog, s)))
1047 goto got_it;
1048 else
1049 tmp = doevery;
1050 }
a0ed51b3 1051 else
ffc61ed2
JH
1052 tmp = 1;
1053 s++;
a0ed51b3 1054 }
a0ed51b3
LW
1055 }
1056 break;
bbce6d69 1057 case ALNUML:
3280af22 1058 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1059 if (do_utf8) {
1060 while (s < strend) {
1061 if (isALNUM_LC_utf8((U8*)s)) {
1062 if (tmp && (norun || regtry(prog, s)))
1063 goto got_it;
1064 else
1065 tmp = doevery;
1066 }
a687059c 1067 else
ffc61ed2
JH
1068 tmp = 1;
1069 s += UTF8SKIP(s);
a0d0e21e 1070 }
a0d0e21e 1071 }
ffc61ed2
JH
1072 else {
1073 while (s < strend) {
1074 if (isALNUM_LC(*s)) {
1075 if (tmp && (norun || regtry(prog, s)))
1076 goto got_it;
1077 else
1078 tmp = doevery;
1079 }
a0ed51b3 1080 else
ffc61ed2
JH
1081 tmp = 1;
1082 s++;
a0ed51b3 1083 }
a0ed51b3
LW
1084 }
1085 break;
a0d0e21e 1086 case NALNUM:
ffc61ed2 1087 if (do_utf8) {
8269fa76 1088 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 1089 while (s < strend) {
3568d838 1090 if (!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8)) {
ffc61ed2
JH
1091 if (tmp && (norun || regtry(prog, s)))
1092 goto got_it;
1093 else
1094 tmp = doevery;
1095 }
bbce6d69 1096 else
ffc61ed2
JH
1097 tmp = 1;
1098 s += UTF8SKIP(s);
bbce6d69 1099 }
bbce6d69 1100 }
ffc61ed2
JH
1101 else {
1102 while (s < strend) {
1103 if (!isALNUM(*s)) {
1104 if (tmp && (norun || regtry(prog, s)))
1105 goto got_it;
1106 else
1107 tmp = doevery;
1108 }
a0ed51b3 1109 else
ffc61ed2
JH
1110 tmp = 1;
1111 s++;
a0ed51b3 1112 }
a0ed51b3
LW
1113 }
1114 break;
bbce6d69 1115 case NALNUML:
3280af22 1116 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1117 if (do_utf8) {
1118 while (s < strend) {
1119 if (!isALNUM_LC_utf8((U8*)s)) {
1120 if (tmp && (norun || regtry(prog, s)))
1121 goto got_it;
1122 else
1123 tmp = doevery;
1124 }
a687059c 1125 else
ffc61ed2
JH
1126 tmp = 1;
1127 s += UTF8SKIP(s);
a687059c 1128 }
a0d0e21e 1129 }
ffc61ed2
JH
1130 else {
1131 while (s < strend) {
1132 if (!isALNUM_LC(*s)) {
1133 if (tmp && (norun || regtry(prog, s)))
1134 goto got_it;
1135 else
1136 tmp = doevery;
1137 }
a0ed51b3 1138 else
ffc61ed2
JH
1139 tmp = 1;
1140 s++;
a0ed51b3 1141 }
a0ed51b3
LW
1142 }
1143 break;
a0d0e21e 1144 case SPACE:
ffc61ed2 1145 if (do_utf8) {
8269fa76 1146 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1147 while (s < strend) {
3568d838 1148 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)) {
ffc61ed2
JH
1149 if (tmp && (norun || regtry(prog, s)))
1150 goto got_it;
1151 else
1152 tmp = doevery;
1153 }
a0d0e21e 1154 else
ffc61ed2
JH
1155 tmp = 1;
1156 s += UTF8SKIP(s);
2304df62 1157 }
a0d0e21e 1158 }
ffc61ed2
JH
1159 else {
1160 while (s < strend) {
1161 if (isSPACE(*s)) {
1162 if (tmp && (norun || regtry(prog, s)))
1163 goto got_it;
1164 else
1165 tmp = doevery;
1166 }
a0ed51b3 1167 else
ffc61ed2
JH
1168 tmp = 1;
1169 s++;
a0ed51b3 1170 }
a0ed51b3
LW
1171 }
1172 break;
bbce6d69 1173 case SPACEL:
3280af22 1174 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1175 if (do_utf8) {
1176 while (s < strend) {
1177 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1178 if (tmp && (norun || regtry(prog, s)))
1179 goto got_it;
1180 else
1181 tmp = doevery;
1182 }
bbce6d69 1183 else
ffc61ed2
JH
1184 tmp = 1;
1185 s += UTF8SKIP(s);
bbce6d69 1186 }
bbce6d69 1187 }
ffc61ed2
JH
1188 else {
1189 while (s < strend) {
1190 if (isSPACE_LC(*s)) {
1191 if (tmp && (norun || regtry(prog, s)))
1192 goto got_it;
1193 else
1194 tmp = doevery;
1195 }
a0ed51b3 1196 else
ffc61ed2
JH
1197 tmp = 1;
1198 s++;
a0ed51b3 1199 }
a0ed51b3
LW
1200 }
1201 break;
a0d0e21e 1202 case NSPACE:
ffc61ed2 1203 if (do_utf8) {
8269fa76 1204 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 1205 while (s < strend) {
3568d838 1206 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8))) {
ffc61ed2
JH
1207 if (tmp && (norun || regtry(prog, s)))
1208 goto got_it;
1209 else
1210 tmp = doevery;
1211 }
a0d0e21e 1212 else
ffc61ed2
JH
1213 tmp = 1;
1214 s += UTF8SKIP(s);
a687059c 1215 }
a0d0e21e 1216 }
ffc61ed2
JH
1217 else {
1218 while (s < strend) {
1219 if (!isSPACE(*s)) {
1220 if (tmp && (norun || regtry(prog, s)))
1221 goto got_it;
1222 else
1223 tmp = doevery;
1224 }
a0ed51b3 1225 else
ffc61ed2
JH
1226 tmp = 1;
1227 s++;
a0ed51b3 1228 }
a0ed51b3
LW
1229 }
1230 break;
bbce6d69 1231 case NSPACEL:
3280af22 1232 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1233 if (do_utf8) {
1234 while (s < strend) {
1235 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1236 if (tmp && (norun || regtry(prog, s)))
1237 goto got_it;
1238 else
1239 tmp = doevery;
1240 }
bbce6d69 1241 else
ffc61ed2
JH
1242 tmp = 1;
1243 s += UTF8SKIP(s);
bbce6d69 1244 }
bbce6d69 1245 }
ffc61ed2
JH
1246 else {
1247 while (s < strend) {
1248 if (!isSPACE_LC(*s)) {
1249 if (tmp && (norun || regtry(prog, s)))
1250 goto got_it;
1251 else
1252 tmp = doevery;
1253 }
a0ed51b3 1254 else
ffc61ed2
JH
1255 tmp = 1;
1256 s++;
a0ed51b3 1257 }
a0ed51b3
LW
1258 }
1259 break;
a0d0e21e 1260 case DIGIT:
ffc61ed2 1261 if (do_utf8) {
8269fa76 1262 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1263 while (s < strend) {
3568d838 1264 if (swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1265 if (tmp && (norun || regtry(prog, s)))
1266 goto got_it;
1267 else
1268 tmp = doevery;
1269 }
a0d0e21e 1270 else
ffc61ed2
JH
1271 tmp = 1;
1272 s += UTF8SKIP(s);
2b69d0c2 1273 }
a0d0e21e 1274 }
ffc61ed2
JH
1275 else {
1276 while (s < strend) {
1277 if (isDIGIT(*s)) {
1278 if (tmp && (norun || regtry(prog, s)))
1279 goto got_it;
1280 else
1281 tmp = doevery;
1282 }
a0ed51b3 1283 else
ffc61ed2
JH
1284 tmp = 1;
1285 s++;
a0ed51b3 1286 }
a0ed51b3
LW
1287 }
1288 break;
b8c5462f
JH
1289 case DIGITL:
1290 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1291 if (do_utf8) {
1292 while (s < strend) {
1293 if (isDIGIT_LC_utf8((U8*)s)) {
1294 if (tmp && (norun || regtry(prog, s)))
1295 goto got_it;
1296 else
1297 tmp = doevery;
1298 }
b8c5462f 1299 else
ffc61ed2
JH
1300 tmp = 1;
1301 s += UTF8SKIP(s);
b8c5462f 1302 }
b8c5462f 1303 }
ffc61ed2
JH
1304 else {
1305 while (s < strend) {
1306 if (isDIGIT_LC(*s)) {
1307 if (tmp && (norun || regtry(prog, s)))
1308 goto got_it;
1309 else
1310 tmp = doevery;
1311 }
b8c5462f 1312 else
ffc61ed2
JH
1313 tmp = 1;
1314 s++;
b8c5462f 1315 }
b8c5462f
JH
1316 }
1317 break;
a0d0e21e 1318 case NDIGIT:
ffc61ed2 1319 if (do_utf8) {
8269fa76 1320 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 1321 while (s < strend) {
3568d838 1322 if (!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8)) {
ffc61ed2
JH
1323 if (tmp && (norun || regtry(prog, s)))
1324 goto got_it;
1325 else
1326 tmp = doevery;
1327 }
a0d0e21e 1328 else
ffc61ed2
JH
1329 tmp = 1;
1330 s += UTF8SKIP(s);
a687059c 1331 }
a0d0e21e 1332 }
ffc61ed2
JH
1333 else {
1334 while (s < strend) {
1335 if (!isDIGIT(*s)) {
1336 if (tmp && (norun || regtry(prog, s)))
1337 goto got_it;
1338 else
1339 tmp = doevery;
1340 }
a0ed51b3 1341 else
ffc61ed2
JH
1342 tmp = 1;
1343 s++;
a0ed51b3 1344 }
a0ed51b3
LW
1345 }
1346 break;
b8c5462f
JH
1347 case NDIGITL:
1348 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1349 if (do_utf8) {
1350 while (s < strend) {
1351 if (!isDIGIT_LC_utf8((U8*)s)) {
1352 if (tmp && (norun || regtry(prog, s)))
1353 goto got_it;
1354 else
1355 tmp = doevery;
1356 }
b8c5462f 1357 else
ffc61ed2
JH
1358 tmp = 1;
1359 s += UTF8SKIP(s);
b8c5462f 1360 }
a0ed51b3 1361 }
ffc61ed2
JH
1362 else {
1363 while (s < strend) {
1364 if (!isDIGIT_LC(*s)) {
1365 if (tmp && (norun || regtry(prog, s)))
1366 goto got_it;
1367 else
1368 tmp = doevery;
1369 }
cf93c79d 1370 else
ffc61ed2
JH
1371 tmp = 1;
1372 s++;
b8c5462f 1373 }
b8c5462f
JH
1374 }
1375 break;
b3c9acc1 1376 default:
3c3eec57
GS
1377 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1378 break;
d6a28714 1379 }
6eb5f6b9
JH
1380 return 0;
1381 got_it:
1382 return s;
1383}
1384
1385/*
1386 - regexec_flags - match a regexp against a string
1387 */
1388I32
1389Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1390 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1391/* strend: pointer to null at end of string */
1392/* strbeg: real beginning of string */
1393/* minend: end of match must be >=minend after stringarg. */
1394/* data: May be used for some additional optimizations. */
1395/* nosave: For optimizations. */
1396{
6eb5f6b9
JH
1397 register char *s;
1398 register regnode *c;
1399 register char *startpos = stringarg;
6eb5f6b9
JH
1400 I32 minlen; /* must match at least this many chars */
1401 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1402 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1403 constant substr. */ /* CC */
1404 I32 end_shift = 0; /* Same for the end. */ /* CC */
1405 I32 scream_pos = -1; /* Internal iterator of scream. */
1406 char *scream_olds;
1407 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1408 bool do_utf8 = DO_UTF8(sv);
6eb5f6b9
JH
1409
1410 PL_regcc = 0;
1411
1412 cache_re(prog);
1413#ifdef DEBUGGING
aea4f609 1414 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1415#endif
1416
1417 /* Be paranoid... */
1418 if (prog == NULL || startpos == NULL) {
1419 Perl_croak(aTHX_ "NULL regexp parameter");
1420 return 0;
1421 }
1422
1423 minlen = prog->minlen;
1aa99e6b 1424 if (do_utf8) {
3baa4c62
JH
1425 if (!(prog->reganch & ROPT_SANY_SEEN))
1426 if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
1aa99e6b
IH
1427 }
1428 else {
1429 if (strend - startpos < minlen) goto phooey;
1430 }
6eb5f6b9 1431
6eb5f6b9
JH
1432 /* Check validity of program. */
1433 if (UCHARAT(prog->program) != REG_MAGIC) {
1434 Perl_croak(aTHX_ "corrupted regexp program");
1435 }
1436
1437 PL_reg_flags = 0;
1438 PL_reg_eval_set = 0;
1439 PL_reg_maxiter = 0;
1440
1441 if (prog->reganch & ROPT_UTF8)
1442 PL_reg_flags |= RF_utf8;
1443
1444 /* Mark beginning of line for ^ and lookbehind. */
1445 PL_regbol = startpos;
1446 PL_bostr = strbeg;
1447 PL_reg_sv = sv;
1448
1449 /* Mark end of line for $ (and such) */
1450 PL_regeol = strend;
1451
1452 /* see how far we have to get to not match where we matched before */
1453 PL_regtill = startpos+minend;
1454
1455 /* We start without call_cc context. */
1456 PL_reg_call_cc = 0;
1457
1458 /* If there is a "must appear" string, look for it. */
1459 s = startpos;
1460
1461 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1462 MAGIC *mg;
1463
1464 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1465 PL_reg_ganch = startpos;
1466 else if (sv && SvTYPE(sv) >= SVt_PVMG
1467 && SvMAGIC(sv)
1468 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1469 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1470 if (prog->reganch & ROPT_ANCH_GPOS) {
1471 if (s > PL_reg_ganch)
1472 goto phooey;
1473 s = PL_reg_ganch;
1474 }
1475 }
1476 else /* pos() not defined */
1477 PL_reg_ganch = strbeg;
1478 }
1479
699c3c34
JH
1480 if (do_utf8 == (UTF!=0) &&
1481 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1482 re_scream_pos_data d;
1483
1484 d.scream_olds = &scream_olds;
1485 d.scream_pos = &scream_pos;
1486 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1487 if (!s)
1488 goto phooey; /* not present */
1489 }
1490
1491 DEBUG_r( if (!PL_colorset) reginitcolors() );
1492 DEBUG_r(PerlIO_printf(Perl_debug_log,
1493 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1494 PL_colors[4],PL_colors[5],PL_colors[0],
1495 prog->precomp,
1496 PL_colors[1],
1497 (strlen(prog->precomp) > 60 ? "..." : ""),
1498 PL_colors[0],
1499 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1500 startpos, PL_colors[1],
1501 (strend - startpos > 60 ? "..." : ""))
1502 );
1503
1504 /* Simplest case: anchored match need be tried only once. */
1505 /* [unless only anchor is BOL and multiline is set] */
1506 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1507 if (s == startpos && regtry(prog, startpos))
1508 goto got_it;
1509 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1510 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1511 {
1512 char *end;
1513
1514 if (minlen)
1515 dontbother = minlen - 1;
1aa99e6b 1516 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1517 /* for multiline we only have to try after newlines */
1518 if (prog->check_substr) {
1519 if (s == startpos)
1520 goto after_try;
1521 while (1) {
1522 if (regtry(prog, s))
1523 goto got_it;
1524 after_try:
1525 if (s >= end)
1526 goto phooey;
1527 if (prog->reganch & RE_USE_INTUIT) {
1528 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1529 if (!s)
1530 goto phooey;
1531 }
1532 else
1533 s++;
1534 }
1535 } else {
1536 if (s > startpos)
1537 s--;
1538 while (s < end) {
1539 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1540 if (regtry(prog, s))
1541 goto got_it;
1542 }
1543 }
1544 }
1545 }
1546 goto phooey;
1547 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1548 if (regtry(prog, PL_reg_ganch))
1549 goto got_it;
1550 goto phooey;
1551 }
1552
1553 /* Messy cases: unanchored match. */
9041c2e3 1554 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1555 /* we have /x+whatever/ */
1556 /* it must be a one character string (XXXX Except UTF?) */
1557 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1558#ifdef DEBUGGING
1559 int did_match = 0;
1560#endif
1561
1aa99e6b 1562 if (do_utf8) {
6eb5f6b9
JH
1563 while (s < strend) {
1564 if (*s == ch) {
bf93d4cc 1565 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1566 if (regtry(prog, s)) goto got_it;
1567 s += UTF8SKIP(s);
1568 while (s < strend && *s == ch)
1569 s += UTF8SKIP(s);
1570 }
1571 s += UTF8SKIP(s);
1572 }
1573 }
1574 else {
1575 while (s < strend) {
1576 if (*s == ch) {
bf93d4cc 1577 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1578 if (regtry(prog, s)) goto got_it;
1579 s++;
1580 while (s < strend && *s == ch)
1581 s++;
1582 }
1583 s++;
1584 }
1585 }
bf93d4cc
GS
1586 DEBUG_r(did_match ||
1587 PerlIO_printf(Perl_debug_log,
1588 "Did not find anchored character...\n"));
6eb5f6b9
JH
1589 }
1590 /*SUPPRESS 560*/
1aa99e6b
IH
1591 else if (do_utf8 == (UTF!=0) &&
1592 (prog->anchored_substr != Nullsv
9041c2e3 1593 || (prog->float_substr != Nullsv
1aa99e6b 1594 && prog->float_max_offset < strend - s))) {
9041c2e3 1595 SV *must = prog->anchored_substr
6eb5f6b9 1596 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1597 I32 back_max =
6eb5f6b9 1598 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1599 I32 back_min =
6eb5f6b9 1600 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1601 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1602 -(I32)(CHR_SVLEN(must)
1aa99e6b 1603 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1604 char *last1; /* Last position checked before */
bf93d4cc
GS
1605#ifdef DEBUGGING
1606 int did_match = 0;
1607#endif
6eb5f6b9
JH
1608
1609 if (s > PL_bostr)
1610 last1 = HOPc(s, -1);
1611 else
1612 last1 = s - 1; /* bogus */
1613
1614 /* XXXX check_substr already used to find `s', can optimize if
1615 check_substr==must. */
1616 scream_pos = -1;
1617 dontbother = end_shift;
1618 strend = HOPc(strend, -dontbother);
1619 while ( (s <= last) &&
9041c2e3 1620 ((flags & REXEC_SCREAM)
1aa99e6b 1621 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1622 end_shift, &scream_pos, 0))
1aa99e6b 1623 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1624 (unsigned char*)strend, must,
6eb5f6b9 1625 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1626 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1627 if (HOPc(s, -back_max) > last1) {
1628 last1 = HOPc(s, -back_min);
1629 s = HOPc(s, -back_max);
1630 }
1631 else {
1632 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1633
1634 last1 = HOPc(s, -back_min);
1635 s = t;
1636 }
1aa99e6b 1637 if (do_utf8) {
6eb5f6b9
JH
1638 while (s <= last1) {
1639 if (regtry(prog, s))
1640 goto got_it;
1641 s += UTF8SKIP(s);
1642 }
1643 }
1644 else {
1645 while (s <= last1) {
1646 if (regtry(prog, s))
1647 goto got_it;
1648 s++;
1649 }
1650 }
1651 }
bf93d4cc
GS
1652 DEBUG_r(did_match ||
1653 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1654 ((must == prog->anchored_substr)
1655 ? "anchored" : "floating"),
1656 PL_colors[0],
1657 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1658 SvPVX(must),
1659 PL_colors[1], (SvTAIL(must) ? "$" : "")));
6eb5f6b9
JH
1660 goto phooey;
1661 }
155aba94 1662 else if ((c = prog->regstclass)) {
66e933ab
GS
1663 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1664 /* don't bother with what can't match */
6eb5f6b9 1665 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1666 DEBUG_r({
1667 SV *prop = sv_newmortal();
1668 regprop(prop, c);
1669 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s);
1670 });
6eb5f6b9
JH
1671 if (find_byclass(prog, c, s, strend, startpos, 0))
1672 goto got_it;
bf93d4cc 1673 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1674 }
1675 else {
1676 dontbother = 0;
1677 if (prog->float_substr != Nullsv) { /* Trim the end. */
1678 char *last;
d6a28714
JH
1679
1680 if (flags & REXEC_SCREAM) {
1681 last = screaminstr(sv, prog->float_substr, s - strbeg,
1682 end_shift, &scream_pos, 1); /* last one */
1683 if (!last)
ffc61ed2 1684 last = scream_olds; /* Only one occurrence. */
b8c5462f 1685 }
d6a28714
JH
1686 else {
1687 STRLEN len;
1688 char *little = SvPV(prog->float_substr, len);
1689
1690 if (SvTAIL(prog->float_substr)) {
1691 if (memEQ(strend - len + 1, little, len - 1))
1692 last = strend - len + 1;
1693 else if (!PL_multiline)
9041c2e3 1694 last = memEQ(strend - len, little, len)
d6a28714 1695 ? strend - len : Nullch;
b8c5462f 1696 else
d6a28714
JH
1697 goto find_last;
1698 } else {
1699 find_last:
9041c2e3 1700 if (len)
d6a28714 1701 last = rninstr(s, strend, little, little + len);
b8c5462f 1702 else
d6a28714 1703 last = strend; /* matching `$' */
b8c5462f 1704 }
b8c5462f 1705 }
bf93d4cc
GS
1706 if (last == NULL) {
1707 DEBUG_r(PerlIO_printf(Perl_debug_log,
1708 "%sCan't trim the tail, match fails (should not happen)%s\n",
1709 PL_colors[4],PL_colors[5]));
1710 goto phooey; /* Should not happen! */
1711 }
d6a28714
JH
1712 dontbother = strend - last + prog->float_min_offset;
1713 }
1714 if (minlen && (dontbother < minlen))
1715 dontbother = minlen - 1;
1716 strend -= dontbother; /* this one's always in bytes! */
1717 /* We don't know much -- general case. */
1aa99e6b 1718 if (do_utf8) {
d6a28714
JH
1719 for (;;) {
1720 if (regtry(prog, s))
1721 goto got_it;
1722 if (s >= strend)
1723 break;
b8c5462f 1724 s += UTF8SKIP(s);
d6a28714
JH
1725 };
1726 }
1727 else {
1728 do {
1729 if (regtry(prog, s))
1730 goto got_it;
1731 } while (s++ < strend);
1732 }
1733 }
1734
1735 /* Failure. */
1736 goto phooey;
1737
1738got_it:
1739 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1740
1741 if (PL_reg_eval_set) {
1742 /* Preserve the current value of $^R */
1743 if (oreplsv != GvSV(PL_replgv))
1744 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1745 restored, the value remains
1746 the same. */
1747 restore_pos(aTHXo_ 0);
1748 }
1749
1750 /* make sure $`, $&, $', and $digit will work later */
1751 if ( !(flags & REXEC_NOT_FIRST) ) {
1752 if (RX_MATCH_COPIED(prog)) {
1753 Safefree(prog->subbeg);
1754 RX_MATCH_COPIED_off(prog);
1755 }
1756 if (flags & REXEC_COPY_STR) {
1757 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1758
1759 s = savepvn(strbeg, i);
1760 prog->subbeg = s;
1761 prog->sublen = i;
1762 RX_MATCH_COPIED_on(prog);
1763 }
1764 else {
1765 prog->subbeg = strbeg;
1766 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1767 }
1768 }
9041c2e3 1769
d6a28714
JH
1770 return 1;
1771
1772phooey:
bf93d4cc
GS
1773 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1774 PL_colors[4],PL_colors[5]));
d6a28714
JH
1775 if (PL_reg_eval_set)
1776 restore_pos(aTHXo_ 0);
1777 return 0;
1778}
1779
1780/*
1781 - regtry - try match at specific point
1782 */
1783STATIC I32 /* 0 failure, 1 success */
1784S_regtry(pTHX_ regexp *prog, char *startpos)
1785{
d6a28714
JH
1786 register I32 i;
1787 register I32 *sp;
1788 register I32 *ep;
1789 CHECKPOINT lastcp;
1790
02db2b7b
IZ
1791#ifdef DEBUGGING
1792 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1793#endif
d6a28714
JH
1794 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1795 MAGIC *mg;
1796
1797 PL_reg_eval_set = RS_init;
1798 DEBUG_r(DEBUG_s(
b900a521
JH
1799 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1800 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1801 ));
e8347627 1802 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1803 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1804 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1805 SAVETMPS;
1806 /* Apparently this is not needed, judging by wantarray. */
e8347627 1807 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1808 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1809
1810 if (PL_reg_sv) {
1811 /* Make $_ available to executed code. */
1812 if (PL_reg_sv != DEFSV) {
1813 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1814 SAVESPTR(DEFSV);
1815 DEFSV = PL_reg_sv;
b8c5462f 1816 }
d6a28714 1817
9041c2e3 1818 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
d6a28714
JH
1819 && (mg = mg_find(PL_reg_sv, 'g')))) {
1820 /* prepare for quick setting of pos */
1821 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1822 mg = mg_find(PL_reg_sv, 'g');
1823 mg->mg_len = -1;
b8c5462f 1824 }
d6a28714
JH
1825 PL_reg_magic = mg;
1826 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1827 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1828 }
1829 if (!PL_reg_curpm)
0f79a09d 1830 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1831 PL_reg_curpm->op_pmregexp = prog;
1832 PL_reg_oldcurpm = PL_curpm;
1833 PL_curpm = PL_reg_curpm;
1834 if (RX_MATCH_COPIED(prog)) {
1835 /* Here is a serious problem: we cannot rewrite subbeg,
1836 since it may be needed if this match fails. Thus
1837 $` inside (?{}) could fail... */
1838 PL_reg_oldsaved = prog->subbeg;
1839 PL_reg_oldsavedlen = prog->sublen;
1840 RX_MATCH_COPIED_off(prog);
1841 }
1842 else
1843 PL_reg_oldsaved = Nullch;
1844 prog->subbeg = PL_bostr;
1845 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1846 }
1847 prog->startp[0] = startpos - PL_bostr;
1848 PL_reginput = startpos;
1849 PL_regstartp = prog->startp;
1850 PL_regendp = prog->endp;
1851 PL_reglastparen = &prog->lastparen;
1852 prog->lastparen = 0;
1853 PL_regsize = 0;
1854 DEBUG_r(PL_reg_starttry = startpos);
1855 if (PL_reg_start_tmpl <= prog->nparens) {
1856 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1857 if(PL_reg_start_tmp)
1858 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1859 else
1860 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1861 }
1862
1863 /* XXXX What this code is doing here?!!! There should be no need
1864 to do this again and again, PL_reglastparen should take care of
3dd2943c 1865 this! --ilya*/
dafc8851
JH
1866
1867 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1868 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1869 * PL_reglastparen), is not needed at all by the test suite
1870 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1871 * enough, for building DynaLoader, or otherwise this
1872 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1873 * will happen. Meanwhile, this code *is* needed for the
1874 * above-mentioned test suite tests to succeed. The common theme
1875 * on those tests seems to be returning null fields from matches.
1876 * --jhi */
dafc8851 1877#if 1
d6a28714
JH
1878 sp = prog->startp;
1879 ep = prog->endp;
1880 if (prog->nparens) {
09e8ae3b 1881 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1882 *++sp = -1;
1883 *++ep = -1;
1884 }
1885 }
dafc8851 1886#endif
02db2b7b 1887 REGCP_SET(lastcp);
d6a28714
JH
1888 if (regmatch(prog->program + 1)) {
1889 prog->endp[0] = PL_reginput - PL_bostr;
1890 return 1;
1891 }
02db2b7b 1892 REGCP_UNWIND(lastcp);
d6a28714
JH
1893 return 0;
1894}
1895
02db2b7b
IZ
1896#define RE_UNWIND_BRANCH 1
1897#define RE_UNWIND_BRANCHJ 2
1898
1899union re_unwind_t;
1900
1901typedef struct { /* XX: makes sense to enlarge it... */
1902 I32 type;
1903 I32 prev;
1904 CHECKPOINT lastcp;
1905} re_unwind_generic_t;
1906
1907typedef struct {
1908 I32 type;
1909 I32 prev;
1910 CHECKPOINT lastcp;
1911 I32 lastparen;
1912 regnode *next;
1913 char *locinput;
1914 I32 nextchr;
1915#ifdef DEBUGGING
1916 int regindent;
1917#endif
1918} re_unwind_branch_t;
1919
1920typedef union re_unwind_t {
1921 I32 type;
1922 re_unwind_generic_t generic;
1923 re_unwind_branch_t branch;
1924} re_unwind_t;
1925
d6a28714
JH
1926/*
1927 - regmatch - main matching routine
1928 *
1929 * Conceptually the strategy is simple: check to see whether the current
1930 * node matches, call self recursively to see whether the rest matches,
1931 * and then act accordingly. In practice we make some effort to avoid
1932 * recursion, in particular by going through "ordinary" nodes (that don't
1933 * need to know whether the rest of the match failed) by a loop instead of
1934 * by recursion.
1935 */
1936/* [lwall] I've hoisted the register declarations to the outer block in order to
1937 * maybe save a little bit of pushing and popping on the stack. It also takes
1938 * advantage of machines that use a register save mask on subroutine entry.
1939 */
1940STATIC I32 /* 0 failure, 1 success */
1941S_regmatch(pTHX_ regnode *prog)
1942{
d6a28714
JH
1943 register regnode *scan; /* Current node. */
1944 regnode *next; /* Next node. */
1945 regnode *inner; /* Next node in internal branch. */
1946 register I32 nextchr; /* renamed nextchr - nextchar colides with
1947 function of same name */
1948 register I32 n; /* no or next */
1949 register I32 ln; /* len or last */
1950 register char *s; /* operand or save */
1951 register char *locinput = PL_reginput;
1952 register I32 c1, c2, paren; /* case fold search, parenth */
1953 int minmod = 0, sw = 0, logical = 0;
02db2b7b
IZ
1954 I32 unwind = 0;
1955 I32 firstcp = PL_savestack_ix;
ffc61ed2 1956 register bool do_utf8 = DO_UTF8(PL_reg_sv);
02db2b7b 1957
d6a28714
JH
1958#ifdef DEBUGGING
1959 PL_regindent++;
1960#endif
1961
1962 /* Note that nextchr is a byte even in UTF */
1963 nextchr = UCHARAT(locinput);
1964 scan = prog;
1965 while (scan != NULL) {
1966#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
02db2b7b 1967#if 1
d6a28714
JH
1968# define sayYES goto yes
1969# define sayNO goto no
7821416a
IZ
1970# define sayYES_FINAL goto yes_final
1971# define sayYES_LOUD goto yes_loud
1972# define sayNO_FINAL goto no_final
1973# define sayNO_SILENT goto do_no
d6a28714
JH
1974# define saySAME(x) if (x) goto yes; else goto no
1975# define REPORT_CODE_OFF 24
1976#else
1977# define sayYES return 1
1978# define sayNO return 0
7821416a
IZ
1979# define sayYES_FINAL return 1
1980# define sayYES_LOUD return 1
1981# define sayNO_FINAL return 0
1982# define sayNO_SILENT return 0
d6a28714
JH
1983# define saySAME(x) return x
1984#endif
1985 DEBUG_r( {
1986 SV *prop = sv_newmortal();
1987 int docolor = *PL_colors[0];
1988 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 1989 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
1990 /* The part of the string before starttry has one color
1991 (pref0_len chars), between starttry and current
1992 position another one (pref_len - pref0_len chars),
1993 after the current position the third one.
1994 We assume that pref0_len <= pref_len, otherwise we
1995 decrease pref0_len. */
9041c2e3 1996 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
1997 ? (5 + taill) - l : locinput - PL_bostr;
1998 int pref0_len;
d6a28714 1999
1aa99e6b
IH
2000 while (UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2001 pref_len++;
2002 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2003 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2004 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2005 ? (5 + taill) - pref_len : PL_regeol - locinput);
1aa99e6b
IH
2006 while (UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2007 l--;
d6a28714
JH
2008 if (pref0_len < 0)
2009 pref0_len = 0;
2010 if (pref0_len > pref_len)
2011 pref0_len = pref_len;
2012 regprop(prop, scan);
9041c2e3 2013 PerlIO_printf(Perl_debug_log,
b900a521 2014 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
9041c2e3
NIS
2015 (IV)(locinput - PL_bostr),
2016 PL_colors[4], pref0_len,
d6a28714 2017 locinput - pref_len, PL_colors[5],
9041c2e3 2018 PL_colors[2], pref_len - pref0_len,
d6a28714
JH
2019 locinput - pref_len + pref0_len, PL_colors[3],
2020 (docolor ? "" : "> <"),
2021 PL_colors[0], l, locinput, PL_colors[1],
2022 15 - l - pref_len + 1,
2023 "",
b900a521 2024 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
2025 SvPVX(prop));
2026 } );
2027
2028 next = scan + NEXT_OFF(scan);
2029 if (next == scan)
2030 next = NULL;
2031
2032 switch (OP(scan)) {
2033 case BOL:
12d33761
HS
2034 if (locinput == PL_bostr || (PL_multiline &&
2035 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
d6a28714
JH
2036 {
2037 /* regtill = regbol; */
b8c5462f
JH
2038 break;
2039 }
d6a28714
JH
2040 sayNO;
2041 case MBOL:
12d33761
HS
2042 if (locinput == PL_bostr ||
2043 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
d6a28714 2044 {
b8c5462f
JH
2045 break;
2046 }
d6a28714
JH
2047 sayNO;
2048 case SBOL:
c2a73568 2049 if (locinput == PL_bostr)
b8c5462f 2050 break;
d6a28714
JH
2051 sayNO;
2052 case GPOS:
2053 if (locinput == PL_reg_ganch)
2054 break;
2055 sayNO;
2056 case EOL:
2057 if (PL_multiline)
2058 goto meol;
2059 else
2060 goto seol;
2061 case MEOL:
2062 meol:
2063 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2064 sayNO;
b8c5462f 2065 break;
d6a28714
JH
2066 case SEOL:
2067 seol:
2068 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2069 sayNO;
d6a28714 2070 if (PL_regeol - locinput > 1)
b8c5462f 2071 sayNO;
b8c5462f 2072 break;
d6a28714
JH
2073 case EOS:
2074 if (PL_regeol != locinput)
b8c5462f 2075 sayNO;
d6a28714 2076 break;
ffc61ed2 2077 case SANY:
d6a28714 2078 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2079 sayNO;
b8c5462f 2080 nextchr = UCHARAT(++locinput);
a0d0e21e 2081 break;
ffc61ed2 2082 case REG_ANY:
1aa99e6b
IH
2083 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2084 sayNO;
2085 if (do_utf8) {
b8c5462f 2086 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2087 if (locinput > PL_regeol)
2088 sayNO;
a0ed51b3 2089 nextchr = UCHARAT(locinput);
a0ed51b3 2090 }
1aa99e6b
IH
2091 else
2092 nextchr = UCHARAT(++locinput);
a0ed51b3 2093 break;
d6a28714 2094 case EXACT:
cd439c50
IZ
2095 s = STRING(scan);
2096 ln = STR_LEN(scan);
1aa99e6b
IH
2097 if (do_utf8 != (UTF!=0)) {
2098 char *l = locinput;
2099 char *e = s + ln;
2100 STRLEN len;
2101 if (do_utf8)
2102 while (s < e) {
2103 if (l >= PL_regeol)
2104 sayNO;
9041c2e3 2105 if (*((U8*)s) != utf8_to_uvchr((U8*)l, &len))
1aa99e6b
IH
2106 sayNO;
2107 s++;
2108 l += len;
2109 }
2110 else
2111 while (s < e) {
2112 if (l >= PL_regeol)
2113 sayNO;
9041c2e3 2114 if (*((U8*)l) != utf8_to_uvchr((U8*)s, &len))
1aa99e6b
IH
2115 sayNO;
2116 s += len;
2117 l++;
2118 }
2119 locinput = l;
2120 nextchr = UCHARAT(locinput);
2121 break;
2122 }
d6a28714
JH
2123 /* Inline the first character, for speed. */
2124 if (UCHARAT(s) != nextchr)
2125 sayNO;
2126 if (PL_regeol - locinput < ln)
2127 sayNO;
2128 if (ln > 1 && memNE(s, locinput, ln))
2129 sayNO;
2130 locinput += ln;
2131 nextchr = UCHARAT(locinput);
2132 break;
2133 case EXACTFL:
b8c5462f
JH
2134 PL_reg_flags |= RF_tainted;
2135 /* FALL THROUGH */
d6a28714 2136 case EXACTF:
cd439c50
IZ
2137 s = STRING(scan);
2138 ln = STR_LEN(scan);
d6a28714 2139
1aa99e6b 2140 if (do_utf8) {
d6a28714 2141 char *l = locinput;
1aa99e6b
IH
2142 char *e;
2143 e = s + ln;
d6a28714
JH
2144 c1 = OP(scan) == EXACTF;
2145 while (s < e) {
1aa99e6b 2146 if (l >= PL_regeol) {
d6a28714
JH
2147 sayNO;
2148 }
2b9d42f0 2149 if ((UTF ? utf8n_to_uvchr((U8*)s, e - s, 0, 0) : *((U8*)s)) !=
1aa99e6b
IH
2150 (c1 ? toLOWER_utf8((U8*)l) : toLOWER_LC_utf8((U8*)l)))
2151 sayNO;
2152 s += UTF ? UTF8SKIP(s) : 1;
d6a28714 2153 l += UTF8SKIP(l);
b8c5462f 2154 }
d6a28714 2155 locinput = l;
a0ed51b3
LW
2156 nextchr = UCHARAT(locinput);
2157 break;
2158 }
d6a28714
JH
2159
2160 /* Inline the first character, for speed. */
2161 if (UCHARAT(s) != nextchr &&
2162 UCHARAT(s) != ((OP(scan) == EXACTF)
2163 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2164 sayNO;
d6a28714 2165 if (PL_regeol - locinput < ln)
b8c5462f 2166 sayNO;
d6a28714
JH
2167 if (ln > 1 && (OP(scan) == EXACTF
2168 ? ibcmp(s, locinput, ln)
2169 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2170 sayNO;
d6a28714
JH
2171 locinput += ln;
2172 nextchr = UCHARAT(locinput);
a0d0e21e 2173 break;
d6a28714 2174 case ANYOF:
ffc61ed2
JH
2175 if (do_utf8) {
2176 if (!reginclass(scan, (U8*)locinput, do_utf8))
2177 sayNO;
2178 if (locinput >= PL_regeol)
2179 sayNO;
2180 locinput += PL_utf8skip[nextchr];
b8c5462f 2181 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2182 }
2183 else {
2184 if (nextchr < 0)
2185 nextchr = UCHARAT(locinput);
2186 if (!reginclass(scan, (U8*)locinput, do_utf8))
2187 sayNO;
2188 if (!nextchr && locinput >= PL_regeol)
2189 sayNO;
2190 nextchr = UCHARAT(++locinput);
2191 }
b8c5462f 2192 break;
d6a28714 2193 case ALNUML:
b8c5462f
JH
2194 PL_reg_flags |= RF_tainted;
2195 /* FALL THROUGH */
d6a28714 2196 case ALNUM:
b8c5462f 2197 if (!nextchr)
4633a7c4 2198 sayNO;
ffc61ed2
JH
2199 if (do_utf8) {
2200 if (!(OP(scan) == ALNUM
3568d838 2201 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714 2202 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2203 {
2204 sayNO;
a0ed51b3 2205 }
b8c5462f 2206 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2207 nextchr = UCHARAT(locinput);
2208 break;
2209 }
ffc61ed2 2210 if (!(OP(scan) == ALNUM
d6a28714 2211 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2212 sayNO;
b8c5462f 2213 nextchr = UCHARAT(++locinput);
a0d0e21e 2214 break;
d6a28714 2215 case NALNUML:
b8c5462f
JH
2216 PL_reg_flags |= RF_tainted;
2217 /* FALL THROUGH */
d6a28714
JH
2218 case NALNUM:
2219 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2220 sayNO;
ffc61ed2 2221 if (do_utf8) {
8269fa76 2222 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2223 if (OP(scan) == NALNUM
3568d838 2224 ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
d6a28714
JH
2225 : isALNUM_LC_utf8((U8*)locinput))
2226 {
b8c5462f 2227 sayNO;
d6a28714 2228 }
b8c5462f
JH
2229 locinput += PL_utf8skip[nextchr];
2230 nextchr = UCHARAT(locinput);
2231 break;
2232 }
ffc61ed2 2233 if (OP(scan) == NALNUM
d6a28714 2234 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2235 sayNO;
76e3520e 2236 nextchr = UCHARAT(++locinput);
a0d0e21e 2237 break;
d6a28714
JH
2238 case BOUNDL:
2239 case NBOUNDL:
3280af22 2240 PL_reg_flags |= RF_tainted;
bbce6d69 2241 /* FALL THROUGH */
d6a28714
JH
2242 case BOUND:
2243 case NBOUND:
2244 /* was last char in word? */
ffc61ed2 2245 if (do_utf8) {
12d33761
HS
2246 if (locinput == PL_bostr)
2247 ln = '\n';
ffc61ed2
JH
2248 else {
2249 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2250
2b9d42f0 2251 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2252 }
2253 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2254 ln = isALNUM_uni(ln);
8269fa76 2255 LOAD_UTF8_CHARCLASS(alnum,"a");
3568d838 2256 n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
ffc61ed2
JH
2257 }
2258 else {
9041c2e3 2259 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2260 n = isALNUM_LC_utf8((U8*)locinput);
2261 }
a0ed51b3 2262 }
d6a28714 2263 else {
12d33761
HS
2264 ln = (locinput != PL_bostr) ?
2265 UCHARAT(locinput - 1) : '\n';
ffc61ed2
JH
2266 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2267 ln = isALNUM(ln);
2268 n = isALNUM(nextchr);
2269 }
2270 else {
2271 ln = isALNUM_LC(ln);
2272 n = isALNUM_LC(nextchr);
2273 }
d6a28714 2274 }
ffc61ed2
JH
2275 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2276 OP(scan) == BOUNDL))
2277 sayNO;
a0ed51b3 2278 break;
d6a28714 2279 case SPACEL:
3280af22 2280 PL_reg_flags |= RF_tainted;
bbce6d69 2281 /* FALL THROUGH */
d6a28714 2282 case SPACE:
9442cb0e 2283 if (!nextchr)
4633a7c4 2284 sayNO;
1aa99e6b 2285 if (do_utf8) {
fd400ab9 2286 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2287 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2288 if (!(OP(scan) == SPACE
3568d838 2289 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
ffc61ed2
JH
2290 : isSPACE_LC_utf8((U8*)locinput)))
2291 {
2292 sayNO;
2293 }
2294 locinput += PL_utf8skip[nextchr];
2295 nextchr = UCHARAT(locinput);
2296 break;
d6a28714 2297 }
ffc61ed2
JH
2298 if (!(OP(scan) == SPACE
2299 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2300 sayNO;
2301 nextchr = UCHARAT(++locinput);
2302 }
2303 else {
2304 if (!(OP(scan) == SPACE
2305 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2306 sayNO;
2307 nextchr = UCHARAT(++locinput);
a0ed51b3 2308 }
a0ed51b3 2309 break;
d6a28714 2310 case NSPACEL:
3280af22 2311 PL_reg_flags |= RF_tainted;
bbce6d69 2312 /* FALL THROUGH */
d6a28714 2313 case NSPACE:
9442cb0e 2314 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2315 sayNO;
1aa99e6b 2316 if (do_utf8) {
8269fa76 2317 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2318 if (OP(scan) == NSPACE
3568d838 2319 ? swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
d6a28714 2320 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2321 {
2322 sayNO;
2323 }
2324 locinput += PL_utf8skip[nextchr];
2325 nextchr = UCHARAT(locinput);
2326 break;
a0ed51b3 2327 }
ffc61ed2 2328 if (OP(scan) == NSPACE
d6a28714 2329 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2330 sayNO;
76e3520e 2331 nextchr = UCHARAT(++locinput);
a0d0e21e 2332 break;
d6a28714 2333 case DIGITL:
a0ed51b3
LW
2334 PL_reg_flags |= RF_tainted;
2335 /* FALL THROUGH */
d6a28714 2336 case DIGIT:
9442cb0e 2337 if (!nextchr)
a0ed51b3 2338 sayNO;
1aa99e6b 2339 if (do_utf8) {
8269fa76 2340 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2341 if (!(OP(scan) == DIGIT
3568d838 2342 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e 2343 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2344 {
a0ed51b3 2345 sayNO;
dfe13c55 2346 }
6f06b55f 2347 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2348 nextchr = UCHARAT(locinput);
2349 break;
2350 }
ffc61ed2 2351 if (!(OP(scan) == DIGIT
9442cb0e 2352 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2353 sayNO;
2354 nextchr = UCHARAT(++locinput);
2355 break;
d6a28714 2356 case NDIGITL:
b8c5462f
JH
2357 PL_reg_flags |= RF_tainted;
2358 /* FALL THROUGH */
d6a28714 2359 case NDIGIT:
9442cb0e 2360 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2361 sayNO;
1aa99e6b 2362 if (do_utf8) {
8269fa76 2363 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2364 if (OP(scan) == NDIGIT
3568d838 2365 ? swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
9442cb0e
GS
2366 : isDIGIT_LC_utf8((U8*)locinput))
2367 {
a0ed51b3 2368 sayNO;
9442cb0e 2369 }
6f06b55f 2370 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2371 nextchr = UCHARAT(locinput);
2372 break;
2373 }
ffc61ed2 2374 if (OP(scan) == NDIGIT
9442cb0e 2375 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2376 sayNO;
2377 nextchr = UCHARAT(++locinput);
2378 break;
2379 case CLUMP:
8269fa76 2380 LOAD_UTF8_CHARCLASS(mark,"~");
3568d838
JH
2381 if (locinput >= PL_regeol ||
2382 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3 2383 sayNO;
6f06b55f 2384 locinput += PL_utf8skip[nextchr];
3568d838
JH
2385 while (locinput < PL_regeol &&
2386 swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
a0ed51b3
LW
2387 locinput += UTF8SKIP(locinput);
2388 if (locinput > PL_regeol)
2389 sayNO;
2390 nextchr = UCHARAT(locinput);
2391 break;
c8756f30 2392 case REFFL:
3280af22 2393 PL_reg_flags |= RF_tainted;
c8756f30 2394 /* FALL THROUGH */
c277df42 2395 case REF:
c8756f30 2396 case REFF:
c277df42 2397 n = ARG(scan); /* which paren pair */
cf93c79d 2398 ln = PL_regstartp[n];
2c2d71f5 2399 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2400 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2401 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2402 if (ln == PL_regendp[n])
a0d0e21e 2403 break;
a0ed51b3 2404
cf93c79d 2405 s = PL_bostr + ln;
1aa99e6b 2406 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2407 char *l = locinput;
cf93c79d 2408 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2409 /*
2410 * Note that we can't do the "other character" lookup trick as
2411 * in the 8-bit case (no pun intended) because in Unicode we
2412 * have to map both upper and title case to lower case.
2413 */
2414 if (OP(scan) == REFF) {
2415 while (s < e) {
2416 if (l >= PL_regeol)
2417 sayNO;
dfe13c55 2418 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2419 sayNO;
2420 s += UTF8SKIP(s);
2421 l += UTF8SKIP(l);
2422 }
2423 }
2424 else {
2425 while (s < e) {
2426 if (l >= PL_regeol)
2427 sayNO;
dfe13c55 2428 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2429 sayNO;
2430 s += UTF8SKIP(s);
2431 l += UTF8SKIP(l);
2432 }
2433 }
2434 locinput = l;
2435 nextchr = UCHARAT(locinput);
2436 break;
2437 }
2438
a0d0e21e 2439 /* Inline the first character, for speed. */
76e3520e 2440 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2441 (OP(scan) == REF ||
2442 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2443 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2444 sayNO;
cf93c79d 2445 ln = PL_regendp[n] - ln;
3280af22 2446 if (locinput + ln > PL_regeol)
4633a7c4 2447 sayNO;
c8756f30
AK
2448 if (ln > 1 && (OP(scan) == REF
2449 ? memNE(s, locinput, ln)
2450 : (OP(scan) == REFF
2451 ? ibcmp(s, locinput, ln)
2452 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2453 sayNO;
a0d0e21e 2454 locinput += ln;
76e3520e 2455 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2456 break;
2457
2458 case NOTHING:
c277df42 2459 case TAIL:
a0d0e21e
LW
2460 break;
2461 case BACK:
2462 break;
c277df42
IZ
2463 case EVAL:
2464 {
2465 dSP;
533c011a 2466 OP_4tree *oop = PL_op;
3280af22
NIS
2467 COP *ocurcop = PL_curcop;
2468 SV **ocurpad = PL_curpad;
c277df42 2469 SV *ret;
9041c2e3 2470
c277df42 2471 n = ARG(scan);
533c011a 2472 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2473 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2474 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2475 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2476
cea2e8a9 2477 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2478 SPAGAIN;
2479 ret = POPs;
2480 PUTBACK;
9041c2e3 2481
0f5d15d6
IZ
2482 PL_op = oop;
2483 PL_curpad = ocurpad;
2484 PL_curcop = ocurcop;
c277df42 2485 if (logical) {
0f5d15d6
IZ
2486 if (logical == 2) { /* Postponed subexpression. */
2487 regexp *re;
22c35a8c 2488 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2489 re_cc_state state;
0f5d15d6
IZ
2490 CHECKPOINT cp, lastcp;
2491
2492 if(SvROK(ret) || SvRMAGICAL(ret)) {
2493 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2494
2495 if(SvMAGICAL(sv))
2496 mg = mg_find(sv, 'r');
2497 }
2498 if (mg) {
2499 re = (regexp *)mg->mg_obj;
df0003d4 2500 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2501 }
2502 else {
2503 STRLEN len;
2504 char *t = SvPV(ret, len);
2505 PMOP pm;
2506 char *oprecomp = PL_regprecomp;
2507 I32 osize = PL_regsize;
2508 I32 onpar = PL_regnpar;
2509
5fcd1c1b 2510 Zero(&pm, 1, PMOP);
cea2e8a9 2511 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2512 if (!(SvFLAGS(ret)
0f5d15d6
IZ
2513 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2514 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2515 PL_regprecomp = oprecomp;
2516 PL_regsize = osize;
2517 PL_regnpar = onpar;
2518 }
2519 DEBUG_r(
9041c2e3 2520 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2521 "Entering embedded `%s%.60s%s%s'\n",
2522 PL_colors[0],
2523 re->precomp,
2524 PL_colors[1],
2525 (strlen(re->precomp) > 60 ? "..." : ""))
2526 );
2527 state.node = next;
2528 state.prev = PL_reg_call_cc;
2529 state.cc = PL_regcc;
2530 state.re = PL_reg_re;
2531
2ab05381 2532 PL_regcc = 0;
9041c2e3 2533
0f5d15d6 2534 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2535 REGCP_SET(lastcp);
0f5d15d6
IZ
2536 cache_re(re);
2537 state.ss = PL_savestack_ix;
2538 *PL_reglastparen = 0;
2539 PL_reg_call_cc = &state;
2540 PL_reginput = locinput;
2c2d71f5
JH
2541
2542 /* XXXX This is too dramatic a measure... */
2543 PL_reg_maxiter = 0;
2544
0f5d15d6 2545 if (regmatch(re->program + 1)) {
2c914db6
IZ
2546 /* Even though we succeeded, we need to restore
2547 global variables, since we may be wrapped inside
2548 SUSPEND, thus the match may be not finished yet. */
2549
2550 /* XXXX Do this only if SUSPENDed? */
2551 PL_reg_call_cc = state.prev;
2552 PL_regcc = state.cc;
2553 PL_reg_re = state.re;
2554 cache_re(PL_reg_re);
2555
2556 /* XXXX This is too dramatic a measure... */
2557 PL_reg_maxiter = 0;
2558
2559 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2560 ReREFCNT_dec(re);
2561 regcpblow(cp);
2562 sayYES;
2563 }
0f5d15d6 2564 ReREFCNT_dec(re);
02db2b7b 2565 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2566 regcppop();
2567 PL_reg_call_cc = state.prev;
2568 PL_regcc = state.cc;
2569 PL_reg_re = state.re;
d3790889 2570 cache_re(PL_reg_re);
2c2d71f5
JH
2571
2572 /* XXXX This is too dramatic a measure... */
2573 PL_reg_maxiter = 0;
2574
0f5d15d6
IZ
2575 sayNO;
2576 }
c277df42 2577 sw = SvTRUE(ret);
0f5d15d6 2578 logical = 0;
a0ed51b3
LW
2579 }
2580 else
3280af22 2581 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2582 break;
2583 }
a0d0e21e 2584 case OPEN:
c277df42 2585 n = ARG(scan); /* which paren pair */
3280af22
NIS
2586 PL_reg_start_tmp[n] = locinput;
2587 if (n > PL_regsize)
2588 PL_regsize = n;
a0d0e21e
LW
2589 break;
2590 case CLOSE:
c277df42 2591 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2592 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2593 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2594 if (n > *PL_reglastparen)
2595 *PL_reglastparen = n;
a0d0e21e 2596 break;
c277df42
IZ
2597 case GROUPP:
2598 n = ARG(scan); /* which paren pair */
cf93c79d 2599 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2600 break;
2601 case IFTHEN:
2c2d71f5 2602 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2603 if (sw)
2604 next = NEXTOPER(NEXTOPER(scan));
2605 else {
2606 next = scan + ARG(scan);
2607 if (OP(next) == IFTHEN) /* Fake one. */
2608 next = NEXTOPER(NEXTOPER(next));
2609 }
2610 break;
2611 case LOGICAL:
0f5d15d6 2612 logical = scan->flags;
c277df42 2613 break;
2ab05381
IZ
2614/*******************************************************************
2615 PL_regcc contains infoblock about the innermost (...)* loop, and
2616 a pointer to the next outer infoblock.
2617
2618 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2619
2620 1) After matching X, regnode for CURLYX is processed;
2621
9041c2e3 2622 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2623 regmatch() recursively with the starting point at WHILEM node;
2624
2625 3) Each hit of WHILEM node tries to match A and Z (in the order
2626 depending on the current iteration, min/max of {min,max} and
2627 greediness). The information about where are nodes for "A"
2628 and "Z" is read from the infoblock, as is info on how many times "A"
2629 was already matched, and greediness.
2630
2631 4) After A matches, the same WHILEM node is hit again.
2632
2633 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2634 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2635 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2636 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2637 of the external loop.
2638
2639 Currently present infoblocks form a tree with a stem formed by PL_curcc
2640 and whatever it mentions via ->next, and additional attached trees
2641 corresponding to temporarily unset infoblocks as in "5" above.
2642
9041c2e3 2643 In the following picture infoblocks for outer loop of
2ab05381
IZ
2644 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2645 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2646 infoblocks are drawn below the "reset" infoblock.
2647
2648 In fact in the picture below we do not show failed matches for Z and T
2649 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2650 more obvious *why* one needs to *temporary* unset infoblocks.]
2651
2652 Matched REx position InfoBlocks Comment
2653 (Y(A)*?Z)*?T x
2654 Y(A)*?Z)*?T x <- O
2655 Y (A)*?Z)*?T x <- O
2656 Y A)*?Z)*?T x <- O <- I
2657 YA )*?Z)*?T x <- O <- I
2658 YA A)*?Z)*?T x <- O <- I
2659 YAA )*?Z)*?T x <- O <- I
2660 YAA Z)*?T x <- O # Temporary unset I
2661 I
2662
2663 YAAZ Y(A)*?Z)*?T x <- O
2664 I
2665
2666 YAAZY (A)*?Z)*?T x <- O
2667 I
2668
2669 YAAZY A)*?Z)*?T x <- O <- I
2670 I
2671
2672 YAAZYA )*?Z)*?T x <- O <- I
2673 I
2674
2675 YAAZYA Z)*?T x <- O # Temporary unset I
2676 I,I
2677
2678 YAAZYAZ )*?T x <- O
2679 I,I
2680
2681 YAAZYAZ T x # Temporary unset O
2682 O
2683 I,I
2684
2685 YAAZYAZT x
2686 O
2687 I,I
2688 *******************************************************************/
a0d0e21e
LW
2689 case CURLYX: {
2690 CURCUR cc;
3280af22 2691 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2692 /* No need to save/restore up to this paren */
2693 I32 parenfloor = scan->flags;
c277df42
IZ
2694
2695 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2696 next += ARG(next);
3280af22
NIS
2697 cc.oldcc = PL_regcc;
2698 PL_regcc = &cc;
cb434fcc
IZ
2699 /* XXXX Probably it is better to teach regpush to support
2700 parenfloor > PL_regsize... */
2701 if (parenfloor > *PL_reglastparen)
2702 parenfloor = *PL_reglastparen; /* Pessimization... */
2703 cc.parenfloor = parenfloor;
a0d0e21e
LW
2704 cc.cur = -1;
2705 cc.min = ARG1(scan);
2706 cc.max = ARG2(scan);
c277df42 2707 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2708 cc.next = next;
2709 cc.minmod = minmod;
2710 cc.lastloc = 0;
3280af22 2711 PL_reginput = locinput;
a0d0e21e
LW
2712 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2713 regcpblow(cp);
3280af22 2714 PL_regcc = cc.oldcc;
4633a7c4 2715 saySAME(n);
a0d0e21e
LW
2716 }
2717 /* NOT REACHED */
2718 case WHILEM: {
2719 /*
2720 * This is really hard to understand, because after we match
2721 * what we're trying to match, we must make sure the rest of
2c2d71f5 2722 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2723 * to go back UP the parse tree by recursing ever deeper. And
2724 * if it fails, we have to reset our parent's current state
2725 * that we can try again after backing off.
2726 */
2727
c277df42 2728 CHECKPOINT cp, lastcp;
3280af22 2729 CURCUR* cc = PL_regcc;
c277df42
IZ
2730 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2731
4633a7c4 2732 n = cc->cur + 1; /* how many we know we matched */
3280af22 2733 PL_reginput = locinput;
a0d0e21e 2734
c277df42 2735 DEBUG_r(
9041c2e3
NIS
2736 PerlIO_printf(Perl_debug_log,
2737 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2738 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2739 (long)n, (long)cc->min,
c277df42
IZ
2740 (long)cc->max, (long)cc)
2741 );
4633a7c4 2742
a0d0e21e
LW
2743 /* If degenerate scan matches "", assume scan done. */
2744
579cf2c3 2745 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2746 PL_regcc = cc->oldcc;
2ab05381
IZ
2747 if (PL_regcc)
2748 ln = PL_regcc->cur;
c277df42 2749 DEBUG_r(
c3464db5
DD
2750 PerlIO_printf(Perl_debug_log,
2751 "%*s empty match detected, try continuation...\n",
3280af22 2752 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2753 );
a0d0e21e 2754 if (regmatch(cc->next))
4633a7c4 2755 sayYES;
2ab05381
IZ
2756 if (PL_regcc)
2757 PL_regcc->cur = ln;
3280af22 2758 PL_regcc = cc;
4633a7c4 2759 sayNO;
a0d0e21e
LW
2760 }
2761
2762 /* First just match a string of min scans. */
2763
2764 if (n < cc->min) {
2765 cc->cur = n;
2766 cc->lastloc = locinput;
4633a7c4
LW
2767 if (regmatch(cc->scan))
2768 sayYES;
2769 cc->cur = n - 1;
c277df42 2770 cc->lastloc = lastloc;
4633a7c4 2771 sayNO;
a0d0e21e
LW
2772 }
2773
2c2d71f5
JH
2774 if (scan->flags) {
2775 /* Check whether we already were at this position.
2776 Postpone detection until we know the match is not
2777 *that* much linear. */
2778 if (!PL_reg_maxiter) {
2779 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2780 PL_reg_leftiter = PL_reg_maxiter;
2781 }
2782 if (PL_reg_leftiter-- == 0) {
2783 I32 size = (PL_reg_maxiter + 7)/8;
2784 if (PL_reg_poscache) {
2785 if (PL_reg_poscache_size < size) {
2786 Renew(PL_reg_poscache, size, char);
2787 PL_reg_poscache_size = size;
2788 }
2789 Zero(PL_reg_poscache, size, char);
2790 }
2791 else {
2792 PL_reg_poscache_size = size;
2793 Newz(29, PL_reg_poscache, size, char);
2794 }
2795 DEBUG_r(
2796 PerlIO_printf(Perl_debug_log,
2797 "%sDetected a super-linear match, switching on caching%s...\n",
2798 PL_colors[4], PL_colors[5])
2799 );
2800 }
2801 if (PL_reg_leftiter < 0) {
2802 I32 o = locinput - PL_bostr, b;
2803
2804 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2805 b = o % 8;
2806 o /= 8;
2807 if (PL_reg_poscache[o] & (1<<b)) {
2808 DEBUG_r(
2809 PerlIO_printf(Perl_debug_log,
2810 "%*s already tried at this position...\n",
2811 REPORT_CODE_OFF+PL_regindent*2, "")
2812 );
7821416a 2813 sayNO_SILENT;
2c2d71f5
JH
2814 }
2815 PL_reg_poscache[o] |= (1<<b);
2816 }
2817 }
2818
a0d0e21e
LW
2819 /* Prefer next over scan for minimal matching. */
2820
2821 if (cc->minmod) {
3280af22 2822 PL_regcc = cc->oldcc;
2ab05381
IZ
2823 if (PL_regcc)
2824 ln = PL_regcc->cur;
5f05dabc 2825 cp = regcppush(cc->parenfloor);
02db2b7b 2826 REGCP_SET(lastcp);
5f05dabc 2827 if (regmatch(cc->next)) {
c277df42 2828 regcpblow(cp);
4633a7c4 2829 sayYES; /* All done. */
5f05dabc 2830 }
02db2b7b 2831 REGCP_UNWIND(lastcp);
5f05dabc 2832 regcppop();
2ab05381
IZ
2833 if (PL_regcc)
2834 PL_regcc->cur = ln;
3280af22 2835 PL_regcc = cc;
a0d0e21e 2836
c277df42 2837 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 2838 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2839 && !(PL_reg_flags & RF_warned)) {
2840 PL_reg_flags |= RF_warned;
e476b1b5 2841 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2842 "Complex regular subexpression recursion",
2843 REG_INFTY - 1);
c277df42 2844 }
4633a7c4 2845 sayNO;
c277df42 2846 }
a687059c 2847
c277df42 2848 DEBUG_r(
c3464db5
DD
2849 PerlIO_printf(Perl_debug_log,
2850 "%*s trying longer...\n",
3280af22 2851 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2852 );
a0d0e21e 2853 /* Try scanning more and see if it helps. */
3280af22 2854 PL_reginput = locinput;
a0d0e21e
LW
2855 cc->cur = n;
2856 cc->lastloc = locinput;
5f05dabc 2857 cp = regcppush(cc->parenfloor);
02db2b7b 2858 REGCP_SET(lastcp);
5f05dabc 2859 if (regmatch(cc->scan)) {
c277df42 2860 regcpblow(cp);
4633a7c4 2861 sayYES;
5f05dabc 2862 }
02db2b7b 2863 REGCP_UNWIND(lastcp);
5f05dabc 2864 regcppop();
4633a7c4 2865 cc->cur = n - 1;
c277df42 2866 cc->lastloc = lastloc;
4633a7c4 2867 sayNO;
a0d0e21e
LW
2868 }
2869
2870 /* Prefer scan over next for maximal matching. */
2871
2872 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2873 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2874 cc->cur = n;
2875 cc->lastloc = locinput;
02db2b7b 2876 REGCP_SET(lastcp);
5f05dabc 2877 if (regmatch(cc->scan)) {
c277df42 2878 regcpblow(cp);
4633a7c4 2879 sayYES;
5f05dabc 2880 }
02db2b7b 2881 REGCP_UNWIND(lastcp);
a0d0e21e 2882 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2883 PL_reginput = locinput;
c277df42 2884 DEBUG_r(
c3464db5
DD
2885 PerlIO_printf(Perl_debug_log,
2886 "%*s failed, try continuation...\n",
3280af22 2887 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2888 );
2889 }
9041c2e3 2890 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2891 && !(PL_reg_flags & RF_warned)) {
3280af22 2892 PL_reg_flags |= RF_warned;
e476b1b5 2893 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2894 "Complex regular subexpression recursion",
2895 REG_INFTY - 1);
a0d0e21e
LW
2896 }
2897
2898 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2899 PL_regcc = cc->oldcc;
2ab05381
IZ
2900 if (PL_regcc)
2901 ln = PL_regcc->cur;
a0d0e21e 2902 if (regmatch(cc->next))
4633a7c4 2903 sayYES;
2ab05381
IZ
2904 if (PL_regcc)
2905 PL_regcc->cur = ln;
3280af22 2906 PL_regcc = cc;
4633a7c4 2907 cc->cur = n - 1;
c277df42 2908 cc->lastloc = lastloc;
4633a7c4 2909 sayNO;
a0d0e21e
LW
2910 }
2911 /* NOT REACHED */
9041c2e3 2912 case BRANCHJ:
c277df42
IZ
2913 next = scan + ARG(scan);
2914 if (next == scan)
2915 next = NULL;
2916 inner = NEXTOPER(NEXTOPER(scan));
2917 goto do_branch;
9041c2e3 2918 case BRANCH:
c277df42
IZ
2919 inner = NEXTOPER(scan);
2920 do_branch:
2921 {
2922 CHECKPOINT lastcp;
2923 c1 = OP(scan);
2924 if (OP(next) != c1) /* No choice. */
2925 next = inner; /* Avoid recursion. */
a0d0e21e 2926 else {
02db2b7b
IZ
2927 I32 lastparen = *PL_reglastparen;
2928 I32 unwind1;
2929 re_unwind_branch_t *uw;
2930
2931 /* Put unwinding data on stack */
2932 unwind1 = SSNEWt(1,re_unwind_branch_t);
2933 uw = SSPTRt(unwind1,re_unwind_branch_t);
2934 uw->prev = unwind;
2935 unwind = unwind1;
2936 uw->type = ((c1 == BRANCH)
2937 ? RE_UNWIND_BRANCH
2938 : RE_UNWIND_BRANCHJ);
2939 uw->lastparen = lastparen;
2940 uw->next = next;
2941 uw->locinput = locinput;
2942 uw->nextchr = nextchr;
2943#ifdef DEBUGGING
2944 uw->regindent = ++PL_regindent;
2945#endif
c277df42 2946
02db2b7b
IZ
2947 REGCP_SET(uw->lastcp);
2948
2949 /* Now go into the first branch */
2950 next = inner;
a687059c 2951 }
a0d0e21e
LW
2952 }
2953 break;
2954 case MINMOD:
2955 minmod = 1;
2956 break;
c277df42
IZ
2957 case CURLYM:
2958 {
00db4c45 2959 I32 l = 0;
c277df42 2960 CHECKPOINT lastcp;
9041c2e3 2961
c277df42
IZ
2962 /* We suppose that the next guy does not need
2963 backtracking: in particular, it is of constant length,
2964 and has no parenths to influence future backrefs. */
2965 ln = ARG1(scan); /* min to match */
2966 n = ARG2(scan); /* max to match */
c277df42
IZ
2967 paren = scan->flags;
2968 if (paren) {
3280af22
NIS
2969 if (paren > PL_regsize)
2970 PL_regsize = paren;
2971 if (paren > *PL_reglastparen)
2972 *PL_reglastparen = paren;
c277df42 2973 }
dc45a647 2974 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2975 if (paren)
2976 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2977 PL_reginput = locinput;
c277df42
IZ
2978 if (minmod) {
2979 minmod = 0;
2980 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2981 sayNO;
5f4b28b2 2982 if (ln && l == 0 && n >= ln
c277df42
IZ
2983 /* In fact, this is tricky. If paren, then the
2984 fact that we did/didnot match may influence
2985 future execution. */
2986 && !(paren && ln == 0))
2987 ln = n;
3280af22 2988 locinput = PL_reginput;
22c35a8c 2989 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 2990 c1 = (U8)*STRING(next);
c277df42 2991 if (OP(next) == EXACTF)
22c35a8c 2992 c2 = PL_fold[c1];
c277df42 2993 else if (OP(next) == EXACTFL)
22c35a8c 2994 c2 = PL_fold_locale[c1];
c277df42
IZ
2995 else
2996 c2 = c1;
a0ed51b3
LW
2997 }
2998 else
c277df42 2999 c1 = c2 = -1000;
02db2b7b 3000 REGCP_SET(lastcp);
5f4b28b2 3001 /* This may be improved if l == 0. */
c277df42
IZ
3002 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3003 /* If it could work, try it. */
3004 if (c1 == -1000 ||
3280af22
NIS
3005 UCHARAT(PL_reginput) == c1 ||
3006 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3007 {
3008 if (paren) {
3009 if (n) {
cf93c79d
IZ
3010 PL_regstartp[paren] =
3011 HOPc(PL_reginput, -l) - PL_bostr;
3012 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3013 }
3014 else
cf93c79d 3015 PL_regendp[paren] = -1;
c277df42
IZ
3016 }
3017 if (regmatch(next))
3018 sayYES;
02db2b7b 3019 REGCP_UNWIND(lastcp);
c277df42
IZ
3020 }
3021 /* Couldn't or didn't -- move forward. */
3280af22 3022 PL_reginput = locinput;
c277df42
IZ
3023 if (regrepeat_hard(scan, 1, &l)) {
3024 ln++;
3280af22 3025 locinput = PL_reginput;
c277df42
IZ
3026 }
3027 else
3028 sayNO;
3029 }
a0ed51b3
LW
3030 }
3031 else {
c277df42
IZ
3032 n = regrepeat_hard(scan, n, &l);
3033 if (n != 0 && l == 0
3034 /* In fact, this is tricky. If paren, then the
3035 fact that we did/didnot match may influence
3036 future execution. */
3037 && !(paren && ln == 0))
3038 ln = n;
3280af22 3039 locinput = PL_reginput;
c277df42 3040 DEBUG_r(
5c0ca799 3041 PerlIO_printf(Perl_debug_log,
faccc32b 3042 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3043 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3044 (IV) n, (IV)l)
c277df42
IZ
3045 );
3046 if (n >= ln) {
22c35a8c 3047 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3048 c1 = (U8)*STRING(next);
c277df42 3049 if (OP(next) == EXACTF)
22c35a8c 3050 c2 = PL_fold[c1];
c277df42 3051 else if (OP(next) == EXACTFL)
22c35a8c 3052 c2 = PL_fold_locale[c1];
c277df42
IZ
3053 else
3054 c2 = c1;
a0ed51b3
LW
3055 }
3056 else
c277df42
IZ
3057 c1 = c2 = -1000;
3058 }
02db2b7b 3059 REGCP_SET(lastcp);
c277df42
IZ
3060 while (n >= ln) {
3061 /* If it could work, try it. */
3062 if (c1 == -1000 ||
3280af22
NIS
3063 UCHARAT(PL_reginput) == c1 ||
3064 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3065 {
3066 DEBUG_r(
c3464db5 3067 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3068 "%*s trying tail with n=%"IVdf"...\n",
3069 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3070 );
3071 if (paren) {
3072 if (n) {
cf93c79d
IZ
3073 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3074 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3075 }
a0ed51b3 3076 else
cf93c79d 3077 PL_regendp[paren] = -1;
c277df42 3078 }
a0ed51b3
LW
3079 if (regmatch(next))
3080 sayYES;
02db2b7b 3081 REGCP_UNWIND(lastcp);
a0ed51b3 3082 }
c277df42
IZ
3083 /* Couldn't or didn't -- back up. */
3084 n--;
dfe13c55 3085 locinput = HOPc(locinput, -l);
3280af22 3086 PL_reginput = locinput;
c277df42
IZ
3087 }
3088 }
3089 sayNO;
3090 break;
3091 }
3092 case CURLYN:
3093 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3094 if (paren > PL_regsize)
3095 PL_regsize = paren;
3096 if (paren > *PL_reglastparen)
3097 *PL_reglastparen = paren;
c277df42
IZ
3098 ln = ARG1(scan); /* min to match */
3099 n = ARG2(scan); /* max to match */
dc45a647 3100 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3101 goto repeat;
a0d0e21e 3102 case CURLY:
c277df42 3103 paren = 0;
a0d0e21e
LW
3104 ln = ARG1(scan); /* min to match */
3105 n = ARG2(scan); /* max to match */
dc45a647 3106 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3107 goto repeat;
3108 case STAR:
3109 ln = 0;
c277df42 3110 n = REG_INFTY;
a0d0e21e 3111 scan = NEXTOPER(scan);
c277df42 3112 paren = 0;
a0d0e21e
LW
3113 goto repeat;
3114 case PLUS:
c277df42
IZ
3115 ln = 1;
3116 n = REG_INFTY;
3117 scan = NEXTOPER(scan);
3118 paren = 0;
3119 repeat:
a0d0e21e
LW
3120 /*
3121 * Lookahead to avoid useless match attempts
3122 * when we know what character comes next.
3123 */
22c35a8c 3124 if (PL_regkind[(U8)OP(next)] == EXACT) {
1aa99e6b
IH
3125 U8 *s = (U8*)STRING(next);
3126 if (!UTF) {
3127 c2 = c1 = *s;
3128 if (OP(next) == EXACTF)
3129 c2 = PL_fold[c1];
3130 else if (OP(next) == EXACTFL)
3131 c2 = PL_fold_locale[c1];
3132 }
3133 else { /* UTF */
3134 if (OP(next) == EXACTF) {
3135 c1 = to_utf8_lower(s);
3136 c2 = to_utf8_upper(s);
3137 }
3138 else {
9041c2e3 3139 c2 = c1 = utf8_to_uvchr(s, NULL);
1aa99e6b
IH
3140 }
3141 }
bbce6d69 3142 }
a0d0e21e 3143 else
bbce6d69 3144 c1 = c2 = -1000;
3280af22 3145 PL_reginput = locinput;
a0d0e21e 3146 if (minmod) {
c277df42 3147 CHECKPOINT lastcp;
a0d0e21e
LW
3148 minmod = 0;
3149 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3150 sayNO;
a0ed51b3 3151 locinput = PL_reginput;
02db2b7b 3152 REGCP_SET(lastcp);
0fe9bf95 3153 if (c1 != -1000) {
1aa99e6b 3154 char *e; /* Should not check after this */
0fe9bf95
IZ
3155 char *old = locinput;
3156
1aa99e6b 3157 if (n == REG_INFTY) {
0fe9bf95 3158 e = PL_regeol - 1;
1aa99e6b
IH
3159 if (do_utf8)
3160 while (UTF8_IS_CONTINUATION(*(U8*)e))
3161 e--;
3162 }
3163 else if (do_utf8) {
3164 int m = n - ln;
3165 for (e = locinput;
3166 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3167 e += UTF8SKIP(e);
3168 }
3169 else {
3170 e = locinput + n - ln;
3171 if (e >= PL_regeol)
3172 e = PL_regeol - 1;
3173 }
0fe9bf95 3174 while (1) {
1aa99e6b 3175 int count;
0fe9bf95 3176 /* Find place 'next' could work */
1aa99e6b
IH
3177 if (!do_utf8) {
3178 if (c1 == c2) {
3179 while (locinput <= e && *locinput != c1)
3180 locinput++;
3181 } else {
9041c2e3 3182 while (locinput <= e
1aa99e6b
IH
3183 && *locinput != c1
3184 && *locinput != c2)
3185 locinput++;
3186 }
3187 count = locinput - old;
3188 }
3189 else {
3190 STRLEN len;
3191 if (c1 == c2) {
3192 for (count = 0;
3193 locinput <= e &&
9041c2e3 3194 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3195 count++)
3196 locinput += len;
3197
3198 } else {
3199 for (count = 0; locinput <= e; count++) {
9041c2e3 3200 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3201 if (c == c1 || c == c2)
3202 break;
9041c2e3 3203 locinput += len;
1aa99e6b
IH
3204 }
3205 }
0fe9bf95 3206 }
9041c2e3 3207 if (locinput > e)
0fe9bf95
IZ
3208 sayNO;
3209 /* PL_reginput == old now */
3210 if (locinput != old) {
3211 ln = 1; /* Did some */
1aa99e6b 3212 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3213 sayNO;
3214 }
3215 /* PL_reginput == locinput now */
29d1e993 3216 TRYPAREN(paren, ln, locinput);
0fe9bf95 3217 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3218 REGCP_UNWIND(lastcp);
0fe9bf95 3219 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3220 old = locinput;
3221 if (do_utf8)
3222 locinput += UTF8SKIP(locinput);
3223 else
3224 locinput++;
0fe9bf95
IZ
3225 }
3226 }
3227 else
c277df42 3228 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3229 UV c;
3230 if (c1 != -1000) {
3231 if (do_utf8)
9041c2e3 3232 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3233 else
9041c2e3 3234 c = UCHARAT(PL_reginput);
2390ecbc
PP
3235 /* If it could work, try it. */
3236 if (c == c1 || c == c2)
3237 {
3238 TRYPAREN(paren, n, PL_reginput);
3239 REGCP_UNWIND(lastcp);
3240 }
1aa99e6b 3241 }
a0d0e21e 3242 /* If it could work, try it. */
2390ecbc 3243 else if (c1 == -1000)
bbce6d69 3244 {
29d1e993 3245 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3246 REGCP_UNWIND(lastcp);
bbce6d69 3247 }
c277df42 3248 /* Couldn't or didn't -- move forward. */
a0ed51b3 3249 PL_reginput = locinput;
a0d0e21e
LW
3250 if (regrepeat(scan, 1)) {
3251 ln++;
a0ed51b3
LW
3252 locinput = PL_reginput;
3253 }
3254 else
4633a7c4 3255 sayNO;
a0d0e21e
LW
3256 }
3257 }
3258 else {
c277df42 3259 CHECKPOINT lastcp;
a0d0e21e 3260 n = regrepeat(scan, n);
a0ed51b3 3261 locinput = PL_reginput;
22c35a8c 3262 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3263 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3264 ln = n; /* why back off? */
1aeab75a
GS
3265 /* ...because $ and \Z can match before *and* after
3266 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3267 We should back off by one in this case. */
3268 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3269 ln--;
3270 }
02db2b7b 3271 REGCP_SET(lastcp);
c277df42 3272 if (paren) {
1aa99e6b 3273 UV c;
c277df42 3274 while (n >= ln) {
1aa99e6b
IH
3275 if (c1 != -1000) {
3276 if (do_utf8)
9041c2e3 3277 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3278 else
9041c2e3 3279 c = UCHARAT(PL_reginput);
1aa99e6b 3280 }
c277df42 3281 /* If it could work, try it. */
1aa99e6b 3282 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3283 {
29d1e993 3284 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3285 REGCP_UNWIND(lastcp);
c277df42
IZ
3286 }
3287 /* Couldn't or didn't -- back up. */
3288 n--;
dfe13c55 3289 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3290 }
a0ed51b3
LW
3291 }
3292 else {
1aa99e6b 3293 UV c;
c277df42 3294 while (n >= ln) {
1aa99e6b
IH
3295 if (c1 != -1000) {
3296 if (do_utf8)
9041c2e3 3297 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3298 else
9041c2e3 3299 c = UCHARAT(PL_reginput);
1aa99e6b 3300 }
c277df42 3301 /* If it could work, try it. */
1aa99e6b 3302 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3303 {
29d1e993 3304 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3305 REGCP_UNWIND(lastcp);
c277df42
IZ
3306 }
3307 /* Couldn't or didn't -- back up. */
3308 n--;
dfe13c55 3309 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3310 }
a0d0e21e
LW
3311 }
3312 }
4633a7c4 3313 sayNO;
c277df42 3314 break;
a0d0e21e 3315 case END:
0f5d15d6
IZ
3316 if (PL_reg_call_cc) {
3317 re_cc_state *cur_call_cc = PL_reg_call_cc;
3318 CURCUR *cctmp = PL_regcc;
3319 regexp *re = PL_reg_re;
3320 CHECKPOINT cp, lastcp;
3321
3322 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3323 REGCP_SET(lastcp);
0f5d15d6
IZ
3324 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3325 the caller. */
3326 PL_reginput = locinput; /* Make position available to
3327 the callcc. */
3328 cache_re(PL_reg_call_cc->re);
3329 PL_regcc = PL_reg_call_cc->cc;
3330 PL_reg_call_cc = PL_reg_call_cc->prev;
3331 if (regmatch(cur_call_cc->node)) {
3332 PL_reg_call_cc = cur_call_cc;
3333 regcpblow(cp);
3334 sayYES;
3335 }
02db2b7b 3336 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3337 regcppop();
3338 PL_reg_call_cc = cur_call_cc;
3339 PL_regcc = cctmp;
3340 PL_reg_re = re;
3341 cache_re(re);
3342
3343 DEBUG_r(
3344 PerlIO_printf(Perl_debug_log,
3345 "%*s continuation failed...\n",
3346 REPORT_CODE_OFF+PL_regindent*2, "")
3347 );
7821416a 3348 sayNO_SILENT;
0f5d15d6 3349 }
7821416a
IZ
3350 if (locinput < PL_regtill) {
3351 DEBUG_r(PerlIO_printf(Perl_debug_log,
3352 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3353 PL_colors[4],
3354 (long)(locinput - PL_reg_starttry),
3355 (long)(PL_regtill - PL_reg_starttry),
3356 PL_colors[5]));
3357 sayNO_FINAL; /* Cannot match: too short. */
3358 }
3359 PL_reginput = locinput; /* put where regtry can find it */
3360 sayYES_FINAL; /* Success! */
7e5428c5 3361 case SUCCEED:
3280af22 3362 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3363 sayYES_LOUD; /* Success! */
c277df42
IZ
3364 case SUSPEND:
3365 n = 1;
9fe1d20c 3366 PL_reginput = locinput;
9041c2e3 3367 goto do_ifmatch;
a0d0e21e 3368 case UNLESSM:
c277df42 3369 n = 0;
a0ed51b3 3370 if (scan->flags) {
0fe9bf95
IZ
3371 if (UTF) { /* XXXX This is absolutely
3372 broken, we read before
3373 start of string. */
3374 s = HOPMAYBEc(locinput, -scan->flags);
3375 if (!s)
3376 goto say_yes;
3377 PL_reginput = s;
3378 }
3379 else {
9041c2e3 3380 if (locinput < PL_bostr + scan->flags)
0fe9bf95
IZ
3381 goto say_yes;
3382 PL_reginput = locinput - scan->flags;
3383 goto do_ifmatch;
3384 }
a0ed51b3
LW
3385 }
3386 else
3387 PL_reginput = locinput;
c277df42
IZ
3388 goto do_ifmatch;
3389 case IFMATCH:
3390 n = 1;
a0ed51b3 3391 if (scan->flags) {
0fe9bf95
IZ
3392 if (UTF) { /* XXXX This is absolutely
3393 broken, we read before
3394 start of string. */
3395 s = HOPMAYBEc(locinput, -scan->flags);
3396 if (!s || s < PL_bostr)
3397 goto say_no;
3398 PL_reginput = s;
3399 }
3400 else {
9041c2e3 3401 if (locinput < PL_bostr + scan->flags)
0fe9bf95
IZ
3402 goto say_no;
3403 PL_reginput = locinput - scan->flags;
3404 goto do_ifmatch;
3405 }
a0ed51b3
LW
3406 }
3407 else
3408 PL_reginput = locinput;
3409
c277df42 3410 do_ifmatch:
c277df42
IZ
3411 inner = NEXTOPER(NEXTOPER(scan));
3412 if (regmatch(inner) != n) {
3413 say_no:
3414 if (logical) {
3415 logical = 0;
3416 sw = 0;
3417 goto do_longjump;
a0ed51b3
LW
3418 }
3419 else
c277df42
IZ
3420 sayNO;
3421 }
3422 say_yes:
3423 if (logical) {
3424 logical = 0;
3425 sw = 1;
3426 }
fe44a5e8 3427 if (OP(scan) == SUSPEND) {
3280af22 3428 locinput = PL_reginput;
565764a8 3429 nextchr = UCHARAT(locinput);
fe44a5e8 3430 }
c277df42
IZ
3431 /* FALL THROUGH. */
3432 case LONGJMP:
3433 do_longjump:
3434 next = scan + ARG(scan);
3435 if (next == scan)
3436 next = NULL;
a0d0e21e
LW
3437 break;
3438 default:
b900a521 3439 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3440 PTR2UV(scan), OP(scan));
cea2e8a9 3441 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3442 }
02db2b7b 3443 reenter:
a0d0e21e
LW
3444 scan = next;
3445 }
a687059c 3446
a0d0e21e
LW
3447 /*
3448 * We get here only if there's trouble -- normally "case END" is
3449 * the terminating point.
3450 */
cea2e8a9 3451 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3452 /*NOTREACHED*/
4633a7c4
LW
3453 sayNO;
3454
7821416a
IZ
3455yes_loud:
3456 DEBUG_r(
3457 PerlIO_printf(Perl_debug_log,
3458 "%*s %scould match...%s\n",
3459 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3460 );
3461 goto yes;
3462yes_final:
3463 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3464 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3465yes:
3466#ifdef DEBUGGING
3280af22 3467 PL_regindent--;
4633a7c4 3468#endif
02db2b7b
IZ
3469
3470#if 0 /* Breaks $^R */
3471 if (unwind)
3472 regcpblow(firstcp);
3473#endif
4633a7c4
LW
3474 return 1;
3475
3476no:
7821416a
IZ
3477 DEBUG_r(
3478 PerlIO_printf(Perl_debug_log,
3479 "%*s %sfailed...%s\n",
3480 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3481 );
3482 goto do_no;
3483no_final:
3484do_no:
02db2b7b
IZ
3485 if (unwind) {
3486 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3487
3488 switch (uw->type) {
3489 case RE_UNWIND_BRANCH:
3490 case RE_UNWIND_BRANCHJ:
3491 {
3492 re_unwind_branch_t *uwb = &(uw->branch);
3493 I32 lastparen = uwb->lastparen;
9041c2e3 3494
02db2b7b
IZ
3495 REGCP_UNWIND(uwb->lastcp);
3496 for (n = *PL_reglastparen; n > lastparen; n--)
3497 PL_regendp[n] = -1;
3498 *PL_reglastparen = n;
3499 scan = next = uwb->next;
9041c2e3
NIS
3500 if ( !scan ||
3501 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
3502 ? BRANCH : BRANCHJ) ) { /* Failure */
3503 unwind = uwb->prev;
3504#ifdef DEBUGGING
3505 PL_regindent--;
3506#endif
3507 goto do_no;
3508 }
3509 /* Have more choice yet. Reuse the same uwb. */
3510 /*SUPPRESS 560*/
3511 if ((n = (uwb->type == RE_UNWIND_BRANCH
3512 ? NEXT_OFF(next) : ARG(next))))
3513 next += n;
3514 else
3515 next = NULL; /* XXXX Needn't unwinding in this case... */
3516 uwb->next = next;
3517 next = NEXTOPER(scan);
3518 if (uwb->type == RE_UNWIND_BRANCHJ)
3519 next = NEXTOPER(next);
3520 locinput = uwb->locinput;
3521 nextchr = uwb->nextchr;
3522#ifdef DEBUGGING
3523 PL_regindent = uwb->regindent;
3524#endif
3525
3526 goto reenter;
3527 }
3528 /* NOT REACHED */
3529 default:
3530 Perl_croak(aTHX_ "regexp unwind memory corruption");
3531 }
3532 /* NOT REACHED */
3533 }
4633a7c4 3534#ifdef DEBUGGING
3280af22 3535 PL_regindent--;
4633a7c4 3536#endif
a0d0e21e 3537 return 0;
a687059c
LW
3538}
3539
3540/*
3541 - regrepeat - repeatedly match something simple, report how many
3542 */
3543/*
3544 * [This routine now assumes that it will only match on things of length 1.
3545 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3546 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3547 */
76e3520e 3548STATIC I32
cea2e8a9 3549S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3550{
a0d0e21e 3551 register char *scan;
a0d0e21e 3552 register I32 c;
3280af22 3553 register char *loceol = PL_regeol;
a0ed51b3 3554 register I32 hardcount = 0;
ffc61ed2 3555 register bool do_utf8 = DO_UTF8(PL_reg_sv);
a0d0e21e 3556
3280af22 3557 scan = PL_reginput;
c277df42 3558 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3559 loceol = scan + max;
a0d0e21e 3560 switch (OP(p)) {
22c35a8c 3561 case REG_ANY:
1aa99e6b 3562 if (do_utf8) {
ffc61ed2 3563 loceol = PL_regeol;
1aa99e6b 3564 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3565 scan += UTF8SKIP(scan);
3566 hardcount++;
3567 }
3568 } else {
3569 while (scan < loceol && *scan != '\n')
3570 scan++;
a0ed51b3
LW
3571 }
3572 break;
ffc61ed2 3573 case SANY:
3baa4c62 3574 scan = loceol;
a0ed51b3 3575 break;
bbce6d69 3576 case EXACT: /* length of string is 1 */
cd439c50 3577 c = (U8)*STRING(p);
bbce6d69 3578 while (scan < loceol && UCHARAT(scan) == c)
3579 scan++;
3580 break;
3581 case EXACTF: /* length of string is 1 */
cd439c50 3582 c = (U8)*STRING(p);
bbce6d69 3583 while (scan < loceol &&
22c35a8c 3584 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3585 scan++;
3586 break;
3587 case EXACTFL: /* length of string is 1 */
3280af22 3588 PL_reg_flags |= RF_tainted;
cd439c50 3589 c = (U8)*STRING(p);
bbce6d69 3590 while (scan < loceol &&
22c35a8c 3591 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3592 scan++;
3593 break;
3594 case ANYOF:
ffc61ed2
JH
3595 if (do_utf8) {
3596 loceol = PL_regeol;
1aa99e6b
IH
3597 while (hardcount < max && scan < loceol &&
3598 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3599 scan += UTF8SKIP(scan);
3600 hardcount++;
3601 }
3602 } else {
3603 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3604 scan++;
3605 }
a0d0e21e
LW
3606 break;
3607 case ALNUM:
1aa99e6b 3608 if (do_utf8) {
ffc61ed2 3609 loceol = PL_regeol;
8269fa76 3610 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3611 while (hardcount < max && scan < loceol &&
3568d838 3612 swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3613 scan += UTF8SKIP(scan);
3614 hardcount++;
3615 }
3616 } else {
3617 while (scan < loceol && isALNUM(*scan))
3618 scan++;
a0ed51b3
LW
3619 }
3620 break;
bbce6d69 3621 case ALNUML:
3280af22 3622 PL_reg_flags |= RF_tainted;
1aa99e6b 3623 if (do_utf8) {
ffc61ed2 3624 loceol = PL_regeol;
1aa99e6b
IH
3625 while (hardcount < max && scan < loceol &&
3626 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3627 scan += UTF8SKIP(scan);
3628 hardcount++;
3629 }
3630 } else {
3631 while (scan < loceol && isALNUM_LC(*scan))
3632 scan++;
a0ed51b3
LW
3633 }
3634 break;
a0d0e21e 3635 case NALNUM:
1aa99e6b 3636 if (do_utf8) {
ffc61ed2 3637 loceol = PL_regeol;
8269fa76 3638 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b 3639 while (hardcount < max && scan < loceol &&
3568d838 3640 !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3641 scan += UTF8SKIP(scan);
3642 hardcount++;
3643 }
3644 } else {
3645 while (scan < loceol && !isALNUM(*scan))
3646 scan++;
a0ed51b3
LW
3647 }
3648 break;
bbce6d69 3649 case NALNUML:
3280af22 3650 PL_reg_flags |= RF_tainted;
1aa99e6b 3651 if (do_utf8) {
ffc61ed2 3652 loceol = PL_regeol;
1aa99e6b
IH
3653 while (hardcount < max && scan < loceol &&
3654 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3655 scan += UTF8SKIP(scan);
3656 hardcount++;
3657 }
3658 } else {
3659 while (scan < loceol && !isALNUM_LC(*scan))
3660 scan++;
a0ed51b3
LW
3661 }
3662 break;
a0d0e21e 3663 case SPACE:
1aa99e6b 3664 if (do_utf8) {
ffc61ed2 3665 loceol = PL_regeol;
8269fa76 3666 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3667 while (hardcount < max && scan < loceol &&
3568d838
JH
3668 (*scan == ' ' ||
3669 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3670 scan += UTF8SKIP(scan);
3671 hardcount++;
3672 }
3673 } else {
3674 while (scan < loceol && isSPACE(*scan))
3675 scan++;
a0ed51b3
LW
3676 }
3677 break;
bbce6d69 3678 case SPACEL:
3280af22 3679 PL_reg_flags |= RF_tainted;
1aa99e6b 3680 if (do_utf8) {
ffc61ed2 3681 loceol = PL_regeol;
1aa99e6b 3682 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3683 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3684 scan += UTF8SKIP(scan);
3685 hardcount++;
3686 }
3687 } else {
3688 while (scan < loceol && isSPACE_LC(*scan))
3689 scan++;
a0ed51b3
LW
3690 }
3691 break;
a0d0e21e 3692 case NSPACE:
1aa99e6b 3693 if (do_utf8) {
ffc61ed2 3694 loceol = PL_regeol;
8269fa76 3695 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3696 while (hardcount < max && scan < loceol &&
3568d838
JH
3697 !(*scan == ' ' ||
3698 swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
ffc61ed2
JH
3699 scan += UTF8SKIP(scan);
3700 hardcount++;
3701 }
3702 } else {
3703 while (scan < loceol && !isSPACE(*scan))
3704 scan++;
3705 break;
a0ed51b3 3706 }
bbce6d69 3707 case NSPACEL:
3280af22 3708 PL_reg_flags |= RF_tainted;
1aa99e6b 3709 if (do_utf8) {
ffc61ed2 3710 loceol = PL_regeol;
1aa99e6b 3711 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3712 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3713 scan += UTF8SKIP(scan);
3714 hardcount++;
3715 }
3716 } else {
3717 while (scan < loceol && !isSPACE_LC(*scan))
3718 scan++;
a0ed51b3
LW
3719 }
3720 break;
a0d0e21e 3721 case DIGIT:
1aa99e6b 3722 if (do_utf8) {
ffc61ed2 3723 loceol = PL_regeol;
8269fa76 3724 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3725 while (hardcount < max && scan < loceol &&
3568d838 3726 swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3727 scan += UTF8SKIP(scan);
3728 hardcount++;
3729 }
3730 } else {
3731 while (scan < loceol && isDIGIT(*scan))
3732 scan++;
a0ed51b3
LW
3733 }
3734 break;
a0d0e21e 3735 case NDIGIT:
1aa99e6b 3736 if (do_utf8) {
ffc61ed2 3737 loceol = PL_regeol;
8269fa76 3738 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b 3739 while (hardcount < max && scan < loceol &&
3568d838 3740 !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3741 scan += UTF8SKIP(scan);
3742 hardcount++;
3743 }
3744 } else {
3745 while (scan < loceol && !isDIGIT(*scan))
3746 scan++;
a0ed51b3
LW
3747 }
3748 break;
a0d0e21e
LW
3749 default: /* Called on something of 0 width. */
3750 break; /* So match right here or not at all. */
3751 }
a687059c 3752
a0ed51b3
LW
3753 if (hardcount)
3754 c = hardcount;
3755 else
3756 c = scan - PL_reginput;
3280af22 3757 PL_reginput = scan;
a687059c 3758
9041c2e3 3759 DEBUG_r(
c277df42
IZ
3760 {
3761 SV *prop = sv_newmortal();
3762
3763 regprop(prop, p);
9041c2e3
NIS
3764 PerlIO_printf(Perl_debug_log,
3765 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
7b0972df 3766 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42 3767 });
9041c2e3 3768
a0d0e21e 3769 return(c);
a687059c
LW
3770}
3771
3772/*
c277df42 3773 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 3774 *
c277df42
IZ
3775 * The repeater is supposed to have constant length.
3776 */
3777
76e3520e 3778STATIC I32
cea2e8a9 3779S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42
IZ
3780{
3781 register char *scan;
3782 register char *start;
3280af22 3783 register char *loceol = PL_regeol;
a0ed51b3 3784 I32 l = 0;
708e3b05 3785 I32 count = 0, res = 1;
a0ed51b3
LW
3786
3787 if (!max)
3788 return 0;
c277df42 3789
3280af22 3790 start = PL_reginput;
1aa99e6b 3791 if (DO_UTF8(PL_reg_sv)) {
708e3b05 3792 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3793 if (!count++) {
3794 l = 0;
3795 while (start < PL_reginput) {
3796 l++;
3797 start += UTF8SKIP(start);
3798 }
3799 *lp = l;
3800 if (l == 0)
3801 return max;
3802 }
3803 if (count == max)
3804 return count;
3805 }
3806 }
3807 else {
708e3b05 3808 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3809 if (!count++) {
3810 *lp = l = PL_reginput - start;
3811 if (max != REG_INFTY && l*max < loceol - scan)
3812 loceol = scan + l*max;
3813 if (l == 0)
3814 return max;
c277df42
IZ
3815 }
3816 }
3817 }
708e3b05 3818 if (!res)
3280af22 3819 PL_reginput = scan;
9041c2e3 3820
a0ed51b3 3821 return count;
c277df42
IZ
3822}
3823
3824/*
ffc61ed2
JH
3825- regclass_swash - prepare the utf8 swash
3826*/
3827
3828SV *
3829Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
3830{
3831 SV *sw = NULL;
3832 SV *si = NULL;
3833
3834 if (PL_regdata && PL_regdata->count) {
3835 U32 n = ARG(node);
3836
3837 if (PL_regdata->what[n] == 's') {
3838 SV *rv = (SV*)PL_regdata->data[n];
3839 AV *av = (AV*)SvRV((SV*)rv);
3840 SV **a;
9041c2e3 3841
ffc61ed2
JH
3842 si = *av_fetch(av, 0, FALSE);
3843 a = av_fetch(av, 1, FALSE);
9041c2e3 3844
ffc61ed2
JH
3845 if (a)
3846 sw = *a;
3847 else if (si && doinit) {
3848 sw = swash_init("utf8", "", si, 1, 0);
3849 (void)av_store(av, 1, sw);
3850 }
3851 }
3852 }
3853
3854 if (initsvp)
3855 *initsvp = si;
3856
3857 return sw;
3858}
3859
3860/*
cb8d8820 3861 - reginclass - determine if a character falls into a character class
bbce6d69 3862 */
3863
76e3520e 3864STATIC bool
ffc61ed2 3865S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
bbce6d69 3866{
ffc61ed2 3867 char flags = ANYOF_FLAGS(n);
bbce6d69 3868 bool match = FALSE;
1aa99e6b 3869 UV c;
3568d838 3870 STRLEN len = 0;
1aa99e6b 3871
3568d838 3872 c = do_utf8 ? utf8_to_uvchr(p, &len) : *p;
bbce6d69 3873
ffc61ed2
JH
3874 if (do_utf8 || (flags & ANYOF_UNICODE)) {
3875 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
3876 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
3877 match = TRUE;
bbce6d69 3878 }
3568d838 3879 if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
1aa99e6b 3880 match = TRUE;
ffc61ed2
JH
3881 if (!match) {
3882 SV *sw = regclass_swash(n, TRUE, 0);
3883
3884 if (sw) {
3568d838 3885 if (swash_fetch(sw, p, do_utf8))
ffc61ed2
JH
3886 match = TRUE;
3887 else if (flags & ANYOF_FOLD) {
3888 U8 tmpbuf[UTF8_MAXLEN+1];
9041c2e3 3889
ffc61ed2
JH
3890 if (flags & ANYOF_LOCALE) {
3891 PL_reg_flags |= RF_tainted;
9041c2e3 3892 uvchr_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
ffc61ed2 3893 }
1aa99e6b 3894 else
9041c2e3 3895 uvchr_to_utf8(tmpbuf, toLOWER_utf8(p));
3568d838 3896 if (swash_fetch(sw, tmpbuf, do_utf8))
ffc61ed2
JH
3897 match = TRUE;
3898 }
3899 }
bbce6d69 3900 }
3901 }
1aa99e6b 3902 if (!match && c < 256) {
ffc61ed2
JH
3903 if (ANYOF_BITMAP_TEST(n, c))
3904 match = TRUE;
3905 else if (flags & ANYOF_FOLD) {
3568d838 3906 I32 f;
a0ed51b3 3907
ffc61ed2
JH
3908 if (flags & ANYOF_LOCALE) {
3909 PL_reg_flags |= RF_tainted;
3910 f = PL_fold_locale[c];
3911 }
3912 else
3913 f = PL_fold[c];
3914 if (f != c && ANYOF_BITMAP_TEST(n, f))
3915 match = TRUE;
3916 }
3917
3918 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 3919 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
3920 if (
3921 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3922 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3923 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
3924 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3925 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3926 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3927 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3928 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3929 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3930 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3931 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
3932 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
3933 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3934 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3935 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3936 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3937 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
3938 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3939 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
3940 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3941 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3942 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3943 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
3944 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3945 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3946 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3947 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3948 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3949 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
3950 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
3951 ) /* How's that for a conditional? */
3952 {
3953 match = TRUE;
3954 }
a0ed51b3 3955 }
a0ed51b3
LW
3956 }
3957
a0ed51b3
LW
3958 return (flags & ANYOF_INVERT) ? !match : match;
3959}
161b471a 3960
dfe13c55 3961STATIC U8 *
cea2e8a9 3962S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 3963{
1aa99e6b
IH
3964 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
3965}
3966
3967STATIC U8 *
3968S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 3969{
a0ed51b3 3970 if (off >= 0) {
1aa99e6b 3971 while (off-- && s < lim) {
ffc61ed2 3972 /* XXX could check well-formedness here */
a0ed51b3 3973 s += UTF8SKIP(s);
ffc61ed2 3974 }
a0ed51b3
LW
3975 }
3976 else {
3977 while (off++) {
1aa99e6b 3978 if (s > lim) {
a0ed51b3 3979 s--;
ffc61ed2 3980 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 3981 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 3982 s--;
ffc61ed2
JH
3983 }
3984 /* XXX could check well-formedness here */
a0ed51b3
LW
3985 }
3986 }
3987 }
3988 return s;
3989}
161b471a 3990
dfe13c55 3991STATIC U8 *
1aa99e6b 3992S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 3993{
1aa99e6b
IH
3994 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
3995}
3996
3997STATIC U8 *
3998S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
3999{
4000 if (off >= 0) {
1aa99e6b 4001 while (off-- && s < lim) {
ffc61ed2 4002 /* XXX could check well-formedness here */
a0ed51b3 4003 s += UTF8SKIP(s);
ffc61ed2 4004 }
a0ed51b3
LW
4005 if (off >= 0)
4006 return 0;
4007 }
4008 else {
4009 while (off++) {
1aa99e6b 4010 if (s > lim) {
a0ed51b3 4011 s--;
ffc61ed2 4012 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4013 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4014 s--;
ffc61ed2
JH
4015 }
4016 /* XXX could check well-formedness here */
a0ed51b3
LW
4017 }
4018 else
4019 break;
4020 }
4021 if (off <= 0)
4022 return 0;
4023 }
4024 return s;
4025}
51371543
GS
4026
4027#ifdef PERL_OBJECT
51371543
GS
4028#include "XSUB.h"
4029#endif
4030
4031static void
4032restore_pos(pTHXo_ void *arg)
4033{
51371543
GS
4034 if (PL_reg_eval_set) {
4035 if (PL_reg_oldsaved) {
4036 PL_reg_re->subbeg = PL_reg_oldsaved;
4037 PL_reg_re->sublen = PL_reg_oldsavedlen;
4038 RX_MATCH_COPIED_on(PL_reg_re);
4039 }
4040 PL_reg_magic->mg_len = PL_reg_oldpos;
4041 PL_reg_eval_set = 0;
4042 PL_curpm = PL_reg_oldcurpm;
4043 }
4044}