This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Integrate changes #9678,9679 from maintline into mainperl.
[perl5.git] / regexec.c
CommitLineData
a0d0e21e
LW
1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c
LW
8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73
AD
17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e
AD
22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e
AD
29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603
IZ
35# define Perl_regexec_flags my_regexec
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
cad2e5aa 38# define Perl_re_intuit_start my_re_intuit_start
d06ea78c
GS
39/* *These* symbols are masked to allow static link. */
40# define Perl_pregexec my_pregexec
9041c2e3 41# define Perl_reginitcolors my_reginitcolors
490a3f88 42# define Perl_regclass_swash my_regclass_swash
c5be433b
GS
43
44# define PERL_NO_GET_CONTEXT
9041c2e3 45#endif
56953603 46
f0fcb552 47/*SUPPRESS 112*/
a687059c 48/*
e50aee73 49 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c
LW
50 *
51 * Copyright (c) 1986 by University of Toronto.
52 * Written by Henry Spencer. Not derived from licensed software.
53 *
54 * Permission is granted to anyone to use this software for any
55 * purpose on any computer system, and to redistribute it freely,
56 * subject to the following restrictions:
57 *
58 * 1. The author is not responsible for the consequences of use of
59 * this software, no matter how awful, even if they arise
60 * from defects in it.
61 *
62 * 2. The origin of this software must not be misrepresented, either
63 * by explicit claim or by omission.
64 *
65 * 3. Altered versions must be plainly marked as such, and must not
66 * be misrepresented as being the original software.
67 *
68 **** Alterations to Henry's code are...
69 ****
bc89e66f 70 **** Copyright (c) 1991-2001, Larry Wall
a687059c 71 ****
9ef589d8
LW
72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
a687059c
LW
74 *
75 * Beware that some of this code is subtly aware of the way operator
76 * precedence is structured in regular expressions. Serious changes in
77 * regular-expression syntax might require a total rethink.
78 */
79#include "EXTERN.h"
864dbfa3 80#define PERL_IN_REGEXEC_C
a687059c 81#include "perl.h"
0f5d15d6 82
c5be433b
GS
83#ifdef PERL_IN_XSUB_RE
84# if defined(PERL_CAPI) || defined(PERL_OBJECT)
85# include "XSUB.h"
86# endif
87#endif
88
a687059c
LW
89#include "regcomp.h"
90
c277df42
IZ
91#define RF_tainted 1 /* tainted information used? */
92#define RF_warned 2 /* warned about big count? */
ce862d02 93#define RF_evaled 4 /* Did an EVAL with setting? */
a0ed51b3
LW
94#define RF_utf8 8 /* String contains multibyte chars? */
95
96#define UTF (PL_reg_flags & RF_utf8)
ce862d02
IZ
97
98#define RS_init 1 /* eval environment created */
99#define RS_set 2 /* replsv value is set */
c277df42 100
a687059c
LW
101#ifndef STATIC
102#define STATIC static
103#endif
104
c277df42
IZ
105/*
106 * Forwards.
107 */
108
a0ed51b3 109#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
1aa99e6b 110#define CHR_DIST(a,b) (DO_UTF8(PL_reg_sv) ? utf8_distance(a,b) : a - b)
a0ed51b3 111
dfe13c55
GS
112#define reghop_c(pos,off) ((char*)reghop((U8*)pos, off))
113#define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off))
1aa99e6b
IH
114#define HOP(pos,off) (DO_UTF8(PL_reg_sv) ? reghop((U8*)pos, off) : (U8*)(pos + off))
115#define HOPMAYBE(pos,off) (DO_UTF8(PL_reg_sv) ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off))
dfe13c55
GS
116#define HOPc(pos,off) ((char*)HOP(pos,off))
117#define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off))
a0d0e21e 118
1aa99e6b
IH
119#define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim))
120#define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim))
121#define HOP3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
122#define HOPMAYBE3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off))
123#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
124#define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim))
125
8269fa76 126#define LOAD_UTF8_CHARCLASS(a,b) STMT_START { if (!CAT2(PL_utf8_,a)) (void)CAT2(is_utf8_, a)((U8*)b); } STMT_END
51371543 127
8269fa76 128static void restore_pos(pTHXo_ void *arg);
51371543 129
76e3520e 130STATIC CHECKPOINT
cea2e8a9 131S_regcppush(pTHX_ I32 parenfloor)
a0d0e21e 132{
3280af22 133 int retval = PL_savestack_ix;
b1ce53c5
JH
134#define REGCP_PAREN_ELEMS 4
135 int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
a0d0e21e
LW
136 int p;
137
b1ce53c5
JH
138#define REGCP_OTHER_ELEMS 5
139 SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
3280af22 140 for (p = PL_regsize; p > parenfloor; p--) {
b1ce53c5 141/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
cf93c79d
IZ
142 SSPUSHINT(PL_regendp[p]);
143 SSPUSHINT(PL_regstartp[p]);
3280af22 144 SSPUSHPTR(PL_reg_start_tmp[p]);
a0d0e21e
LW
145 SSPUSHINT(p);
146 }
b1ce53c5 147/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
3280af22
NIS
148 SSPUSHINT(PL_regsize);
149 SSPUSHINT(*PL_reglastparen);
150 SSPUSHPTR(PL_reginput);
41123dfd
JH
151#define REGCP_FRAME_ELEMS 2
152/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
153 * are needed for the regexp context stack bookkeeping. */
154 SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
b1ce53c5 155 SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
41123dfd 156
a0d0e21e
LW
157 return retval;
158}
159
c277df42 160/* These are needed since we do not localize EVAL nodes: */
02db2b7b 161# define REGCP_SET(cp) DEBUG_r(PerlIO_printf(Perl_debug_log, \
faccc32b 162 " Setting an EVAL scope, savestack=%"IVdf"\n", \
02db2b7b 163 (IV)PL_savestack_ix)); cp = PL_savestack_ix
c3464db5 164
02db2b7b 165# define REGCP_UNWIND(cp) DEBUG_r(cp != PL_savestack_ix ? \
c3464db5 166 PerlIO_printf(Perl_debug_log, \
faccc32b 167 " Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
02db2b7b 168 (IV)(cp), (IV)PL_savestack_ix) : 0); regcpblow(cp)
c277df42 169
76e3520e 170STATIC char *
cea2e8a9 171S_regcppop(pTHX)
a0d0e21e 172{
b1ce53c5 173 I32 i;
a0d0e21e
LW
174 U32 paren = 0;
175 char *input;
cf93c79d 176 I32 tmps;
b1ce53c5
JH
177
178 /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
a0d0e21e 179 i = SSPOPINT;
b1ce53c5
JH
180 assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
181 i = SSPOPINT; /* Parentheses elements to pop. */
a0d0e21e 182 input = (char *) SSPOPPTR;
3280af22
NIS
183 *PL_reglastparen = SSPOPINT;
184 PL_regsize = SSPOPINT;
b1ce53c5
JH
185
186 /* Now restore the parentheses context. */
41123dfd
JH
187 for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
188 i > 0; i -= REGCP_PAREN_ELEMS) {
a0d0e21e 189 paren = (U32)SSPOPINT;
3280af22 190 PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
cf93c79d
IZ
191 PL_regstartp[paren] = SSPOPINT;
192 tmps = SSPOPINT;
3280af22
NIS
193 if (paren <= *PL_reglastparen)
194 PL_regendp[paren] = tmps;
c277df42 195 DEBUG_r(
c3464db5 196 PerlIO_printf(Perl_debug_log,
b900a521 197 " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
9041c2e3 198 (UV)paren, (IV)PL_regstartp[paren],
b900a521 199 (IV)(PL_reg_start_tmp[paren] - PL_bostr),
9041c2e3 200 (IV)PL_regendp[paren],
3280af22 201 (paren > *PL_reglastparen ? "(no)" : ""));
c277df42 202 );
a0d0e21e 203 }
c277df42 204 DEBUG_r(
3280af22 205 if (*PL_reglastparen + 1 <= PL_regnpar) {
c3464db5 206 PerlIO_printf(Perl_debug_log,
faccc32b
JH
207 " restoring \\%"IVdf"..\\%"IVdf" to undef\n",
208 (IV)(*PL_reglastparen + 1), (IV)PL_regnpar);
c277df42
IZ
209 }
210 );
daf18116 211#if 1
dafc8851
JH
212 /* It would seem that the similar code in regtry()
213 * already takes care of this, and in fact it is in
214 * a better location to since this code can #if 0-ed out
215 * but the code in regtry() is needed or otherwise tests
216 * requiring null fields (pat.t#187 and split.t#{13,14}
daf18116
JH
217 * (as of patchlevel 7877) will fail. Then again,
218 * this code seems to be necessary or otherwise
219 * building DynaLoader will fail:
220 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
221 * --jhi */
3280af22
NIS
222 for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
223 if (paren > PL_regsize)
cf93c79d
IZ
224 PL_regstartp[paren] = -1;
225 PL_regendp[paren] = -1;
a0d0e21e 226 }
dafc8851 227#endif
a0d0e21e
LW
228 return input;
229}
230
0f5d15d6 231STATIC char *
cea2e8a9 232S_regcp_set_to(pTHX_ I32 ss)
0f5d15d6
IZ
233{
234 I32 tmp = PL_savestack_ix;
235
236 PL_savestack_ix = ss;
237 regcppop();
238 PL_savestack_ix = tmp;
942e002e 239 return Nullch;
0f5d15d6
IZ
240}
241
242typedef struct re_cc_state
243{
244 I32 ss;
245 regnode *node;
246 struct re_cc_state *prev;
247 CURCUR *cc;
248 regexp *re;
249} re_cc_state;
250
02db2b7b 251#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
a0d0e21e 252
29d1e993
HS
253#define TRYPAREN(paren, n, input) { \
254 if (paren) { \
255 if (n) { \
256 PL_regstartp[paren] = HOPc(input, -1) - PL_bostr; \
257 PL_regendp[paren] = input - PL_bostr; \
258 } \
259 else \
260 PL_regendp[paren] = -1; \
261 } \
262 if (regmatch(next)) \
263 sayYES; \
264 if (paren && n) \
265 PL_regendp[paren] = -1; \
266}
267
268
a687059c 269/*
e50aee73 270 * pregexec and friends
a687059c
LW
271 */
272
273/*
c277df42 274 - pregexec - match a regexp against a string
a687059c 275 */
c277df42 276I32
864dbfa3 277Perl_pregexec(pTHX_ register regexp *prog, char *stringarg, register char *strend,
c3464db5 278 char *strbeg, I32 minend, SV *screamer, U32 nosave)
c277df42
IZ
279/* strend: pointer to null at end of string */
280/* strbeg: real beginning of string */
281/* minend: end of match must be >=minend after stringarg. */
282/* nosave: For optimizations. */
283{
284 return
9041c2e3 285 regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
c277df42
IZ
286 nosave ? 0 : REXEC_COPY_STR);
287}
0f5d15d6
IZ
288
289STATIC void
cea2e8a9 290S_cache_re(pTHX_ regexp *prog)
0f5d15d6
IZ
291{
292 PL_regprecomp = prog->precomp; /* Needed for FAIL. */
293#ifdef DEBUGGING
294 PL_regprogram = prog->program;
295#endif
296 PL_regnpar = prog->nparens;
9041c2e3
NIS
297 PL_regdata = prog->data;
298 PL_reg_re = prog;
0f5d15d6 299}
22e551b9 300
9041c2e3 301/*
cad2e5aa
JH
302 * Need to implement the following flags for reg_anch:
303 *
304 * USE_INTUIT_NOML - Useful to call re_intuit_start() first
305 * USE_INTUIT_ML
306 * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
307 * INTUIT_AUTORITATIVE_ML
308 * INTUIT_ONCE_NOML - Intuit can match in one location only.
309 * INTUIT_ONCE_ML
310 *
311 * Another flag for this function: SECOND_TIME (so that float substrs
312 * with giant delta may be not rechecked).
313 */
314
315/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
316
2c2d71f5 317/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
cad2e5aa
JH
318 Otherwise, only SvCUR(sv) is used to get strbeg. */
319
320/* XXXX We assume that strpos is strbeg unless sv. */
321
6eb5f6b9
JH
322/* XXXX Some places assume that there is a fixed substring.
323 An update may be needed if optimizer marks as "INTUITable"
324 RExen without fixed substrings. Similarly, it is assumed that
325 lengths of all the strings are no more than minlen, thus they
326 cannot come from lookahead.
327 (Or minlen should take into account lookahead.) */
328
2c2d71f5
JH
329/* A failure to find a constant substring means that there is no need to make
330 an expensive call to REx engine, thus we celebrate a failure. Similarly,
331 finding a substring too deep into the string means that less calls to
30944b6d
IZ
332 regtry() should be needed.
333
334 REx compiler's optimizer found 4 possible hints:
335 a) Anchored substring;
336 b) Fixed substring;
337 c) Whether we are anchored (beginning-of-line or \G);
338 d) First node (of those at offset 0) which may distingush positions;
6eb5f6b9 339 We use a)b)d) and multiline-part of c), and try to find a position in the
30944b6d
IZ
340 string which does not contradict any of them.
341 */
2c2d71f5 342
6eb5f6b9
JH
343/* Most of decisions we do here should have been done at compile time.
344 The nodes of the REx which we used for the search should have been
345 deleted from the finite automaton. */
346
cad2e5aa
JH
347char *
348Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
349 char *strend, U32 flags, re_scream_pos_data *data)
350{
2c2d71f5 351 register I32 start_shift;
cad2e5aa 352 /* Should be nonnegative! */
2c2d71f5
JH
353 register I32 end_shift;
354 register char *s;
355 register SV *check;
a1933d95 356 char *strbeg;
cad2e5aa
JH
357 char *t;
358 I32 ml_anch;
2c2d71f5 359 char *tmp;
6eb5f6b9
JH
360 register char *other_last = Nullch; /* other substr checked before this */
361 char *check_at; /* check substr found at this pos */
30944b6d
IZ
362#ifdef DEBUGGING
363 char *i_strpos = strpos;
364#endif
cad2e5aa
JH
365
366 DEBUG_r( if (!PL_colorset) reginitcolors() );
367 DEBUG_r(PerlIO_printf(Perl_debug_log,
2c2d71f5 368 "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
cad2e5aa
JH
369 PL_colors[4],PL_colors[5],PL_colors[0],
370 prog->precomp,
371 PL_colors[1],
372 (strlen(prog->precomp) > 60 ? "..." : ""),
373 PL_colors[0],
b900a521 374 (int)(strend - strpos > 60 ? 60 : strend - strpos),
cad2e5aa
JH
375 strpos, PL_colors[1],
376 (strend - strpos > 60 ? "..." : ""))
377 );
378
1aa99e6b
IH
379 if (prog->reganch & ROPT_UTF8)
380 PL_reg_flags |= RF_utf8;
381
382 if (prog->minlen > CHR_DIST((U8*)strend, (U8*)strpos)) {
2c2d71f5 383 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
cad2e5aa 384 goto fail;
2c2d71f5 385 }
a1933d95 386 strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
1aa99e6b 387 PL_regeol = strend;
653099ff 388 check = prog->check_substr;
2c2d71f5 389 if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
cad2e5aa
JH
390 ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
391 || ( (prog->reganch & ROPT_ANCH_BOL)
2c2d71f5 392 && !PL_multiline ) ); /* Check after \n? */
cad2e5aa 393
b8ef571c 394 if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
2c2d71f5 395 /* Substring at constant offset from beg-of-str... */
cad2e5aa
JH
396 I32 slen;
397
b8ef571c
JH
398 if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
399 /* SvCUR is not set on references: SvRV and SvPVX overlap */
400 && sv && !SvROK(sv)
401 && (strpos != strbeg)) {
402 DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
403 goto fail;
404 }
405 PL_regeol = strend; /* Used in HOP() */
1aa99e6b 406 s = HOP3c(strpos, prog->check_offset_min, strend);
653099ff
GS
407 if (SvTAIL(check)) {
408 slen = SvCUR(check); /* >= 1 */
cad2e5aa 409
9041c2e3 410 if ( strend - s > slen || strend - s < slen - 1
2c2d71f5
JH
411 || (strend - s == slen && strend[-1] != '\n')) {
412 DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
413 goto fail_finish;
cad2e5aa
JH
414 }
415 /* Now should match s[0..slen-2] */
416 slen--;
653099ff 417 if (slen && (*SvPVX(check) != *s
cad2e5aa 418 || (slen > 1
653099ff 419 && memNE(SvPVX(check), s, slen)))) {
2c2d71f5
JH
420 report_neq:
421 DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
422 goto fail_finish;
423 }
cad2e5aa 424 }
653099ff
GS
425 else if (*SvPVX(check) != *s
426 || ((slen = SvCUR(check)) > 1
427 && memNE(SvPVX(check), s, slen)))
2c2d71f5
JH
428 goto report_neq;
429 goto success_at_start;
cad2e5aa 430 }
2c2d71f5 431 /* Match is anchored, but substr is not anchored wrt beg-of-str. */
cad2e5aa 432 s = strpos;
2c2d71f5 433 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
2c2d71f5 434 end_shift = prog->minlen - start_shift -
653099ff 435 CHR_SVLEN(check) + (SvTAIL(check) != 0);
2c2d71f5 436 if (!ml_anch) {
653099ff
GS
437 I32 end = prog->check_offset_max + CHR_SVLEN(check)
438 - (SvTAIL(check) != 0);
1aa99e6b 439 I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
2c2d71f5
JH
440
441 if (end_shift < eshift)
442 end_shift = eshift;
443 }
cad2e5aa 444 }
2c2d71f5 445 else { /* Can match at random position */
cad2e5aa
JH
446 ml_anch = 0;
447 s = strpos;
2c2d71f5
JH
448 start_shift = prog->check_offset_min; /* okay to underestimate on CC */
449 /* Should be nonnegative! */
450 end_shift = prog->minlen - start_shift -
653099ff 451 CHR_SVLEN(check) + (SvTAIL(check) != 0);
cad2e5aa
JH
452 }
453
2c2d71f5 454#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
0033605d 455 if (end_shift < 0)
6bbae5e6 456 Perl_croak(aTHX_ "panic: end_shift");
2c2d71f5
JH
457#endif
458
2c2d71f5
JH
459 restart:
460 /* Find a possible match in the region s..strend by looking for
461 the "check" substring in the region corrected by start/end_shift. */
cad2e5aa 462 if (flags & REXEC_SCREAM) {
cad2e5aa
JH
463 I32 p = -1; /* Internal iterator of scream. */
464 I32 *pp = data ? data->scream_pos : &p;
465
2c2d71f5
JH
466 if (PL_screamfirst[BmRARE(check)] >= 0
467 || ( BmRARE(check) == '\n'
468 && (BmPREVIOUS(check) == SvCUR(check) - 1)
469 && SvTAIL(check) ))
9041c2e3 470 s = screaminstr(sv, check,
2c2d71f5 471 start_shift + (s - strbeg), end_shift, pp, 0);
cad2e5aa 472 else
2c2d71f5 473 goto fail_finish;
cad2e5aa
JH
474 if (data)
475 *data->scream_olds = s;
476 }
3baa4c62
JH
477 else if (prog->reganch & ROPT_SANY_SEEN)
478 s = fbm_instr((U8*)(s + start_shift),
479 (U8*)(strend - end_shift),
480 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa 481 else
1aa99e6b
IH
482 s = fbm_instr(HOP3(s, start_shift, strend),
483 HOP3(strend, -end_shift, strbeg),
2c2d71f5 484 check, PL_multiline ? FBMrf_MULTILINE : 0);
cad2e5aa
JH
485
486 /* Update the count-of-usability, remove useless subpatterns,
487 unshift s. */
2c2d71f5
JH
488
489 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
490 (s ? "Found" : "Did not find"),
491 ((check == prog->anchored_substr) ? "anchored" : "floating"),
492 PL_colors[0],
7b0972df
JH
493 (int)(SvCUR(check) - (SvTAIL(check)!=0)),
494 SvPVX(check),
2c2d71f5
JH
495 PL_colors[1], (SvTAIL(check) ? "$" : ""),
496 (s ? " at offset " : "...\n") ) );
497
498 if (!s)
499 goto fail_finish;
500
6eb5f6b9
JH
501 check_at = s;
502
2c2d71f5 503 /* Finish the diagnostic message */
30944b6d 504 DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
2c2d71f5
JH
505
506 /* Got a candidate. Check MBOL anchoring, and the *other* substr.
507 Start with the other substr.
508 XXXX no SCREAM optimization yet - and a very coarse implementation
509 XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
510 *always* match. Probably should be marked during compile...
511 Probably it is right to do no SCREAM here...
512 */
513
514 if (prog->float_substr && prog->anchored_substr) {
30944b6d 515 /* Take into account the "other" substring. */
2c2d71f5
JH
516 /* XXXX May be hopelessly wrong for UTF... */
517 if (!other_last)
6eb5f6b9 518 other_last = strpos;
2c2d71f5 519 if (check == prog->float_substr) {
30944b6d
IZ
520 do_other_anchored:
521 {
1aa99e6b 522 char *last = HOP3c(s, -start_shift, strbeg), *last1, *last2;
2c2d71f5
JH
523 char *s1 = s;
524
2c2d71f5
JH
525 t = s - prog->check_offset_max;
526 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
527 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b 528 || ((t = reghopmaybe3_c(s, -(prog->check_offset_max), strpos))
2c2d71f5 529 && t > strpos)))
30944b6d 530 /* EMPTY */;
2c2d71f5
JH
531 else
532 t = strpos;
1aa99e6b 533 t = HOP3c(t, prog->anchored_offset, strend);
6eb5f6b9
JH
534 if (t < other_last) /* These positions already checked */
535 t = other_last;
1aa99e6b 536 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
2c2d71f5
JH
537 if (last < last1)
538 last1 = last;
539 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
540 /* On end-of-str: see comment below. */
541 s = fbm_instr((unsigned char*)t,
1aa99e6b
IH
542 HOP3(HOP3(last1, prog->anchored_offset, strend)
543 + SvCUR(prog->anchored_substr),
544 -(SvTAIL(prog->anchored_substr)!=0), strbeg),
545 prog->anchored_substr,
546 PL_multiline ? FBMrf_MULTILINE : 0);
547 DEBUG_r(PerlIO_printf(Perl_debug_log,
548 "%s anchored substr `%s%.*s%s'%s",
2c2d71f5
JH
549 (s ? "Found" : "Contradicts"),
550 PL_colors[0],
7b0972df
JH
551 (int)(SvCUR(prog->anchored_substr)
552 - (SvTAIL(prog->anchored_substr)!=0)),
2c2d71f5
JH
553 SvPVX(prog->anchored_substr),
554 PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
555 if (!s) {
556 if (last1 >= last2) {
557 DEBUG_r(PerlIO_printf(Perl_debug_log,
558 ", giving up...\n"));
559 goto fail_finish;
560 }
561 DEBUG_r(PerlIO_printf(Perl_debug_log,
562 ", trying floating at offset %ld...\n",
1aa99e6b
IH
563 (long)(HOP3c(s1, 1, strend) - i_strpos)));
564 other_last = HOP3c(last1, prog->anchored_offset+1, strend);
565 s = HOP3c(last, 1, strend);
2c2d71f5
JH
566 goto restart;
567 }
568 else {
569 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 570 (long)(s - i_strpos)));
1aa99e6b
IH
571 t = HOP3c(s, -prog->anchored_offset, strbeg);
572 other_last = HOP3c(s, 1, strend);
30944b6d 573 s = s1;
2c2d71f5
JH
574 if (t == strpos)
575 goto try_at_start;
2c2d71f5
JH
576 goto try_at_offset;
577 }
30944b6d 578 }
2c2d71f5
JH
579 }
580 else { /* Take into account the floating substring. */
581 char *last, *last1;
582 char *s1 = s;
583
1aa99e6b
IH
584 t = HOP3c(s, -start_shift, strbeg);
585 last1 = last =
586 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
587 if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
588 last = HOP3c(t, prog->float_max_offset, strend);
589 s = HOP3c(t, prog->float_min_offset, strend);
6eb5f6b9
JH
590 if (s < other_last)
591 s = other_last;
2c2d71f5
JH
592 /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
593 /* fbm_instr() takes into account exact value of end-of-str
594 if the check is SvTAIL(ed). Since false positives are OK,
595 and end-of-str is not later than strend we are OK. */
596 s = fbm_instr((unsigned char*)s,
597 (unsigned char*)last + SvCUR(prog->float_substr)
598 - (SvTAIL(prog->float_substr)!=0),
599 prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
600 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
601 (s ? "Found" : "Contradicts"),
602 PL_colors[0],
7b0972df
JH
603 (int)(SvCUR(prog->float_substr)
604 - (SvTAIL(prog->float_substr)!=0)),
2c2d71f5
JH
605 SvPVX(prog->float_substr),
606 PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
607 if (!s) {
608 if (last1 == last) {
609 DEBUG_r(PerlIO_printf(Perl_debug_log,
610 ", giving up...\n"));
611 goto fail_finish;
612 }
613 DEBUG_r(PerlIO_printf(Perl_debug_log,
614 ", trying anchored starting at offset %ld...\n",
30944b6d 615 (long)(s1 + 1 - i_strpos)));
803ff556 616 other_last = last;
1aa99e6b 617 s = HOP3c(t, 1, strend);
2c2d71f5
JH
618 goto restart;
619 }
620 else {
621 DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
30944b6d 622 (long)(s - i_strpos)));
803ff556 623 other_last = s; /* Fix this later. --Hugo */
30944b6d 624 s = s1;
2c2d71f5
JH
625 if (t == strpos)
626 goto try_at_start;
2c2d71f5
JH
627 goto try_at_offset;
628 }
629 }
cad2e5aa 630 }
2c2d71f5
JH
631
632 t = s - prog->check_offset_max;
2c2d71f5
JH
633 if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
634 && (!(prog->reganch & ROPT_UTF8)
1aa99e6b
IH
635 || ((t = reghopmaybe3_c(s, -prog->check_offset_max, strpos))
636 && t > strpos))) {
2c2d71f5
JH
637 /* Fixed substring is found far enough so that the match
638 cannot start at strpos. */
639 try_at_offset:
cad2e5aa 640 if (ml_anch && t[-1] != '\n') {
30944b6d
IZ
641 /* Eventually fbm_*() should handle this, but often
642 anchored_offset is not 0, so this check will not be wasted. */
643 /* XXXX In the code below we prefer to look for "^" even in
644 presence of anchored substrings. And we search even
645 beyond the found float position. These pessimizations
646 are historical artefacts only. */
647 find_anchor:
2c2d71f5 648 while (t < strend - prog->minlen) {
cad2e5aa 649 if (*t == '\n') {
4ee3650e 650 if (t < check_at - prog->check_offset_min) {
30944b6d 651 if (prog->anchored_substr) {
4ee3650e
GS
652 /* Since we moved from the found position,
653 we definitely contradict the found anchored
30944b6d
IZ
654 substr. Due to the above check we do not
655 contradict "check" substr.
656 Thus we can arrive here only if check substr
657 is float. Redo checking for "other"=="fixed".
658 */
9041c2e3 659 strpos = t + 1;
30944b6d
IZ
660 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
661 PL_colors[0],PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
662 goto do_other_anchored;
663 }
4ee3650e
GS
664 /* We don't contradict the found floating substring. */
665 /* XXXX Why not check for STCLASS? */
cad2e5aa 666 s = t + 1;
2c2d71f5 667 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
30944b6d 668 PL_colors[0],PL_colors[1], (long)(s - i_strpos)));
cad2e5aa
JH
669 goto set_useful;
670 }
4ee3650e
GS
671 /* Position contradicts check-string */
672 /* XXXX probably better to look for check-string
673 than for "\n", so one should lower the limit for t? */
674 DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
30944b6d 675 PL_colors[0],PL_colors[1], (long)(t + 1 - i_strpos)));
0e41cd87 676 other_last = strpos = s = t + 1;
cad2e5aa
JH
677 goto restart;
678 }
679 t++;
680 }
2c2d71f5
JH
681 DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
682 PL_colors[0],PL_colors[1]));
683 goto fail_finish;
cad2e5aa 684 }
f5952150
GS
685 else {
686 DEBUG_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
687 PL_colors[0],PL_colors[1]));
688 }
cad2e5aa
JH
689 s = t;
690 set_useful:
2c2d71f5 691 ++BmUSEFUL(prog->check_substr); /* hooray/5 */
cad2e5aa
JH
692 }
693 else {
f5952150 694 /* The found string does not prohibit matching at strpos,
2c2d71f5 695 - no optimization of calling REx engine can be performed,
f5952150
GS
696 unless it was an MBOL and we are not after MBOL,
697 or a future STCLASS check will fail this. */
2c2d71f5
JH
698 try_at_start:
699 /* Even in this situation we may use MBOL flag if strpos is offset
700 wrt the start of the string. */
05b4157f 701 if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
a1933d95 702 && (strpos != strbeg) && strpos[-1] != '\n'
d506a20d
IZ
703 /* May be due to an implicit anchor of m{.*foo} */
704 && !(prog->reganch & ROPT_IMPLICIT))
705 {
cad2e5aa
JH
706 t = strpos;
707 goto find_anchor;
708 }
30944b6d 709 DEBUG_r( if (ml_anch)
f5952150
GS
710 PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
711 (long)(strpos - i_strpos), PL_colors[0],PL_colors[1]);
30944b6d 712 );
2c2d71f5 713 success_at_start:
30944b6d 714 if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */
66e933ab 715 && prog->check_substr /* Could be deleted already */
cad2e5aa 716 && --BmUSEFUL(prog->check_substr) < 0
66e933ab
GS
717 && prog->check_substr == prog->float_substr)
718 {
cad2e5aa 719 /* If flags & SOMETHING - do not do it many times on the same match */
f5952150 720 DEBUG_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
cad2e5aa
JH
721 SvREFCNT_dec(prog->check_substr);
722 prog->check_substr = Nullsv; /* disable */
723 prog->float_substr = Nullsv; /* clear */
5e39e1e5 724 check = Nullsv; /* abort */
cad2e5aa 725 s = strpos;
3cf5c195
IZ
726 /* XXXX This is a remnant of the old implementation. It
727 looks wasteful, since now INTUIT can use many
6eb5f6b9 728 other heuristics. */
cad2e5aa
JH
729 prog->reganch &= ~RE_USE_INTUIT;
730 }
731 else
732 s = strpos;
733 }
734
6eb5f6b9
JH
735 /* Last resort... */
736 /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
737 if (prog->regstclass) {
738 /* minlen == 0 is possible if regstclass is \b or \B,
739 and the fixed substr is ''$.
740 Since minlen is already taken into account, s+1 is before strend;
741 accidentally, minlen >= 1 guaranties no false positives at s + 1
742 even for \b or \B. But (minlen? 1 : 0) below assumes that
743 regstclass does not come from lookahead... */
744 /* If regstclass takes bytelength more than 1: If charlength==1, OK.
745 This leaves EXACTF only, which is dealt with in find_byclass(). */
1aa99e6b 746 U8* str = (U8*)STRING(prog->regstclass);
66e933ab 747 int cl_l = (PL_regkind[(U8)OP(prog->regstclass)] == EXACT
1aa99e6b 748 ? CHR_DIST(str+STR_LEN(prog->regstclass), str)
66e933ab 749 : 1);
6eb5f6b9 750 char *endpos = (prog->anchored_substr || ml_anch)
1aa99e6b
IH
751 ? HOP3c(s, (prog->minlen ? cl_l : 0), strend)
752 : (prog->float_substr
753 ? HOP3c(HOP3c(check_at, -start_shift, strbeg),
754 cl_l, strend)
755 : strend);
a1933d95 756 char *startpos = strbeg;
6eb5f6b9
JH
757
758 t = s;
76384e4a 759 if (prog->reganch & ROPT_UTF8) {
ffc61ed2 760 PL_regdata = prog->data;
76384e4a
GS
761 PL_bostr = startpos;
762 }
6eb5f6b9
JH
763 s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
764 if (!s) {
765#ifdef DEBUGGING
766 char *what;
767#endif
768 if (endpos == strend) {
769 DEBUG_r( PerlIO_printf(Perl_debug_log,
770 "Could not match STCLASS...\n") );
771 goto fail;
772 }
66e933ab
GS
773 DEBUG_r( PerlIO_printf(Perl_debug_log,
774 "This position contradicts STCLASS...\n") );
653099ff
GS
775 if ((prog->reganch & ROPT_ANCH) && !ml_anch)
776 goto fail;
6eb5f6b9
JH
777 /* Contradict one of substrings */
778 if (prog->anchored_substr) {
6eb5f6b9
JH
779 if (prog->anchored_substr == check) {
780 DEBUG_r( what = "anchored" );
781 hop_and_restart:
1aa99e6b 782 s = HOP3c(t, 1, strend);
66e933ab
GS
783 if (s + start_shift + end_shift > strend) {
784 /* XXXX Should be taken into account earlier? */
785 DEBUG_r( PerlIO_printf(Perl_debug_log,
786 "Could not match STCLASS...\n") );
787 goto fail;
788 }
5e39e1e5
HS
789 if (!check)
790 goto giveup;
6eb5f6b9 791 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 792 "Looking for %s substr starting at offset %ld...\n",
6eb5f6b9
JH
793 what, (long)(s + start_shift - i_strpos)) );
794 goto restart;
795 }
66e933ab 796 /* Have both, check_string is floating */
6eb5f6b9
JH
797 if (t + start_shift >= check_at) /* Contradicts floating=check */
798 goto retry_floating_check;
799 /* Recheck anchored substring, but not floating... */
9041c2e3 800 s = check_at;
5e39e1e5
HS
801 if (!check)
802 goto giveup;
6eb5f6b9 803 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150 804 "Looking for anchored substr starting at offset %ld...\n",
6eb5f6b9
JH
805 (long)(other_last - i_strpos)) );
806 goto do_other_anchored;
807 }
60e71179
GS
808 /* Another way we could have checked stclass at the
809 current position only: */
810 if (ml_anch) {
811 s = t = t + 1;
5e39e1e5
HS
812 if (!check)
813 goto giveup;
60e71179 814 DEBUG_r( PerlIO_printf(Perl_debug_log,
f5952150
GS
815 "Looking for /%s^%s/m starting at offset %ld...\n",
816 PL_colors[0],PL_colors[1], (long)(t - i_strpos)) );
60e71179 817 goto try_at_offset;
66e933ab 818 }
60e71179
GS
819 if (!prog->float_substr) /* Could have been deleted */
820 goto fail;
6eb5f6b9
JH
821 /* Check is floating subtring. */
822 retry_floating_check:
823 t = check_at - start_shift;
824 DEBUG_r( what = "floating" );
825 goto hop_and_restart;
826 }
827 DEBUG_r( if (t != s)
9041c2e3 828 PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
829 "By STCLASS: moving %ld --> %ld\n",
830 (long)(t - i_strpos), (long)(s - i_strpos));
831 else
9041c2e3 832 PerlIO_printf(Perl_debug_log,
6eb5f6b9
JH
833 "Does not contradict STCLASS...\n") );
834 }
5e39e1e5
HS
835 giveup:
836 DEBUG_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
837 PL_colors[4], (check ? "Guessed" : "Giving up"),
838 PL_colors[5], (long)(s - i_strpos)) );
cad2e5aa 839 return s;
2c2d71f5
JH
840
841 fail_finish: /* Substring not found */
66e933ab
GS
842 if (prog->check_substr) /* could be removed already */
843 BmUSEFUL(prog->check_substr) += 5; /* hooray */
cad2e5aa 844 fail:
2c2d71f5 845 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
cad2e5aa
JH
846 PL_colors[4],PL_colors[5]));
847 return Nullch;
848}
9661b544 849
6eb5f6b9 850/* We know what class REx starts with. Try to find this position... */
3c3eec57
GS
851STATIC char *
852S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *startpos, I32 norun)
a687059c 853{
6eb5f6b9
JH
854 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
855 char *m;
d8093b23
G
856 STRLEN ln;
857 unsigned int c1;
858 unsigned int c2;
6eb5f6b9
JH
859 char *e;
860 register I32 tmp = 1; /* Scratch variable? */
ffc61ed2 861 register bool do_utf8 = DO_UTF8(PL_reg_sv);
cad2e5aa 862
6eb5f6b9
JH
863 /* We know what class it must start with. */
864 switch (OP(c)) {
6eb5f6b9 865 case ANYOF:
a0ed51b3 866 while (s < strend) {
ffc61ed2 867 if (reginclass(c, (U8*)s, do_utf8)) {
6eb5f6b9
JH
868 if (tmp && (norun || regtry(prog, s)))
869 goto got_it;
870 else
871 tmp = doevery;
a0ed51b3 872 }
6eb5f6b9
JH
873 else
874 tmp = 1;
ffc61ed2 875 s += do_utf8 ? UTF8SKIP(s) : 1;
a0d0e21e 876 }
6eb5f6b9
JH
877 break;
878 case EXACTF:
879 m = STRING(c);
880 ln = STR_LEN(c);
1aa99e6b
IH
881 if (UTF) {
882 c1 = to_utf8_lower((U8*)m);
883 c2 = to_utf8_upper((U8*)m);
884 }
885 else {
886 c1 = *(U8*)m;
887 c2 = PL_fold[c1];
888 }
6eb5f6b9
JH
889 goto do_exactf;
890 case EXACTFL:
891 m = STRING(c);
892 ln = STR_LEN(c);
d8093b23 893 c1 = *(U8*)m;
6eb5f6b9
JH
894 c2 = PL_fold_locale[c1];
895 do_exactf:
896 e = strend - ln;
b3c9acc1 897
6eb5f6b9
JH
898 if (norun && e < s)
899 e = s; /* Due to minlen logic of intuit() */
1aa99e6b
IH
900
901 if (do_utf8) {
902 STRLEN len;
903 if (c1 == c2)
904 while (s <= e) {
9041c2e3 905 if ( utf8_to_uvchr((U8*)s, &len) == c1
1aa99e6b
IH
906 && regtry(prog, s) )
907 goto got_it;
908 s += len;
909 }
910 else
911 while (s <= e) {
9041c2e3 912 UV c = utf8_to_uvchr((U8*)s, &len);
1aa99e6b
IH
913 if ( (c == c1 || c == c2) && regtry(prog, s) )
914 goto got_it;
915 s += len;
916 }
917 }
918 else {
919 if (c1 == c2)
920 while (s <= e) {
921 if ( *(U8*)s == c1
922 && (ln == 1 || !(OP(c) == EXACTF
923 ? ibcmp(s, m, ln)
924 : ibcmp_locale(s, m, ln)))
925 && (norun || regtry(prog, s)) )
926 goto got_it;
927 s++;
928 }
929 else
930 while (s <= e) {
931 if ( (*(U8*)s == c1 || *(U8*)s == c2)
932 && (ln == 1 || !(OP(c) == EXACTF
933 ? ibcmp(s, m, ln)
934 : ibcmp_locale(s, m, ln)))
935 && (norun || regtry(prog, s)) )
936 goto got_it;
937 s++;
938 }
b3c9acc1
IZ
939 }
940 break;
bbce6d69 941 case BOUNDL:
3280af22 942 PL_reg_flags |= RF_tainted;
bbce6d69 943 /* FALL THROUGH */
a0d0e21e 944 case BOUND:
ffc61ed2
JH
945 if (do_utf8) {
946 if (s == startpos)
947 tmp = '\n';
948 else {
1aa99e6b 949 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 950
2b9d42f0 951 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
952 }
953 tmp = ((OP(c) == BOUND ?
9041c2e3 954 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 955 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
956 while (s < strend) {
957 if (tmp == !(OP(c) == BOUND ?
958 swash_fetch(PL_utf8_alnum, (U8*)s) :
959 isALNUM_LC_utf8((U8*)s)))
960 {
961 tmp = !tmp;
962 if ((norun || regtry(prog, s)))
963 goto got_it;
964 }
965 s += UTF8SKIP(s);
a687059c 966 }
a0d0e21e 967 }
667bb95a 968 else {
ffc61ed2
JH
969 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
970 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
971 while (s < strend) {
972 if (tmp ==
973 !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
974 tmp = !tmp;
975 if ((norun || regtry(prog, s)))
976 goto got_it;
977 }
978 s++;
a0ed51b3 979 }
a0ed51b3 980 }
6eb5f6b9 981 if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
982 goto got_it;
983 break;
bbce6d69 984 case NBOUNDL:
3280af22 985 PL_reg_flags |= RF_tainted;
bbce6d69 986 /* FALL THROUGH */
a0d0e21e 987 case NBOUND:
ffc61ed2
JH
988 if (do_utf8) {
989 if (s == startpos)
990 tmp = '\n';
991 else {
1aa99e6b 992 U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
9041c2e3 993
2b9d42f0 994 tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
995 }
996 tmp = ((OP(c) == NBOUND ?
9041c2e3 997 isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
8269fa76 998 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
999 while (s < strend) {
1000 if (tmp == !(OP(c) == NBOUND ?
1001 swash_fetch(PL_utf8_alnum, (U8*)s) :
1002 isALNUM_LC_utf8((U8*)s)))
1003 tmp = !tmp;
1004 else if ((norun || regtry(prog, s)))
1005 goto got_it;
1006 s += UTF8SKIP(s);
1007 }
a0d0e21e 1008 }
667bb95a 1009 else {
ffc61ed2
JH
1010 tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
1011 tmp = ((OP(c) == NBOUND ?
1012 isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1013 while (s < strend) {
1014 if (tmp ==
1015 !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1016 tmp = !tmp;
1017 else if ((norun || regtry(prog, s)))
1018 goto got_it;
1019 s++;
1020 }
a0ed51b3 1021 }
6eb5f6b9 1022 if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
a0ed51b3
LW
1023 goto got_it;
1024 break;
a0d0e21e 1025 case ALNUM:
ffc61ed2 1026 if (do_utf8) {
8269fa76 1027 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1028 while (s < strend) {
1029 if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
1030 if (tmp && (norun || regtry(prog, s)))
1031 goto got_it;
1032 else
1033 tmp = doevery;
1034 }
bbce6d69 1035 else
ffc61ed2
JH
1036 tmp = 1;
1037 s += UTF8SKIP(s);
bbce6d69 1038 }
bbce6d69 1039 }
ffc61ed2
JH
1040 else {
1041 while (s < strend) {
1042 if (isALNUM(*s)) {
1043 if (tmp && (norun || regtry(prog, s)))
1044 goto got_it;
1045 else
1046 tmp = doevery;
1047 }
a0ed51b3 1048 else
ffc61ed2
JH
1049 tmp = 1;
1050 s++;
a0ed51b3 1051 }
a0ed51b3
LW
1052 }
1053 break;
bbce6d69 1054 case ALNUML:
3280af22 1055 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1056 if (do_utf8) {
1057 while (s < strend) {
1058 if (isALNUM_LC_utf8((U8*)s)) {
1059 if (tmp && (norun || regtry(prog, s)))
1060 goto got_it;
1061 else
1062 tmp = doevery;
1063 }
a687059c 1064 else
ffc61ed2
JH
1065 tmp = 1;
1066 s += UTF8SKIP(s);
a0d0e21e 1067 }
a0d0e21e 1068 }
ffc61ed2
JH
1069 else {
1070 while (s < strend) {
1071 if (isALNUM_LC(*s)) {
1072 if (tmp && (norun || regtry(prog, s)))
1073 goto got_it;
1074 else
1075 tmp = doevery;
1076 }
a0ed51b3 1077 else
ffc61ed2
JH
1078 tmp = 1;
1079 s++;
a0ed51b3 1080 }
a0ed51b3
LW
1081 }
1082 break;
a0d0e21e 1083 case NALNUM:
ffc61ed2 1084 if (do_utf8) {
8269fa76 1085 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
1086 while (s < strend) {
1087 if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
1088 if (tmp && (norun || regtry(prog, s)))
1089 goto got_it;
1090 else
1091 tmp = doevery;
1092 }
bbce6d69 1093 else
ffc61ed2
JH
1094 tmp = 1;
1095 s += UTF8SKIP(s);
bbce6d69 1096 }
bbce6d69 1097 }
ffc61ed2
JH
1098 else {
1099 while (s < strend) {
1100 if (!isALNUM(*s)) {
1101 if (tmp && (norun || regtry(prog, s)))
1102 goto got_it;
1103 else
1104 tmp = doevery;
1105 }
a0ed51b3 1106 else
ffc61ed2
JH
1107 tmp = 1;
1108 s++;
a0ed51b3 1109 }
a0ed51b3
LW
1110 }
1111 break;
bbce6d69 1112 case NALNUML:
3280af22 1113 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1114 if (do_utf8) {
1115 while (s < strend) {
1116 if (!isALNUM_LC_utf8((U8*)s)) {
1117 if (tmp && (norun || regtry(prog, s)))
1118 goto got_it;
1119 else
1120 tmp = doevery;
1121 }
a687059c 1122 else
ffc61ed2
JH
1123 tmp = 1;
1124 s += UTF8SKIP(s);
a687059c 1125 }
a0d0e21e 1126 }
ffc61ed2
JH
1127 else {
1128 while (s < strend) {
1129 if (!isALNUM_LC(*s)) {
1130 if (tmp && (norun || regtry(prog, s)))
1131 goto got_it;
1132 else
1133 tmp = doevery;
1134 }
a0ed51b3 1135 else
ffc61ed2
JH
1136 tmp = 1;
1137 s++;
a0ed51b3 1138 }
a0ed51b3
LW
1139 }
1140 break;
a0d0e21e 1141 case SPACE:
ffc61ed2 1142 if (do_utf8) {
8269fa76 1143 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2
JH
1144 while (s < strend) {
1145 if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
1146 if (tmp && (norun || regtry(prog, s)))
1147 goto got_it;
1148 else
1149 tmp = doevery;
1150 }
a0d0e21e 1151 else
ffc61ed2
JH
1152 tmp = 1;
1153 s += UTF8SKIP(s);
2304df62 1154 }
a0d0e21e 1155 }
ffc61ed2
JH
1156 else {
1157 while (s < strend) {
1158 if (isSPACE(*s)) {
1159 if (tmp && (norun || regtry(prog, s)))
1160 goto got_it;
1161 else
1162 tmp = doevery;
1163 }
a0ed51b3 1164 else
ffc61ed2
JH
1165 tmp = 1;
1166 s++;
a0ed51b3 1167 }
a0ed51b3
LW
1168 }
1169 break;
bbce6d69 1170 case SPACEL:
3280af22 1171 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1172 if (do_utf8) {
1173 while (s < strend) {
1174 if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
1175 if (tmp && (norun || regtry(prog, s)))
1176 goto got_it;
1177 else
1178 tmp = doevery;
1179 }
bbce6d69 1180 else
ffc61ed2
JH
1181 tmp = 1;
1182 s += UTF8SKIP(s);
bbce6d69 1183 }
bbce6d69 1184 }
ffc61ed2
JH
1185 else {
1186 while (s < strend) {
1187 if (isSPACE_LC(*s)) {
1188 if (tmp && (norun || regtry(prog, s)))
1189 goto got_it;
1190 else
1191 tmp = doevery;
1192 }
a0ed51b3 1193 else
ffc61ed2
JH
1194 tmp = 1;
1195 s++;
a0ed51b3 1196 }
a0ed51b3
LW
1197 }
1198 break;
a0d0e21e 1199 case NSPACE:
ffc61ed2 1200 if (do_utf8) {
8269fa76 1201 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2
JH
1202 while (s < strend) {
1203 if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
1204 if (tmp && (norun || regtry(prog, s)))
1205 goto got_it;
1206 else
1207 tmp = doevery;
1208 }
a0d0e21e 1209 else
ffc61ed2
JH
1210 tmp = 1;
1211 s += UTF8SKIP(s);
a687059c 1212 }
a0d0e21e 1213 }
ffc61ed2
JH
1214 else {
1215 while (s < strend) {
1216 if (!isSPACE(*s)) {
1217 if (tmp && (norun || regtry(prog, s)))
1218 goto got_it;
1219 else
1220 tmp = doevery;
1221 }
a0ed51b3 1222 else
ffc61ed2
JH
1223 tmp = 1;
1224 s++;
a0ed51b3 1225 }
a0ed51b3
LW
1226 }
1227 break;
bbce6d69 1228 case NSPACEL:
3280af22 1229 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1230 if (do_utf8) {
1231 while (s < strend) {
1232 if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
1233 if (tmp && (norun || regtry(prog, s)))
1234 goto got_it;
1235 else
1236 tmp = doevery;
1237 }
bbce6d69 1238 else
ffc61ed2
JH
1239 tmp = 1;
1240 s += UTF8SKIP(s);
bbce6d69 1241 }
bbce6d69 1242 }
ffc61ed2
JH
1243 else {
1244 while (s < strend) {
1245 if (!isSPACE_LC(*s)) {
1246 if (tmp && (norun || regtry(prog, s)))
1247 goto got_it;
1248 else
1249 tmp = doevery;
1250 }
a0ed51b3 1251 else
ffc61ed2
JH
1252 tmp = 1;
1253 s++;
a0ed51b3 1254 }
a0ed51b3
LW
1255 }
1256 break;
a0d0e21e 1257 case DIGIT:
ffc61ed2 1258 if (do_utf8) {
8269fa76 1259 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2
JH
1260 while (s < strend) {
1261 if (swash_fetch(PL_utf8_digit,(U8*)s)) {
1262 if (tmp && (norun || regtry(prog, s)))
1263 goto got_it;
1264 else
1265 tmp = doevery;
1266 }
a0d0e21e 1267 else
ffc61ed2
JH
1268 tmp = 1;
1269 s += UTF8SKIP(s);
2b69d0c2 1270 }
a0d0e21e 1271 }
ffc61ed2
JH
1272 else {
1273 while (s < strend) {
1274 if (isDIGIT(*s)) {
1275 if (tmp && (norun || regtry(prog, s)))
1276 goto got_it;
1277 else
1278 tmp = doevery;
1279 }
a0ed51b3 1280 else
ffc61ed2
JH
1281 tmp = 1;
1282 s++;
a0ed51b3 1283 }
a0ed51b3
LW
1284 }
1285 break;
b8c5462f
JH
1286 case DIGITL:
1287 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1288 if (do_utf8) {
1289 while (s < strend) {
1290 if (isDIGIT_LC_utf8((U8*)s)) {
1291 if (tmp && (norun || regtry(prog, s)))
1292 goto got_it;
1293 else
1294 tmp = doevery;
1295 }
b8c5462f 1296 else
ffc61ed2
JH
1297 tmp = 1;
1298 s += UTF8SKIP(s);
b8c5462f 1299 }
b8c5462f 1300 }
ffc61ed2
JH
1301 else {
1302 while (s < strend) {
1303 if (isDIGIT_LC(*s)) {
1304 if (tmp && (norun || regtry(prog, s)))
1305 goto got_it;
1306 else
1307 tmp = doevery;
1308 }
b8c5462f 1309 else
ffc61ed2
JH
1310 tmp = 1;
1311 s++;
b8c5462f 1312 }
b8c5462f
JH
1313 }
1314 break;
a0d0e21e 1315 case NDIGIT:
ffc61ed2 1316 if (do_utf8) {
8269fa76 1317 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2
JH
1318 while (s < strend) {
1319 if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
1320 if (tmp && (norun || regtry(prog, s)))
1321 goto got_it;
1322 else
1323 tmp = doevery;
1324 }
a0d0e21e 1325 else
ffc61ed2
JH
1326 tmp = 1;
1327 s += UTF8SKIP(s);
a687059c 1328 }
a0d0e21e 1329 }
ffc61ed2
JH
1330 else {
1331 while (s < strend) {
1332 if (!isDIGIT(*s)) {
1333 if (tmp && (norun || regtry(prog, s)))
1334 goto got_it;
1335 else
1336 tmp = doevery;
1337 }
a0ed51b3 1338 else
ffc61ed2
JH
1339 tmp = 1;
1340 s++;
a0ed51b3 1341 }
a0ed51b3
LW
1342 }
1343 break;
b8c5462f
JH
1344 case NDIGITL:
1345 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
1346 if (do_utf8) {
1347 while (s < strend) {
1348 if (!isDIGIT_LC_utf8((U8*)s)) {
1349 if (tmp && (norun || regtry(prog, s)))
1350 goto got_it;
1351 else
1352 tmp = doevery;
1353 }
b8c5462f 1354 else
ffc61ed2
JH
1355 tmp = 1;
1356 s += UTF8SKIP(s);
b8c5462f 1357 }
a0ed51b3 1358 }
ffc61ed2
JH
1359 else {
1360 while (s < strend) {
1361 if (!isDIGIT_LC(*s)) {
1362 if (tmp && (norun || regtry(prog, s)))
1363 goto got_it;
1364 else
1365 tmp = doevery;
1366 }
cf93c79d 1367 else
ffc61ed2
JH
1368 tmp = 1;
1369 s++;
b8c5462f 1370 }
b8c5462f
JH
1371 }
1372 break;
b3c9acc1 1373 default:
3c3eec57
GS
1374 Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1375 break;
d6a28714 1376 }
6eb5f6b9
JH
1377 return 0;
1378 got_it:
1379 return s;
1380}
1381
1382/*
1383 - regexec_flags - match a regexp against a string
1384 */
1385I32
1386Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
1387 char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1388/* strend: pointer to null at end of string */
1389/* strbeg: real beginning of string */
1390/* minend: end of match must be >=minend after stringarg. */
1391/* data: May be used for some additional optimizations. */
1392/* nosave: For optimizations. */
1393{
6eb5f6b9
JH
1394 register char *s;
1395 register regnode *c;
1396 register char *startpos = stringarg;
6eb5f6b9
JH
1397 I32 minlen; /* must match at least this many chars */
1398 I32 dontbother = 0; /* how many characters not to try at end */
155aba94 1399 /* I32 start_shift = 0; */ /* Offset of the start to find
6eb5f6b9
JH
1400 constant substr. */ /* CC */
1401 I32 end_shift = 0; /* Same for the end. */ /* CC */
1402 I32 scream_pos = -1; /* Internal iterator of scream. */
1403 char *scream_olds;
1404 SV* oreplsv = GvSV(PL_replgv);
1aa99e6b 1405 bool do_utf8 = DO_UTF8(sv);
6eb5f6b9
JH
1406
1407 PL_regcc = 0;
1408
1409 cache_re(prog);
1410#ifdef DEBUGGING
aea4f609 1411 PL_regnarrate = DEBUG_r_TEST;
6eb5f6b9
JH
1412#endif
1413
1414 /* Be paranoid... */
1415 if (prog == NULL || startpos == NULL) {
1416 Perl_croak(aTHX_ "NULL regexp parameter");
1417 return 0;
1418 }
1419
1420 minlen = prog->minlen;
1aa99e6b 1421 if (do_utf8) {
3baa4c62
JH
1422 if (!(prog->reganch & ROPT_SANY_SEEN))
1423 if (utf8_distance((U8*)strend, (U8*)startpos) < minlen) goto phooey;
1aa99e6b
IH
1424 }
1425 else {
1426 if (strend - startpos < minlen) goto phooey;
1427 }
6eb5f6b9
JH
1428
1429 if (startpos == strbeg) /* is ^ valid at stringarg? */
1430 PL_regprev = '\n';
1431 else {
1aa99e6b
IH
1432 if (prog->reganch & ROPT_UTF8 && do_utf8) {
1433 U8 *s = reghop3((U8*)stringarg, -1, (U8*)strbeg);
2b9d42f0 1434 PL_regprev = utf8n_to_uvchr(s, (U8*)stringarg - s, NULL, 0);
1aa99e6b
IH
1435 }
1436 else
1437 PL_regprev = (U32)stringarg[-1];
6eb5f6b9
JH
1438 if (!PL_multiline && PL_regprev == '\n')
1439 PL_regprev = '\0'; /* force ^ to NOT match */
1440 }
1441
1442 /* Check validity of program. */
1443 if (UCHARAT(prog->program) != REG_MAGIC) {
1444 Perl_croak(aTHX_ "corrupted regexp program");
1445 }
1446
1447 PL_reg_flags = 0;
1448 PL_reg_eval_set = 0;
1449 PL_reg_maxiter = 0;
1450
1451 if (prog->reganch & ROPT_UTF8)
1452 PL_reg_flags |= RF_utf8;
1453
1454 /* Mark beginning of line for ^ and lookbehind. */
1455 PL_regbol = startpos;
1456 PL_bostr = strbeg;
1457 PL_reg_sv = sv;
1458
1459 /* Mark end of line for $ (and such) */
1460 PL_regeol = strend;
1461
1462 /* see how far we have to get to not match where we matched before */
1463 PL_regtill = startpos+minend;
1464
1465 /* We start without call_cc context. */
1466 PL_reg_call_cc = 0;
1467
1468 /* If there is a "must appear" string, look for it. */
1469 s = startpos;
1470
1471 if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
1472 MAGIC *mg;
1473
1474 if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1475 PL_reg_ganch = startpos;
1476 else if (sv && SvTYPE(sv) >= SVt_PVMG
1477 && SvMAGIC(sv)
1478 && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
1479 PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
1480 if (prog->reganch & ROPT_ANCH_GPOS) {
1481 if (s > PL_reg_ganch)
1482 goto phooey;
1483 s = PL_reg_ganch;
1484 }
1485 }
1486 else /* pos() not defined */
1487 PL_reg_ganch = strbeg;
1488 }
1489
699c3c34
JH
1490 if (do_utf8 == (UTF!=0) &&
1491 !(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
6eb5f6b9
JH
1492 re_scream_pos_data d;
1493
1494 d.scream_olds = &scream_olds;
1495 d.scream_pos = &scream_pos;
1496 s = re_intuit_start(prog, sv, s, strend, flags, &d);
1497 if (!s)
1498 goto phooey; /* not present */
1499 }
1500
1501 DEBUG_r( if (!PL_colorset) reginitcolors() );
1502 DEBUG_r(PerlIO_printf(Perl_debug_log,
1503 "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
1504 PL_colors[4],PL_colors[5],PL_colors[0],
1505 prog->precomp,
1506 PL_colors[1],
1507 (strlen(prog->precomp) > 60 ? "..." : ""),
1508 PL_colors[0],
1509 (int)(strend - startpos > 60 ? 60 : strend - startpos),
1510 startpos, PL_colors[1],
1511 (strend - startpos > 60 ? "..." : ""))
1512 );
1513
1514 /* Simplest case: anchored match need be tried only once. */
1515 /* [unless only anchor is BOL and multiline is set] */
1516 if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
1517 if (s == startpos && regtry(prog, startpos))
1518 goto got_it;
1519 else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
1520 || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
1521 {
1522 char *end;
1523
1524 if (minlen)
1525 dontbother = minlen - 1;
1aa99e6b 1526 end = HOP3c(strend, -dontbother, strbeg) - 1;
6eb5f6b9
JH
1527 /* for multiline we only have to try after newlines */
1528 if (prog->check_substr) {
1529 if (s == startpos)
1530 goto after_try;
1531 while (1) {
1532 if (regtry(prog, s))
1533 goto got_it;
1534 after_try:
1535 if (s >= end)
1536 goto phooey;
1537 if (prog->reganch & RE_USE_INTUIT) {
1538 s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1539 if (!s)
1540 goto phooey;
1541 }
1542 else
1543 s++;
1544 }
1545 } else {
1546 if (s > startpos)
1547 s--;
1548 while (s < end) {
1549 if (*s++ == '\n') { /* don't need PL_utf8skip here */
1550 if (regtry(prog, s))
1551 goto got_it;
1552 }
1553 }
1554 }
1555 }
1556 goto phooey;
1557 } else if (prog->reganch & ROPT_ANCH_GPOS) {
1558 if (regtry(prog, PL_reg_ganch))
1559 goto got_it;
1560 goto phooey;
1561 }
1562
1563 /* Messy cases: unanchored match. */
9041c2e3 1564 if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
6eb5f6b9
JH
1565 /* we have /x+whatever/ */
1566 /* it must be a one character string (XXXX Except UTF?) */
1567 char ch = SvPVX(prog->anchored_substr)[0];
bf93d4cc
GS
1568#ifdef DEBUGGING
1569 int did_match = 0;
1570#endif
1571
1aa99e6b 1572 if (do_utf8) {
6eb5f6b9
JH
1573 while (s < strend) {
1574 if (*s == ch) {
bf93d4cc 1575 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1576 if (regtry(prog, s)) goto got_it;
1577 s += UTF8SKIP(s);
1578 while (s < strend && *s == ch)
1579 s += UTF8SKIP(s);
1580 }
1581 s += UTF8SKIP(s);
1582 }
1583 }
1584 else {
1585 while (s < strend) {
1586 if (*s == ch) {
bf93d4cc 1587 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1588 if (regtry(prog, s)) goto got_it;
1589 s++;
1590 while (s < strend && *s == ch)
1591 s++;
1592 }
1593 s++;
1594 }
1595 }
bf93d4cc
GS
1596 DEBUG_r(did_match ||
1597 PerlIO_printf(Perl_debug_log,
1598 "Did not find anchored character...\n"));
6eb5f6b9
JH
1599 }
1600 /*SUPPRESS 560*/
1aa99e6b
IH
1601 else if (do_utf8 == (UTF!=0) &&
1602 (prog->anchored_substr != Nullsv
9041c2e3 1603 || (prog->float_substr != Nullsv
1aa99e6b 1604 && prog->float_max_offset < strend - s))) {
9041c2e3 1605 SV *must = prog->anchored_substr
6eb5f6b9 1606 ? prog->anchored_substr : prog->float_substr;
9041c2e3 1607 I32 back_max =
6eb5f6b9 1608 prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
9041c2e3 1609 I32 back_min =
6eb5f6b9 1610 prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
1aa99e6b 1611 char *last = HOP3c(strend, /* Cannot start after this */
6eb5f6b9 1612 -(I32)(CHR_SVLEN(must)
1aa99e6b 1613 - (SvTAIL(must) != 0) + back_min), strbeg);
6eb5f6b9 1614 char *last1; /* Last position checked before */
bf93d4cc
GS
1615#ifdef DEBUGGING
1616 int did_match = 0;
1617#endif
6eb5f6b9
JH
1618
1619 if (s > PL_bostr)
1620 last1 = HOPc(s, -1);
1621 else
1622 last1 = s - 1; /* bogus */
1623
1624 /* XXXX check_substr already used to find `s', can optimize if
1625 check_substr==must. */
1626 scream_pos = -1;
1627 dontbother = end_shift;
1628 strend = HOPc(strend, -dontbother);
1629 while ( (s <= last) &&
9041c2e3 1630 ((flags & REXEC_SCREAM)
1aa99e6b 1631 ? (s = screaminstr(sv, must, HOP3c(s, back_min, strend) - strbeg,
6eb5f6b9 1632 end_shift, &scream_pos, 0))
1aa99e6b 1633 : (s = fbm_instr((unsigned char*)HOP3(s, back_min, strend),
9041c2e3 1634 (unsigned char*)strend, must,
6eb5f6b9 1635 PL_multiline ? FBMrf_MULTILINE : 0))) ) {
bf93d4cc 1636 DEBUG_r( did_match = 1 );
6eb5f6b9
JH
1637 if (HOPc(s, -back_max) > last1) {
1638 last1 = HOPc(s, -back_min);
1639 s = HOPc(s, -back_max);
1640 }
1641 else {
1642 char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
1643
1644 last1 = HOPc(s, -back_min);
1645 s = t;
1646 }
1aa99e6b 1647 if (do_utf8) {
6eb5f6b9
JH
1648 while (s <= last1) {
1649 if (regtry(prog, s))
1650 goto got_it;
1651 s += UTF8SKIP(s);
1652 }
1653 }
1654 else {
1655 while (s <= last1) {
1656 if (regtry(prog, s))
1657 goto got_it;
1658 s++;
1659 }
1660 }
1661 }
bf93d4cc
GS
1662 DEBUG_r(did_match ||
1663 PerlIO_printf(Perl_debug_log, "Did not find %s substr `%s%.*s%s'%s...\n",
1664 ((must == prog->anchored_substr)
1665 ? "anchored" : "floating"),
1666 PL_colors[0],
1667 (int)(SvCUR(must) - (SvTAIL(must)!=0)),
1668 SvPVX(must),
1669 PL_colors[1], (SvTAIL(must) ? "$" : "")));
6eb5f6b9
JH
1670 goto phooey;
1671 }
155aba94 1672 else if ((c = prog->regstclass)) {
66e933ab
GS
1673 if (minlen && PL_regkind[(U8)OP(prog->regstclass)] != EXACT)
1674 /* don't bother with what can't match */
6eb5f6b9 1675 strend = HOPc(strend, -(minlen - 1));
ffc61ed2
JH
1676 DEBUG_r({
1677 SV *prop = sv_newmortal();
1678 regprop(prop, c);
1679 PerlIO_printf(Perl_debug_log, "Matching stclass `%s' against `%s'\n", SvPVX(prop), s);
1680 });
6eb5f6b9
JH
1681 if (find_byclass(prog, c, s, strend, startpos, 0))
1682 goto got_it;
bf93d4cc 1683 DEBUG_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass...\n"));
d6a28714
JH
1684 }
1685 else {
1686 dontbother = 0;
1687 if (prog->float_substr != Nullsv) { /* Trim the end. */
1688 char *last;
d6a28714
JH
1689
1690 if (flags & REXEC_SCREAM) {
1691 last = screaminstr(sv, prog->float_substr, s - strbeg,
1692 end_shift, &scream_pos, 1); /* last one */
1693 if (!last)
ffc61ed2 1694 last = scream_olds; /* Only one occurrence. */
b8c5462f 1695 }
d6a28714
JH
1696 else {
1697 STRLEN len;
1698 char *little = SvPV(prog->float_substr, len);
1699
1700 if (SvTAIL(prog->float_substr)) {
1701 if (memEQ(strend - len + 1, little, len - 1))
1702 last = strend - len + 1;
1703 else if (!PL_multiline)
9041c2e3 1704 last = memEQ(strend - len, little, len)
d6a28714 1705 ? strend - len : Nullch;
b8c5462f 1706 else
d6a28714
JH
1707 goto find_last;
1708 } else {
1709 find_last:
9041c2e3 1710 if (len)
d6a28714 1711 last = rninstr(s, strend, little, little + len);
b8c5462f 1712 else
d6a28714 1713 last = strend; /* matching `$' */
b8c5462f 1714 }
b8c5462f 1715 }
bf93d4cc
GS
1716 if (last == NULL) {
1717 DEBUG_r(PerlIO_printf(Perl_debug_log,
1718 "%sCan't trim the tail, match fails (should not happen)%s\n",
1719 PL_colors[4],PL_colors[5]));
1720 goto phooey; /* Should not happen! */
1721 }
d6a28714
JH
1722 dontbother = strend - last + prog->float_min_offset;
1723 }
1724 if (minlen && (dontbother < minlen))
1725 dontbother = minlen - 1;
1726 strend -= dontbother; /* this one's always in bytes! */
1727 /* We don't know much -- general case. */
1aa99e6b 1728 if (do_utf8) {
d6a28714
JH
1729 for (;;) {
1730 if (regtry(prog, s))
1731 goto got_it;
1732 if (s >= strend)
1733 break;
b8c5462f 1734 s += UTF8SKIP(s);
d6a28714
JH
1735 };
1736 }
1737 else {
1738 do {
1739 if (regtry(prog, s))
1740 goto got_it;
1741 } while (s++ < strend);
1742 }
1743 }
1744
1745 /* Failure. */
1746 goto phooey;
1747
1748got_it:
1749 RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
1750
1751 if (PL_reg_eval_set) {
1752 /* Preserve the current value of $^R */
1753 if (oreplsv != GvSV(PL_replgv))
1754 sv_setsv(oreplsv, GvSV(PL_replgv));/* So that when GvSV(replgv) is
1755 restored, the value remains
1756 the same. */
1757 restore_pos(aTHXo_ 0);
1758 }
1759
1760 /* make sure $`, $&, $', and $digit will work later */
1761 if ( !(flags & REXEC_NOT_FIRST) ) {
1762 if (RX_MATCH_COPIED(prog)) {
1763 Safefree(prog->subbeg);
1764 RX_MATCH_COPIED_off(prog);
1765 }
1766 if (flags & REXEC_COPY_STR) {
1767 I32 i = PL_regeol - startpos + (stringarg - strbeg);
1768
1769 s = savepvn(strbeg, i);
1770 prog->subbeg = s;
1771 prog->sublen = i;
1772 RX_MATCH_COPIED_on(prog);
1773 }
1774 else {
1775 prog->subbeg = strbeg;
1776 prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
1777 }
1778 }
9041c2e3 1779
d6a28714
JH
1780 return 1;
1781
1782phooey:
bf93d4cc
GS
1783 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
1784 PL_colors[4],PL_colors[5]));
d6a28714
JH
1785 if (PL_reg_eval_set)
1786 restore_pos(aTHXo_ 0);
1787 return 0;
1788}
1789
1790/*
1791 - regtry - try match at specific point
1792 */
1793STATIC I32 /* 0 failure, 1 success */
1794S_regtry(pTHX_ regexp *prog, char *startpos)
1795{
d6a28714
JH
1796 register I32 i;
1797 register I32 *sp;
1798 register I32 *ep;
1799 CHECKPOINT lastcp;
1800
02db2b7b
IZ
1801#ifdef DEBUGGING
1802 PL_regindent = 0; /* XXXX Not good when matches are reenterable... */
1803#endif
d6a28714
JH
1804 if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
1805 MAGIC *mg;
1806
1807 PL_reg_eval_set = RS_init;
1808 DEBUG_r(DEBUG_s(
b900a521
JH
1809 PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %"IVdf"\n",
1810 (IV)(PL_stack_sp - PL_stack_base));
d6a28714 1811 ));
e8347627 1812 SAVEI32(cxstack[cxstack_ix].blk_oldsp);
d6a28714
JH
1813 cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
1814 /* Otherwise OP_NEXTSTATE will free whatever on stack now. */
1815 SAVETMPS;
1816 /* Apparently this is not needed, judging by wantarray. */
e8347627 1817 /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
d6a28714
JH
1818 cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
1819
1820 if (PL_reg_sv) {
1821 /* Make $_ available to executed code. */
1822 if (PL_reg_sv != DEFSV) {
1823 /* SAVE_DEFSV does *not* suffice here for USE_THREADS */
1824 SAVESPTR(DEFSV);
1825 DEFSV = PL_reg_sv;
b8c5462f 1826 }
d6a28714 1827
9041c2e3 1828 if (!(SvTYPE(PL_reg_sv) >= SVt_PVMG && SvMAGIC(PL_reg_sv)
d6a28714
JH
1829 && (mg = mg_find(PL_reg_sv, 'g')))) {
1830 /* prepare for quick setting of pos */
1831 sv_magic(PL_reg_sv, (SV*)0, 'g', Nullch, 0);
1832 mg = mg_find(PL_reg_sv, 'g');
1833 mg->mg_len = -1;
b8c5462f 1834 }
d6a28714
JH
1835 PL_reg_magic = mg;
1836 PL_reg_oldpos = mg->mg_len;
c76ac1ee 1837 SAVEDESTRUCTOR_X(restore_pos, 0);
d6a28714
JH
1838 }
1839 if (!PL_reg_curpm)
0f79a09d 1840 Newz(22,PL_reg_curpm, 1, PMOP);
d6a28714
JH
1841 PL_reg_curpm->op_pmregexp = prog;
1842 PL_reg_oldcurpm = PL_curpm;
1843 PL_curpm = PL_reg_curpm;
1844 if (RX_MATCH_COPIED(prog)) {
1845 /* Here is a serious problem: we cannot rewrite subbeg,
1846 since it may be needed if this match fails. Thus
1847 $` inside (?{}) could fail... */
1848 PL_reg_oldsaved = prog->subbeg;
1849 PL_reg_oldsavedlen = prog->sublen;
1850 RX_MATCH_COPIED_off(prog);
1851 }
1852 else
1853 PL_reg_oldsaved = Nullch;
1854 prog->subbeg = PL_bostr;
1855 prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
1856 }
1857 prog->startp[0] = startpos - PL_bostr;
1858 PL_reginput = startpos;
1859 PL_regstartp = prog->startp;
1860 PL_regendp = prog->endp;
1861 PL_reglastparen = &prog->lastparen;
1862 prog->lastparen = 0;
1863 PL_regsize = 0;
1864 DEBUG_r(PL_reg_starttry = startpos);
1865 if (PL_reg_start_tmpl <= prog->nparens) {
1866 PL_reg_start_tmpl = prog->nparens*3/2 + 3;
1867 if(PL_reg_start_tmp)
1868 Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1869 else
1870 New(22,PL_reg_start_tmp, PL_reg_start_tmpl, char*);
1871 }
1872
1873 /* XXXX What this code is doing here?!!! There should be no need
1874 to do this again and again, PL_reglastparen should take care of
3dd2943c 1875 this! --ilya*/
dafc8851
JH
1876
1877 /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
1878 * Actually, the code in regcppop() (which Ilya may be meaning by
daf18116
JH
1879 * PL_reglastparen), is not needed at all by the test suite
1880 * (op/regexp, op/pat, op/split), but that code is needed, oddly
1881 * enough, for building DynaLoader, or otherwise this
1882 * "Error: '*' not in typemap in DynaLoader.xs, line 164"
1883 * will happen. Meanwhile, this code *is* needed for the
1884 * above-mentioned test suite tests to succeed. The common theme
1885 * on those tests seems to be returning null fields from matches.
1886 * --jhi */
dafc8851 1887#if 1
d6a28714
JH
1888 sp = prog->startp;
1889 ep = prog->endp;
1890 if (prog->nparens) {
09e8ae3b 1891 for (i = prog->nparens; i > *PL_reglastparen; i--) {
d6a28714
JH
1892 *++sp = -1;
1893 *++ep = -1;
1894 }
1895 }
dafc8851 1896#endif
02db2b7b 1897 REGCP_SET(lastcp);
d6a28714
JH
1898 if (regmatch(prog->program + 1)) {
1899 prog->endp[0] = PL_reginput - PL_bostr;
1900 return 1;
1901 }
02db2b7b 1902 REGCP_UNWIND(lastcp);
d6a28714
JH
1903 return 0;
1904}
1905
02db2b7b
IZ
1906#define RE_UNWIND_BRANCH 1
1907#define RE_UNWIND_BRANCHJ 2
1908
1909union re_unwind_t;
1910
1911typedef struct { /* XX: makes sense to enlarge it... */
1912 I32 type;
1913 I32 prev;
1914 CHECKPOINT lastcp;
1915} re_unwind_generic_t;
1916
1917typedef struct {
1918 I32 type;
1919 I32 prev;
1920 CHECKPOINT lastcp;
1921 I32 lastparen;
1922 regnode *next;
1923 char *locinput;
1924 I32 nextchr;
1925#ifdef DEBUGGING
1926 int regindent;
1927#endif
1928} re_unwind_branch_t;
1929
1930typedef union re_unwind_t {
1931 I32 type;
1932 re_unwind_generic_t generic;
1933 re_unwind_branch_t branch;
1934} re_unwind_t;
1935
d6a28714
JH
1936/*
1937 - regmatch - main matching routine
1938 *
1939 * Conceptually the strategy is simple: check to see whether the current
1940 * node matches, call self recursively to see whether the rest matches,
1941 * and then act accordingly. In practice we make some effort to avoid
1942 * recursion, in particular by going through "ordinary" nodes (that don't
1943 * need to know whether the rest of the match failed) by a loop instead of
1944 * by recursion.
1945 */
1946/* [lwall] I've hoisted the register declarations to the outer block in order to
1947 * maybe save a little bit of pushing and popping on the stack. It also takes
1948 * advantage of machines that use a register save mask on subroutine entry.
1949 */
1950STATIC I32 /* 0 failure, 1 success */
1951S_regmatch(pTHX_ regnode *prog)
1952{
d6a28714
JH
1953 register regnode *scan; /* Current node. */
1954 regnode *next; /* Next node. */
1955 regnode *inner; /* Next node in internal branch. */
1956 register I32 nextchr; /* renamed nextchr - nextchar colides with
1957 function of same name */
1958 register I32 n; /* no or next */
1959 register I32 ln; /* len or last */
1960 register char *s; /* operand or save */
1961 register char *locinput = PL_reginput;
1962 register I32 c1, c2, paren; /* case fold search, parenth */
1963 int minmod = 0, sw = 0, logical = 0;
02db2b7b
IZ
1964 I32 unwind = 0;
1965 I32 firstcp = PL_savestack_ix;
ffc61ed2 1966 register bool do_utf8 = DO_UTF8(PL_reg_sv);
02db2b7b 1967
d6a28714
JH
1968#ifdef DEBUGGING
1969 PL_regindent++;
1970#endif
1971
1972 /* Note that nextchr is a byte even in UTF */
1973 nextchr = UCHARAT(locinput);
1974 scan = prog;
1975 while (scan != NULL) {
1976#define sayNO_L (logical ? (logical = 0, sw = 0, goto cont) : sayNO)
02db2b7b 1977#if 1
d6a28714
JH
1978# define sayYES goto yes
1979# define sayNO goto no
7821416a
IZ
1980# define sayYES_FINAL goto yes_final
1981# define sayYES_LOUD goto yes_loud
1982# define sayNO_FINAL goto no_final
1983# define sayNO_SILENT goto do_no
d6a28714
JH
1984# define saySAME(x) if (x) goto yes; else goto no
1985# define REPORT_CODE_OFF 24
1986#else
1987# define sayYES return 1
1988# define sayNO return 0
7821416a
IZ
1989# define sayYES_FINAL return 1
1990# define sayYES_LOUD return 1
1991# define sayNO_FINAL return 0
1992# define sayNO_SILENT return 0
d6a28714
JH
1993# define saySAME(x) return x
1994#endif
1995 DEBUG_r( {
1996 SV *prop = sv_newmortal();
1997 int docolor = *PL_colors[0];
1998 int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
1aa99e6b 1999 int l = (PL_regeol - locinput) > taill ? taill : (PL_regeol - locinput);
d6a28714
JH
2000 /* The part of the string before starttry has one color
2001 (pref0_len chars), between starttry and current
2002 position another one (pref_len - pref0_len chars),
2003 after the current position the third one.
2004 We assume that pref0_len <= pref_len, otherwise we
2005 decrease pref0_len. */
9041c2e3 2006 int pref_len = (locinput - PL_bostr) > (5 + taill) - l
1aa99e6b
IH
2007 ? (5 + taill) - l : locinput - PL_bostr;
2008 int pref0_len;
d6a28714 2009
1aa99e6b
IH
2010 while (UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2011 pref_len++;
2012 pref0_len = pref_len - (locinput - PL_reg_starttry);
d6a28714 2013 if (l + pref_len < (5 + taill) && l < PL_regeol - locinput)
9041c2e3 2014 l = ( PL_regeol - locinput > (5 + taill) - pref_len
d6a28714 2015 ? (5 + taill) - pref_len : PL_regeol - locinput);
1aa99e6b
IH
2016 while (UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2017 l--;
d6a28714
JH
2018 if (pref0_len < 0)
2019 pref0_len = 0;
2020 if (pref0_len > pref_len)
2021 pref0_len = pref_len;
2022 regprop(prop, scan);
9041c2e3 2023 PerlIO_printf(Perl_debug_log,
b900a521 2024 "%4"IVdf" <%s%.*s%s%s%.*s%s%s%s%.*s%s>%*s|%3"IVdf":%*s%s\n",
9041c2e3
NIS
2025 (IV)(locinput - PL_bostr),
2026 PL_colors[4], pref0_len,
d6a28714 2027 locinput - pref_len, PL_colors[5],
9041c2e3 2028 PL_colors[2], pref_len - pref0_len,
d6a28714
JH
2029 locinput - pref_len + pref0_len, PL_colors[3],
2030 (docolor ? "" : "> <"),
2031 PL_colors[0], l, locinput, PL_colors[1],
2032 15 - l - pref_len + 1,
2033 "",
b900a521 2034 (IV)(scan - PL_regprogram), PL_regindent*2, "",
d6a28714
JH
2035 SvPVX(prop));
2036 } );
2037
2038 next = scan + NEXT_OFF(scan);
2039 if (next == scan)
2040 next = NULL;
2041
2042 switch (OP(scan)) {
2043 case BOL:
2044 if (locinput == PL_bostr
2045 ? PL_regprev == '\n'
9041c2e3 2046 : (PL_multiline &&
d6a28714
JH
2047 (nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2048 {
2049 /* regtill = regbol; */
b8c5462f
JH
2050 break;
2051 }
d6a28714
JH
2052 sayNO;
2053 case MBOL:
2054 if (locinput == PL_bostr
2055 ? PL_regprev == '\n'
2056 : ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n') )
2057 {
b8c5462f
JH
2058 break;
2059 }
d6a28714
JH
2060 sayNO;
2061 case SBOL:
c2a73568 2062 if (locinput == PL_bostr)
b8c5462f 2063 break;
d6a28714
JH
2064 sayNO;
2065 case GPOS:
2066 if (locinput == PL_reg_ganch)
2067 break;
2068 sayNO;
2069 case EOL:
2070 if (PL_multiline)
2071 goto meol;
2072 else
2073 goto seol;
2074 case MEOL:
2075 meol:
2076 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2077 sayNO;
b8c5462f 2078 break;
d6a28714
JH
2079 case SEOL:
2080 seol:
2081 if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
b8c5462f 2082 sayNO;
d6a28714 2083 if (PL_regeol - locinput > 1)
b8c5462f 2084 sayNO;
b8c5462f 2085 break;
d6a28714
JH
2086 case EOS:
2087 if (PL_regeol != locinput)
b8c5462f 2088 sayNO;
d6a28714 2089 break;
ffc61ed2 2090 case SANY:
d6a28714 2091 if (!nextchr && locinput >= PL_regeol)
4633a7c4 2092 sayNO;
b8c5462f 2093 nextchr = UCHARAT(++locinput);
a0d0e21e 2094 break;
ffc61ed2 2095 case REG_ANY:
1aa99e6b
IH
2096 if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2097 sayNO;
2098 if (do_utf8) {
b8c5462f 2099 locinput += PL_utf8skip[nextchr];
d6a28714
JH
2100 if (locinput > PL_regeol)
2101 sayNO;
a0ed51b3 2102 nextchr = UCHARAT(locinput);
a0ed51b3 2103 }
1aa99e6b
IH
2104 else
2105 nextchr = UCHARAT(++locinput);
a0ed51b3 2106 break;
d6a28714 2107 case EXACT:
cd439c50
IZ
2108 s = STRING(scan);
2109 ln = STR_LEN(scan);
1aa99e6b
IH
2110 if (do_utf8 != (UTF!=0)) {
2111 char *l = locinput;
2112 char *e = s + ln;
2113 STRLEN len;
2114 if (do_utf8)
2115 while (s < e) {
2116 if (l >= PL_regeol)
2117 sayNO;
9041c2e3 2118 if (*((U8*)s) != utf8_to_uvchr((U8*)l, &len))
1aa99e6b
IH
2119 sayNO;
2120 s++;
2121 l += len;
2122 }
2123 else
2124 while (s < e) {
2125 if (l >= PL_regeol)
2126 sayNO;
9041c2e3 2127 if (*((U8*)l) != utf8_to_uvchr((U8*)s, &len))
1aa99e6b
IH
2128 sayNO;
2129 s += len;
2130 l++;
2131 }
2132 locinput = l;
2133 nextchr = UCHARAT(locinput);
2134 break;
2135 }
d6a28714
JH
2136 /* Inline the first character, for speed. */
2137 if (UCHARAT(s) != nextchr)
2138 sayNO;
2139 if (PL_regeol - locinput < ln)
2140 sayNO;
2141 if (ln > 1 && memNE(s, locinput, ln))
2142 sayNO;
2143 locinput += ln;
2144 nextchr = UCHARAT(locinput);
2145 break;
2146 case EXACTFL:
b8c5462f
JH
2147 PL_reg_flags |= RF_tainted;
2148 /* FALL THROUGH */
d6a28714 2149 case EXACTF:
cd439c50
IZ
2150 s = STRING(scan);
2151 ln = STR_LEN(scan);
d6a28714 2152
1aa99e6b 2153 if (do_utf8) {
d6a28714 2154 char *l = locinput;
1aa99e6b
IH
2155 char *e;
2156 e = s + ln;
d6a28714
JH
2157 c1 = OP(scan) == EXACTF;
2158 while (s < e) {
1aa99e6b 2159 if (l >= PL_regeol) {
d6a28714
JH
2160 sayNO;
2161 }
2b9d42f0 2162 if ((UTF ? utf8n_to_uvchr((U8*)s, e - s, 0, 0) : *((U8*)s)) !=
1aa99e6b
IH
2163 (c1 ? toLOWER_utf8((U8*)l) : toLOWER_LC_utf8((U8*)l)))
2164 sayNO;
2165 s += UTF ? UTF8SKIP(s) : 1;
d6a28714 2166 l += UTF8SKIP(l);
b8c5462f 2167 }
d6a28714 2168 locinput = l;
a0ed51b3
LW
2169 nextchr = UCHARAT(locinput);
2170 break;
2171 }
d6a28714
JH
2172
2173 /* Inline the first character, for speed. */
2174 if (UCHARAT(s) != nextchr &&
2175 UCHARAT(s) != ((OP(scan) == EXACTF)
2176 ? PL_fold : PL_fold_locale)[nextchr])
a0ed51b3 2177 sayNO;
d6a28714 2178 if (PL_regeol - locinput < ln)
b8c5462f 2179 sayNO;
d6a28714
JH
2180 if (ln > 1 && (OP(scan) == EXACTF
2181 ? ibcmp(s, locinput, ln)
2182 : ibcmp_locale(s, locinput, ln)))
4633a7c4 2183 sayNO;
d6a28714
JH
2184 locinput += ln;
2185 nextchr = UCHARAT(locinput);
a0d0e21e 2186 break;
d6a28714 2187 case ANYOF:
ffc61ed2
JH
2188 if (do_utf8) {
2189 if (!reginclass(scan, (U8*)locinput, do_utf8))
2190 sayNO;
2191 if (locinput >= PL_regeol)
2192 sayNO;
2193 locinput += PL_utf8skip[nextchr];
b8c5462f 2194 nextchr = UCHARAT(locinput);
ffc61ed2
JH
2195 }
2196 else {
2197 if (nextchr < 0)
2198 nextchr = UCHARAT(locinput);
2199 if (!reginclass(scan, (U8*)locinput, do_utf8))
2200 sayNO;
2201 if (!nextchr && locinput >= PL_regeol)
2202 sayNO;
2203 nextchr = UCHARAT(++locinput);
2204 }
b8c5462f 2205 break;
d6a28714 2206 case ALNUML:
b8c5462f
JH
2207 PL_reg_flags |= RF_tainted;
2208 /* FALL THROUGH */
d6a28714 2209 case ALNUM:
b8c5462f 2210 if (!nextchr)
4633a7c4 2211 sayNO;
ffc61ed2
JH
2212 if (do_utf8) {
2213 if (!(OP(scan) == ALNUM
d6a28714
JH
2214 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2215 : isALNUM_LC_utf8((U8*)locinput)))
b8c5462f
JH
2216 {
2217 sayNO;
a0ed51b3 2218 }
b8c5462f 2219 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2220 nextchr = UCHARAT(locinput);
2221 break;
2222 }
ffc61ed2 2223 if (!(OP(scan) == ALNUM
d6a28714 2224 ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
bbce6d69 2225 sayNO;
b8c5462f 2226 nextchr = UCHARAT(++locinput);
a0d0e21e 2227 break;
d6a28714 2228 case NALNUML:
b8c5462f
JH
2229 PL_reg_flags |= RF_tainted;
2230 /* FALL THROUGH */
d6a28714
JH
2231 case NALNUM:
2232 if (!nextchr && locinput >= PL_regeol)
a0ed51b3 2233 sayNO;
ffc61ed2 2234 if (do_utf8) {
8269fa76 2235 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2 2236 if (OP(scan) == NALNUM
d6a28714
JH
2237 ? swash_fetch(PL_utf8_alnum, (U8*)locinput)
2238 : isALNUM_LC_utf8((U8*)locinput))
2239 {
b8c5462f 2240 sayNO;
d6a28714 2241 }
b8c5462f
JH
2242 locinput += PL_utf8skip[nextchr];
2243 nextchr = UCHARAT(locinput);
2244 break;
2245 }
ffc61ed2 2246 if (OP(scan) == NALNUM
d6a28714 2247 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
b8c5462f 2248 sayNO;
76e3520e 2249 nextchr = UCHARAT(++locinput);
a0d0e21e 2250 break;
d6a28714
JH
2251 case BOUNDL:
2252 case NBOUNDL:
3280af22 2253 PL_reg_flags |= RF_tainted;
bbce6d69 2254 /* FALL THROUGH */
d6a28714
JH
2255 case BOUND:
2256 case NBOUND:
2257 /* was last char in word? */
ffc61ed2
JH
2258 if (do_utf8) {
2259 if (locinput == PL_regbol)
2260 ln = PL_regprev;
2261 else {
2262 U8 *r = reghop((U8*)locinput, -1);
9041c2e3 2263
2b9d42f0 2264 ln = utf8n_to_uvchr(r, s - (char*)r, 0, 0);
ffc61ed2
JH
2265 }
2266 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2267 ln = isALNUM_uni(ln);
8269fa76 2268 LOAD_UTF8_CHARCLASS(alnum,"a");
ffc61ed2
JH
2269 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
2270 }
2271 else {
9041c2e3 2272 ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
ffc61ed2
JH
2273 n = isALNUM_LC_utf8((U8*)locinput);
2274 }
a0ed51b3 2275 }
d6a28714 2276 else {
ffc61ed2
JH
2277 ln = (locinput != PL_regbol) ?
2278 UCHARAT(locinput - 1) : PL_regprev;
2279 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
2280 ln = isALNUM(ln);
2281 n = isALNUM(nextchr);
2282 }
2283 else {
2284 ln = isALNUM_LC(ln);
2285 n = isALNUM_LC(nextchr);
2286 }
d6a28714 2287 }
ffc61ed2
JH
2288 if (((!ln) == (!n)) == (OP(scan) == BOUND ||
2289 OP(scan) == BOUNDL))
2290 sayNO;
a0ed51b3 2291 break;
d6a28714 2292 case SPACEL:
3280af22 2293 PL_reg_flags |= RF_tainted;
bbce6d69 2294 /* FALL THROUGH */
d6a28714 2295 case SPACE:
9442cb0e 2296 if (!nextchr)
4633a7c4 2297 sayNO;
1aa99e6b 2298 if (do_utf8) {
fd400ab9 2299 if (UTF8_IS_CONTINUED(nextchr)) {
8269fa76 2300 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2
JH
2301 if (!(OP(scan) == SPACE
2302 ? swash_fetch(PL_utf8_space, (U8*)locinput)
2303 : isSPACE_LC_utf8((U8*)locinput)))
2304 {
2305 sayNO;
2306 }
2307 locinput += PL_utf8skip[nextchr];
2308 nextchr = UCHARAT(locinput);
2309 break;
d6a28714 2310 }
ffc61ed2
JH
2311 if (!(OP(scan) == SPACE
2312 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2313 sayNO;
2314 nextchr = UCHARAT(++locinput);
2315 }
2316 else {
2317 if (!(OP(scan) == SPACE
2318 ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
2319 sayNO;
2320 nextchr = UCHARAT(++locinput);
a0ed51b3 2321 }
a0ed51b3 2322 break;
d6a28714 2323 case NSPACEL:
3280af22 2324 PL_reg_flags |= RF_tainted;
bbce6d69 2325 /* FALL THROUGH */
d6a28714 2326 case NSPACE:
9442cb0e 2327 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2328 sayNO;
1aa99e6b 2329 if (do_utf8) {
8269fa76 2330 LOAD_UTF8_CHARCLASS(space," ");
ffc61ed2 2331 if (OP(scan) == NSPACE
9442cb0e 2332 ? swash_fetch(PL_utf8_space, (U8*)locinput)
d6a28714 2333 : isSPACE_LC_utf8((U8*)locinput))
b8c5462f
JH
2334 {
2335 sayNO;
2336 }
2337 locinput += PL_utf8skip[nextchr];
2338 nextchr = UCHARAT(locinput);
2339 break;
a0ed51b3 2340 }
ffc61ed2 2341 if (OP(scan) == NSPACE
d6a28714 2342 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
4633a7c4 2343 sayNO;
76e3520e 2344 nextchr = UCHARAT(++locinput);
a0d0e21e 2345 break;
d6a28714 2346 case DIGITL:
a0ed51b3
LW
2347 PL_reg_flags |= RF_tainted;
2348 /* FALL THROUGH */
d6a28714 2349 case DIGIT:
9442cb0e 2350 if (!nextchr)
a0ed51b3 2351 sayNO;
1aa99e6b 2352 if (do_utf8) {
8269fa76 2353 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2354 if (!(OP(scan) == DIGIT
9442cb0e
GS
2355 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2356 : isDIGIT_LC_utf8((U8*)locinput)))
dfe13c55 2357 {
a0ed51b3 2358 sayNO;
dfe13c55 2359 }
6f06b55f 2360 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2361 nextchr = UCHARAT(locinput);
2362 break;
2363 }
ffc61ed2 2364 if (!(OP(scan) == DIGIT
9442cb0e 2365 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
a0ed51b3
LW
2366 sayNO;
2367 nextchr = UCHARAT(++locinput);
2368 break;
d6a28714 2369 case NDIGITL:
b8c5462f
JH
2370 PL_reg_flags |= RF_tainted;
2371 /* FALL THROUGH */
d6a28714 2372 case NDIGIT:
9442cb0e 2373 if (!nextchr && locinput >= PL_regeol)
b8c5462f 2374 sayNO;
1aa99e6b 2375 if (do_utf8) {
8269fa76 2376 LOAD_UTF8_CHARCLASS(digit,"0");
ffc61ed2 2377 if (OP(scan) == NDIGIT
9442cb0e
GS
2378 ? swash_fetch(PL_utf8_digit, (U8*)locinput)
2379 : isDIGIT_LC_utf8((U8*)locinput))
2380 {
a0ed51b3 2381 sayNO;
9442cb0e 2382 }
6f06b55f 2383 locinput += PL_utf8skip[nextchr];
a0ed51b3
LW
2384 nextchr = UCHARAT(locinput);
2385 break;
2386 }
ffc61ed2 2387 if (OP(scan) == NDIGIT
9442cb0e 2388 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
a0ed51b3
LW
2389 sayNO;
2390 nextchr = UCHARAT(++locinput);
2391 break;
2392 case CLUMP:
8269fa76 2393 LOAD_UTF8_CHARCLASS(mark,"~");
dfe13c55 2394 if (locinput >= PL_regeol || swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3 2395 sayNO;
6f06b55f 2396 locinput += PL_utf8skip[nextchr];
dfe13c55 2397 while (locinput < PL_regeol && swash_fetch(PL_utf8_mark,(U8*)locinput))
a0ed51b3
LW
2398 locinput += UTF8SKIP(locinput);
2399 if (locinput > PL_regeol)
2400 sayNO;
2401 nextchr = UCHARAT(locinput);
2402 break;
c8756f30 2403 case REFFL:
3280af22 2404 PL_reg_flags |= RF_tainted;
c8756f30 2405 /* FALL THROUGH */
c277df42 2406 case REF:
c8756f30 2407 case REFF:
c277df42 2408 n = ARG(scan); /* which paren pair */
cf93c79d 2409 ln = PL_regstartp[n];
2c2d71f5 2410 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
cf93c79d 2411 if (*PL_reglastparen < n || ln == -1)
af3f8c16 2412 sayNO; /* Do not match unless seen CLOSEn. */
cf93c79d 2413 if (ln == PL_regendp[n])
a0d0e21e 2414 break;
a0ed51b3 2415
cf93c79d 2416 s = PL_bostr + ln;
1aa99e6b 2417 if (do_utf8 && OP(scan) != REF) { /* REF can do byte comparison */
a0ed51b3 2418 char *l = locinput;
cf93c79d 2419 char *e = PL_bostr + PL_regendp[n];
a0ed51b3
LW
2420 /*
2421 * Note that we can't do the "other character" lookup trick as
2422 * in the 8-bit case (no pun intended) because in Unicode we
2423 * have to map both upper and title case to lower case.
2424 */
2425 if (OP(scan) == REFF) {
2426 while (s < e) {
2427 if (l >= PL_regeol)
2428 sayNO;
dfe13c55 2429 if (toLOWER_utf8((U8*)s) != toLOWER_utf8((U8*)l))
a0ed51b3
LW
2430 sayNO;
2431 s += UTF8SKIP(s);
2432 l += UTF8SKIP(l);
2433 }
2434 }
2435 else {
2436 while (s < e) {
2437 if (l >= PL_regeol)
2438 sayNO;
dfe13c55 2439 if (toLOWER_LC_utf8((U8*)s) != toLOWER_LC_utf8((U8*)l))
a0ed51b3
LW
2440 sayNO;
2441 s += UTF8SKIP(s);
2442 l += UTF8SKIP(l);
2443 }
2444 }
2445 locinput = l;
2446 nextchr = UCHARAT(locinput);
2447 break;
2448 }
2449
a0d0e21e 2450 /* Inline the first character, for speed. */
76e3520e 2451 if (UCHARAT(s) != nextchr &&
c8756f30
AK
2452 (OP(scan) == REF ||
2453 (UCHARAT(s) != ((OP(scan) == REFF
22c35a8c 2454 ? PL_fold : PL_fold_locale)[nextchr]))))
4633a7c4 2455 sayNO;
cf93c79d 2456 ln = PL_regendp[n] - ln;
3280af22 2457 if (locinput + ln > PL_regeol)
4633a7c4 2458 sayNO;
c8756f30
AK
2459 if (ln > 1 && (OP(scan) == REF
2460 ? memNE(s, locinput, ln)
2461 : (OP(scan) == REFF
2462 ? ibcmp(s, locinput, ln)
2463 : ibcmp_locale(s, locinput, ln))))
4633a7c4 2464 sayNO;
a0d0e21e 2465 locinput += ln;
76e3520e 2466 nextchr = UCHARAT(locinput);
a0d0e21e
LW
2467 break;
2468
2469 case NOTHING:
c277df42 2470 case TAIL:
a0d0e21e
LW
2471 break;
2472 case BACK:
2473 break;
c277df42
IZ
2474 case EVAL:
2475 {
2476 dSP;
533c011a 2477 OP_4tree *oop = PL_op;
3280af22
NIS
2478 COP *ocurcop = PL_curcop;
2479 SV **ocurpad = PL_curpad;
c277df42 2480 SV *ret;
9041c2e3 2481
c277df42 2482 n = ARG(scan);
533c011a 2483 PL_op = (OP_4tree*)PL_regdata->data[n];
d7d93a81 2484 DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
dfad63ad 2485 PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
cf93c79d 2486 PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
c277df42 2487
cea2e8a9 2488 CALLRUNOPS(aTHX); /* Scalar context. */
c277df42
IZ
2489 SPAGAIN;
2490 ret = POPs;
2491 PUTBACK;
9041c2e3 2492
0f5d15d6
IZ
2493 PL_op = oop;
2494 PL_curpad = ocurpad;
2495 PL_curcop = ocurcop;
c277df42 2496 if (logical) {
0f5d15d6
IZ
2497 if (logical == 2) { /* Postponed subexpression. */
2498 regexp *re;
22c35a8c 2499 MAGIC *mg = Null(MAGIC*);
0f5d15d6 2500 re_cc_state state;
0f5d15d6
IZ
2501 CHECKPOINT cp, lastcp;
2502
2503 if(SvROK(ret) || SvRMAGICAL(ret)) {
2504 SV *sv = SvROK(ret) ? SvRV(ret) : ret;
2505
2506 if(SvMAGICAL(sv))
2507 mg = mg_find(sv, 'r');
2508 }
2509 if (mg) {
2510 re = (regexp *)mg->mg_obj;
df0003d4 2511 (void)ReREFCNT_inc(re);
0f5d15d6
IZ
2512 }
2513 else {
2514 STRLEN len;
2515 char *t = SvPV(ret, len);
2516 PMOP pm;
2517 char *oprecomp = PL_regprecomp;
2518 I32 osize = PL_regsize;
2519 I32 onpar = PL_regnpar;
2520
5fcd1c1b 2521 Zero(&pm, 1, PMOP);
cea2e8a9 2522 re = CALLREGCOMP(aTHX_ t, t + len, &pm);
9041c2e3 2523 if (!(SvFLAGS(ret)
0f5d15d6
IZ
2524 & (SVs_TEMP | SVs_PADTMP | SVf_READONLY)))
2525 sv_magic(ret,(SV*)ReREFCNT_inc(re),'r',0,0);
2526 PL_regprecomp = oprecomp;
2527 PL_regsize = osize;
2528 PL_regnpar = onpar;
2529 }
2530 DEBUG_r(
9041c2e3 2531 PerlIO_printf(Perl_debug_log,
0f5d15d6
IZ
2532 "Entering embedded `%s%.60s%s%s'\n",
2533 PL_colors[0],
2534 re->precomp,
2535 PL_colors[1],
2536 (strlen(re->precomp) > 60 ? "..." : ""))
2537 );
2538 state.node = next;
2539 state.prev = PL_reg_call_cc;
2540 state.cc = PL_regcc;
2541 state.re = PL_reg_re;
2542
2ab05381 2543 PL_regcc = 0;
9041c2e3 2544
0f5d15d6 2545 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 2546 REGCP_SET(lastcp);
0f5d15d6
IZ
2547 cache_re(re);
2548 state.ss = PL_savestack_ix;
2549 *PL_reglastparen = 0;
2550 PL_reg_call_cc = &state;
2551 PL_reginput = locinput;
2c2d71f5
JH
2552
2553 /* XXXX This is too dramatic a measure... */
2554 PL_reg_maxiter = 0;
2555
0f5d15d6 2556 if (regmatch(re->program + 1)) {
2c914db6
IZ
2557 /* Even though we succeeded, we need to restore
2558 global variables, since we may be wrapped inside
2559 SUSPEND, thus the match may be not finished yet. */
2560
2561 /* XXXX Do this only if SUSPENDed? */
2562 PL_reg_call_cc = state.prev;
2563 PL_regcc = state.cc;
2564 PL_reg_re = state.re;
2565 cache_re(PL_reg_re);
2566
2567 /* XXXX This is too dramatic a measure... */
2568 PL_reg_maxiter = 0;
2569
2570 /* These are needed even if not SUSPEND. */
0f5d15d6
IZ
2571 ReREFCNT_dec(re);
2572 regcpblow(cp);
2573 sayYES;
2574 }
0f5d15d6 2575 ReREFCNT_dec(re);
02db2b7b 2576 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
2577 regcppop();
2578 PL_reg_call_cc = state.prev;
2579 PL_regcc = state.cc;
2580 PL_reg_re = state.re;
d3790889 2581 cache_re(PL_reg_re);
2c2d71f5
JH
2582
2583 /* XXXX This is too dramatic a measure... */
2584 PL_reg_maxiter = 0;
2585
0f5d15d6
IZ
2586 sayNO;
2587 }
c277df42 2588 sw = SvTRUE(ret);
0f5d15d6 2589 logical = 0;
a0ed51b3
LW
2590 }
2591 else
3280af22 2592 sv_setsv(save_scalar(PL_replgv), ret);
c277df42
IZ
2593 break;
2594 }
a0d0e21e 2595 case OPEN:
c277df42 2596 n = ARG(scan); /* which paren pair */
3280af22
NIS
2597 PL_reg_start_tmp[n] = locinput;
2598 if (n > PL_regsize)
2599 PL_regsize = n;
a0d0e21e
LW
2600 break;
2601 case CLOSE:
c277df42 2602 n = ARG(scan); /* which paren pair */
cf93c79d
IZ
2603 PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
2604 PL_regendp[n] = locinput - PL_bostr;
3280af22
NIS
2605 if (n > *PL_reglastparen)
2606 *PL_reglastparen = n;
a0d0e21e 2607 break;
c277df42
IZ
2608 case GROUPP:
2609 n = ARG(scan); /* which paren pair */
cf93c79d 2610 sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
c277df42
IZ
2611 break;
2612 case IFTHEN:
2c2d71f5 2613 PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
c277df42
IZ
2614 if (sw)
2615 next = NEXTOPER(NEXTOPER(scan));
2616 else {
2617 next = scan + ARG(scan);
2618 if (OP(next) == IFTHEN) /* Fake one. */
2619 next = NEXTOPER(NEXTOPER(next));
2620 }
2621 break;
2622 case LOGICAL:
0f5d15d6 2623 logical = scan->flags;
c277df42 2624 break;
2ab05381
IZ
2625/*******************************************************************
2626 PL_regcc contains infoblock about the innermost (...)* loop, and
2627 a pointer to the next outer infoblock.
2628
2629 Here is how Y(A)*Z is processed (if it is compiled into CURLYX/WHILEM):
2630
2631 1) After matching X, regnode for CURLYX is processed;
2632
9041c2e3 2633 2) This regnode creates infoblock on the stack, and calls
2ab05381
IZ
2634 regmatch() recursively with the starting point at WHILEM node;
2635
2636 3) Each hit of WHILEM node tries to match A and Z (in the order
2637 depending on the current iteration, min/max of {min,max} and
2638 greediness). The information about where are nodes for "A"
2639 and "Z" is read from the infoblock, as is info on how many times "A"
2640 was already matched, and greediness.
2641
2642 4) After A matches, the same WHILEM node is hit again.
2643
2644 5) Each time WHILEM is hit, PL_regcc is the infoblock created by CURLYX
2645 of the same pair. Thus when WHILEM tries to match Z, it temporarily
2646 resets PL_regcc, since this Y(A)*Z can be a part of some other loop:
2647 as in (Y(A)*Z)*. If Z matches, the automaton will hit the WHILEM node
2648 of the external loop.
2649
2650 Currently present infoblocks form a tree with a stem formed by PL_curcc
2651 and whatever it mentions via ->next, and additional attached trees
2652 corresponding to temporarily unset infoblocks as in "5" above.
2653
9041c2e3 2654 In the following picture infoblocks for outer loop of
2ab05381
IZ
2655 (Y(A)*?Z)*?T are denoted O, for inner I. NULL starting block
2656 is denoted by x. The matched string is YAAZYAZT. Temporarily postponed
2657 infoblocks are drawn below the "reset" infoblock.
2658
2659 In fact in the picture below we do not show failed matches for Z and T
2660 by WHILEM blocks. [We illustrate minimal matches, since for them it is
2661 more obvious *why* one needs to *temporary* unset infoblocks.]
2662
2663 Matched REx position InfoBlocks Comment
2664 (Y(A)*?Z)*?T x
2665 Y(A)*?Z)*?T x <- O
2666 Y (A)*?Z)*?T x <- O
2667 Y A)*?Z)*?T x <- O <- I
2668 YA )*?Z)*?T x <- O <- I
2669 YA A)*?Z)*?T x <- O <- I
2670 YAA )*?Z)*?T x <- O <- I
2671 YAA Z)*?T x <- O # Temporary unset I
2672 I
2673
2674 YAAZ Y(A)*?Z)*?T x <- O
2675 I
2676
2677 YAAZY (A)*?Z)*?T x <- O
2678 I
2679
2680 YAAZY A)*?Z)*?T x <- O <- I
2681 I
2682
2683 YAAZYA )*?Z)*?T x <- O <- I
2684 I
2685
2686 YAAZYA Z)*?T x <- O # Temporary unset I
2687 I,I
2688
2689 YAAZYAZ )*?T x <- O
2690 I,I
2691
2692 YAAZYAZ T x # Temporary unset O
2693 O
2694 I,I
2695
2696 YAAZYAZT x
2697 O
2698 I,I
2699 *******************************************************************/
a0d0e21e
LW
2700 case CURLYX: {
2701 CURCUR cc;
3280af22 2702 CHECKPOINT cp = PL_savestack_ix;
cb434fcc
IZ
2703 /* No need to save/restore up to this paren */
2704 I32 parenfloor = scan->flags;
c277df42
IZ
2705
2706 if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
2707 next += ARG(next);
3280af22
NIS
2708 cc.oldcc = PL_regcc;
2709 PL_regcc = &cc;
cb434fcc
IZ
2710 /* XXXX Probably it is better to teach regpush to support
2711 parenfloor > PL_regsize... */
2712 if (parenfloor > *PL_reglastparen)
2713 parenfloor = *PL_reglastparen; /* Pessimization... */
2714 cc.parenfloor = parenfloor;
a0d0e21e
LW
2715 cc.cur = -1;
2716 cc.min = ARG1(scan);
2717 cc.max = ARG2(scan);
c277df42 2718 cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
a0d0e21e
LW
2719 cc.next = next;
2720 cc.minmod = minmod;
2721 cc.lastloc = 0;
3280af22 2722 PL_reginput = locinput;
a0d0e21e
LW
2723 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
2724 regcpblow(cp);
3280af22 2725 PL_regcc = cc.oldcc;
4633a7c4 2726 saySAME(n);
a0d0e21e
LW
2727 }
2728 /* NOT REACHED */
2729 case WHILEM: {
2730 /*
2731 * This is really hard to understand, because after we match
2732 * what we're trying to match, we must make sure the rest of
2c2d71f5 2733 * the REx is going to match for sure, and to do that we have
a0d0e21e
LW
2734 * to go back UP the parse tree by recursing ever deeper. And
2735 * if it fails, we have to reset our parent's current state
2736 * that we can try again after backing off.
2737 */
2738
c277df42 2739 CHECKPOINT cp, lastcp;
3280af22 2740 CURCUR* cc = PL_regcc;
c277df42
IZ
2741 char *lastloc = cc->lastloc; /* Detection of 0-len. */
2742
4633a7c4 2743 n = cc->cur + 1; /* how many we know we matched */
3280af22 2744 PL_reginput = locinput;
a0d0e21e 2745
c277df42 2746 DEBUG_r(
9041c2e3
NIS
2747 PerlIO_printf(Perl_debug_log,
2748 "%*s %ld out of %ld..%ld cc=%lx\n",
3280af22 2749 REPORT_CODE_OFF+PL_regindent*2, "",
9041c2e3 2750 (long)n, (long)cc->min,
c277df42
IZ
2751 (long)cc->max, (long)cc)
2752 );
4633a7c4 2753
a0d0e21e
LW
2754 /* If degenerate scan matches "", assume scan done. */
2755
579cf2c3 2756 if (locinput == cc->lastloc && n >= cc->min) {
3280af22 2757 PL_regcc = cc->oldcc;
2ab05381
IZ
2758 if (PL_regcc)
2759 ln = PL_regcc->cur;
c277df42 2760 DEBUG_r(
c3464db5
DD
2761 PerlIO_printf(Perl_debug_log,
2762 "%*s empty match detected, try continuation...\n",
3280af22 2763 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2764 );
a0d0e21e 2765 if (regmatch(cc->next))
4633a7c4 2766 sayYES;
2ab05381
IZ
2767 if (PL_regcc)
2768 PL_regcc->cur = ln;
3280af22 2769 PL_regcc = cc;
4633a7c4 2770 sayNO;
a0d0e21e
LW
2771 }
2772
2773 /* First just match a string of min scans. */
2774
2775 if (n < cc->min) {
2776 cc->cur = n;
2777 cc->lastloc = locinput;
4633a7c4
LW
2778 if (regmatch(cc->scan))
2779 sayYES;
2780 cc->cur = n - 1;
c277df42 2781 cc->lastloc = lastloc;
4633a7c4 2782 sayNO;
a0d0e21e
LW
2783 }
2784
2c2d71f5
JH
2785 if (scan->flags) {
2786 /* Check whether we already were at this position.
2787 Postpone detection until we know the match is not
2788 *that* much linear. */
2789 if (!PL_reg_maxiter) {
2790 PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
2791 PL_reg_leftiter = PL_reg_maxiter;
2792 }
2793 if (PL_reg_leftiter-- == 0) {
2794 I32 size = (PL_reg_maxiter + 7)/8;
2795 if (PL_reg_poscache) {
2796 if (PL_reg_poscache_size < size) {
2797 Renew(PL_reg_poscache, size, char);
2798 PL_reg_poscache_size = size;
2799 }
2800 Zero(PL_reg_poscache, size, char);
2801 }
2802 else {
2803 PL_reg_poscache_size = size;
2804 Newz(29, PL_reg_poscache, size, char);
2805 }
2806 DEBUG_r(
2807 PerlIO_printf(Perl_debug_log,
2808 "%sDetected a super-linear match, switching on caching%s...\n",
2809 PL_colors[4], PL_colors[5])
2810 );
2811 }
2812 if (PL_reg_leftiter < 0) {
2813 I32 o = locinput - PL_bostr, b;
2814
2815 o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
2816 b = o % 8;
2817 o /= 8;
2818 if (PL_reg_poscache[o] & (1<<b)) {
2819 DEBUG_r(
2820 PerlIO_printf(Perl_debug_log,
2821 "%*s already tried at this position...\n",
2822 REPORT_CODE_OFF+PL_regindent*2, "")
2823 );
7821416a 2824 sayNO_SILENT;
2c2d71f5
JH
2825 }
2826 PL_reg_poscache[o] |= (1<<b);
2827 }
2828 }
2829
a0d0e21e
LW
2830 /* Prefer next over scan for minimal matching. */
2831
2832 if (cc->minmod) {
3280af22 2833 PL_regcc = cc->oldcc;
2ab05381
IZ
2834 if (PL_regcc)
2835 ln = PL_regcc->cur;
5f05dabc 2836 cp = regcppush(cc->parenfloor);
02db2b7b 2837 REGCP_SET(lastcp);
5f05dabc 2838 if (regmatch(cc->next)) {
c277df42 2839 regcpblow(cp);
4633a7c4 2840 sayYES; /* All done. */
5f05dabc 2841 }
02db2b7b 2842 REGCP_UNWIND(lastcp);
5f05dabc 2843 regcppop();
2ab05381
IZ
2844 if (PL_regcc)
2845 PL_regcc->cur = ln;
3280af22 2846 PL_regcc = cc;
a0d0e21e 2847
c277df42 2848 if (n >= cc->max) { /* Maximum greed exceeded? */
9041c2e3 2849 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
3280af22
NIS
2850 && !(PL_reg_flags & RF_warned)) {
2851 PL_reg_flags |= RF_warned;
e476b1b5 2852 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
2f3ca594
GS
2853 "Complex regular subexpression recursion",
2854 REG_INFTY - 1);
c277df42 2855 }
4633a7c4 2856 sayNO;
c277df42 2857 }
a687059c 2858
c277df42 2859 DEBUG_r(
c3464db5
DD
2860 PerlIO_printf(Perl_debug_log,
2861 "%*s trying longer...\n",
3280af22 2862 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42 2863 );
a0d0e21e 2864 /* Try scanning more and see if it helps. */
3280af22 2865 PL_reginput = locinput;
a0d0e21e
LW
2866 cc->cur = n;
2867 cc->lastloc = locinput;
5f05dabc 2868 cp = regcppush(cc->parenfloor);
02db2b7b 2869 REGCP_SET(lastcp);
5f05dabc 2870 if (regmatch(cc->scan)) {
c277df42 2871 regcpblow(cp);
4633a7c4 2872 sayYES;
5f05dabc 2873 }
02db2b7b 2874 REGCP_UNWIND(lastcp);
5f05dabc 2875 regcppop();
4633a7c4 2876 cc->cur = n - 1;
c277df42 2877 cc->lastloc = lastloc;
4633a7c4 2878 sayNO;
a0d0e21e
LW
2879 }
2880
2881 /* Prefer scan over next for maximal matching. */
2882
2883 if (n < cc->max) { /* More greed allowed? */
5f05dabc 2884 cp = regcppush(cc->parenfloor);
a0d0e21e
LW
2885 cc->cur = n;
2886 cc->lastloc = locinput;
02db2b7b 2887 REGCP_SET(lastcp);
5f05dabc 2888 if (regmatch(cc->scan)) {
c277df42 2889 regcpblow(cp);
4633a7c4 2890 sayYES;
5f05dabc 2891 }
02db2b7b 2892 REGCP_UNWIND(lastcp);
a0d0e21e 2893 regcppop(); /* Restore some previous $<digit>s? */
3280af22 2894 PL_reginput = locinput;
c277df42 2895 DEBUG_r(
c3464db5
DD
2896 PerlIO_printf(Perl_debug_log,
2897 "%*s failed, try continuation...\n",
3280af22 2898 REPORT_CODE_OFF+PL_regindent*2, "")
c277df42
IZ
2899 );
2900 }
9041c2e3 2901 if (ckWARN(WARN_REGEXP) && n >= REG_INFTY
599cee73 2902 && !(PL_reg_flags & RF_warned)) {
3280af22 2903 PL_reg_flags |= RF_warned;
e476b1b5 2904 Perl_warner(aTHX_ WARN_REGEXP, "%s limit (%d) exceeded",
cb5d145d
GS
2905 "Complex regular subexpression recursion",
2906 REG_INFTY - 1);
a0d0e21e
LW
2907 }
2908
2909 /* Failed deeper matches of scan, so see if this one works. */
3280af22 2910 PL_regcc = cc->oldcc;
2ab05381
IZ
2911 if (PL_regcc)
2912 ln = PL_regcc->cur;
a0d0e21e 2913 if (regmatch(cc->next))
4633a7c4 2914 sayYES;
2ab05381
IZ
2915 if (PL_regcc)
2916 PL_regcc->cur = ln;
3280af22 2917 PL_regcc = cc;
4633a7c4 2918 cc->cur = n - 1;
c277df42 2919 cc->lastloc = lastloc;
4633a7c4 2920 sayNO;
a0d0e21e
LW
2921 }
2922 /* NOT REACHED */
9041c2e3 2923 case BRANCHJ:
c277df42
IZ
2924 next = scan + ARG(scan);
2925 if (next == scan)
2926 next = NULL;
2927 inner = NEXTOPER(NEXTOPER(scan));
2928 goto do_branch;
9041c2e3 2929 case BRANCH:
c277df42
IZ
2930 inner = NEXTOPER(scan);
2931 do_branch:
2932 {
2933 CHECKPOINT lastcp;
2934 c1 = OP(scan);
2935 if (OP(next) != c1) /* No choice. */
2936 next = inner; /* Avoid recursion. */
a0d0e21e 2937 else {
02db2b7b
IZ
2938 I32 lastparen = *PL_reglastparen;
2939 I32 unwind1;
2940 re_unwind_branch_t *uw;
2941
2942 /* Put unwinding data on stack */
2943 unwind1 = SSNEWt(1,re_unwind_branch_t);
2944 uw = SSPTRt(unwind1,re_unwind_branch_t);
2945 uw->prev = unwind;
2946 unwind = unwind1;
2947 uw->type = ((c1 == BRANCH)
2948 ? RE_UNWIND_BRANCH
2949 : RE_UNWIND_BRANCHJ);
2950 uw->lastparen = lastparen;
2951 uw->next = next;
2952 uw->locinput = locinput;
2953 uw->nextchr = nextchr;
2954#ifdef DEBUGGING
2955 uw->regindent = ++PL_regindent;
2956#endif
c277df42 2957
02db2b7b
IZ
2958 REGCP_SET(uw->lastcp);
2959
2960 /* Now go into the first branch */
2961 next = inner;
a687059c 2962 }
a0d0e21e
LW
2963 }
2964 break;
2965 case MINMOD:
2966 minmod = 1;
2967 break;
c277df42
IZ
2968 case CURLYM:
2969 {
00db4c45 2970 I32 l = 0;
c277df42 2971 CHECKPOINT lastcp;
9041c2e3 2972
c277df42
IZ
2973 /* We suppose that the next guy does not need
2974 backtracking: in particular, it is of constant length,
2975 and has no parenths to influence future backrefs. */
2976 ln = ARG1(scan); /* min to match */
2977 n = ARG2(scan); /* max to match */
c277df42
IZ
2978 paren = scan->flags;
2979 if (paren) {
3280af22
NIS
2980 if (paren > PL_regsize)
2981 PL_regsize = paren;
2982 if (paren > *PL_reglastparen)
2983 *PL_reglastparen = paren;
c277df42 2984 }
dc45a647 2985 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
c277df42
IZ
2986 if (paren)
2987 scan += NEXT_OFF(scan); /* Skip former OPEN. */
3280af22 2988 PL_reginput = locinput;
c277df42
IZ
2989 if (minmod) {
2990 minmod = 0;
2991 if (ln && regrepeat_hard(scan, ln, &l) < ln)
2992 sayNO;
5f4b28b2 2993 if (ln && l == 0 && n >= ln
c277df42
IZ
2994 /* In fact, this is tricky. If paren, then the
2995 fact that we did/didnot match may influence
2996 future execution. */
2997 && !(paren && ln == 0))
2998 ln = n;
3280af22 2999 locinput = PL_reginput;
22c35a8c 3000 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3001 c1 = (U8)*STRING(next);
c277df42 3002 if (OP(next) == EXACTF)
22c35a8c 3003 c2 = PL_fold[c1];
c277df42 3004 else if (OP(next) == EXACTFL)
22c35a8c 3005 c2 = PL_fold_locale[c1];
c277df42
IZ
3006 else
3007 c2 = c1;
a0ed51b3
LW
3008 }
3009 else
c277df42 3010 c1 = c2 = -1000;
02db2b7b 3011 REGCP_SET(lastcp);
5f4b28b2 3012 /* This may be improved if l == 0. */
c277df42
IZ
3013 while (n >= ln || (n == REG_INFTY && ln > 0 && l)) { /* ln overflow ? */
3014 /* If it could work, try it. */
3015 if (c1 == -1000 ||
3280af22
NIS
3016 UCHARAT(PL_reginput) == c1 ||
3017 UCHARAT(PL_reginput) == c2)
c277df42
IZ
3018 {
3019 if (paren) {
3020 if (n) {
cf93c79d
IZ
3021 PL_regstartp[paren] =
3022 HOPc(PL_reginput, -l) - PL_bostr;
3023 PL_regendp[paren] = PL_reginput - PL_bostr;
a0ed51b3
LW
3024 }
3025 else
cf93c79d 3026 PL_regendp[paren] = -1;
c277df42
IZ
3027 }
3028 if (regmatch(next))
3029 sayYES;
02db2b7b 3030 REGCP_UNWIND(lastcp);
c277df42
IZ
3031 }
3032 /* Couldn't or didn't -- move forward. */
3280af22 3033 PL_reginput = locinput;
c277df42
IZ
3034 if (regrepeat_hard(scan, 1, &l)) {
3035 ln++;
3280af22 3036 locinput = PL_reginput;
c277df42
IZ
3037 }
3038 else
3039 sayNO;
3040 }
a0ed51b3
LW
3041 }
3042 else {
c277df42
IZ
3043 n = regrepeat_hard(scan, n, &l);
3044 if (n != 0 && l == 0
3045 /* In fact, this is tricky. If paren, then the
3046 fact that we did/didnot match may influence
3047 future execution. */
3048 && !(paren && ln == 0))
3049 ln = n;
3280af22 3050 locinput = PL_reginput;
c277df42 3051 DEBUG_r(
5c0ca799 3052 PerlIO_printf(Perl_debug_log,
faccc32b 3053 "%*s matched %"IVdf" times, len=%"IVdf"...\n",
7b0972df 3054 (int)(REPORT_CODE_OFF+PL_regindent*2), "",
faccc32b 3055 (IV) n, (IV)l)
c277df42
IZ
3056 );
3057 if (n >= ln) {
22c35a8c 3058 if (PL_regkind[(U8)OP(next)] == EXACT) {
cd439c50 3059 c1 = (U8)*STRING(next);
c277df42 3060 if (OP(next) == EXACTF)
22c35a8c 3061 c2 = PL_fold[c1];
c277df42 3062 else if (OP(next) == EXACTFL)
22c35a8c 3063 c2 = PL_fold_locale[c1];
c277df42
IZ
3064 else
3065 c2 = c1;
a0ed51b3
LW
3066 }
3067 else
c277df42
IZ
3068 c1 = c2 = -1000;
3069 }
02db2b7b 3070 REGCP_SET(lastcp);
c277df42
IZ
3071 while (n >= ln) {
3072 /* If it could work, try it. */
3073 if (c1 == -1000 ||
3280af22
NIS
3074 UCHARAT(PL_reginput) == c1 ||
3075 UCHARAT(PL_reginput) == c2)
a0ed51b3
LW
3076 {
3077 DEBUG_r(
c3464db5 3078 PerlIO_printf(Perl_debug_log,
7b0972df
JH
3079 "%*s trying tail with n=%"IVdf"...\n",
3080 (int)(REPORT_CODE_OFF+PL_regindent*2), "", (IV)n)
a0ed51b3
LW
3081 );
3082 if (paren) {
3083 if (n) {
cf93c79d
IZ
3084 PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
3085 PL_regendp[paren] = PL_reginput - PL_bostr;
c277df42 3086 }
a0ed51b3 3087 else
cf93c79d 3088 PL_regendp[paren] = -1;
c277df42 3089 }
a0ed51b3
LW
3090 if (regmatch(next))
3091 sayYES;
02db2b7b 3092 REGCP_UNWIND(lastcp);
a0ed51b3 3093 }
c277df42
IZ
3094 /* Couldn't or didn't -- back up. */
3095 n--;
dfe13c55 3096 locinput = HOPc(locinput, -l);
3280af22 3097 PL_reginput = locinput;
c277df42
IZ
3098 }
3099 }
3100 sayNO;
3101 break;
3102 }
3103 case CURLYN:
3104 paren = scan->flags; /* Which paren to set */
3280af22
NIS
3105 if (paren > PL_regsize)
3106 PL_regsize = paren;
3107 if (paren > *PL_reglastparen)
3108 *PL_reglastparen = paren;
c277df42
IZ
3109 ln = ARG1(scan); /* min to match */
3110 n = ARG2(scan); /* max to match */
dc45a647 3111 scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
c277df42 3112 goto repeat;
a0d0e21e 3113 case CURLY:
c277df42 3114 paren = 0;
a0d0e21e
LW
3115 ln = ARG1(scan); /* min to match */
3116 n = ARG2(scan); /* max to match */
dc45a647 3117 scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
a0d0e21e
LW
3118 goto repeat;
3119 case STAR:
3120 ln = 0;
c277df42 3121 n = REG_INFTY;
a0d0e21e 3122 scan = NEXTOPER(scan);
c277df42 3123 paren = 0;
a0d0e21e
LW
3124 goto repeat;
3125 case PLUS:
c277df42
IZ
3126 ln = 1;
3127 n = REG_INFTY;
3128 scan = NEXTOPER(scan);
3129 paren = 0;
3130 repeat:
a0d0e21e
LW
3131 /*
3132 * Lookahead to avoid useless match attempts
3133 * when we know what character comes next.
3134 */
22c35a8c 3135 if (PL_regkind[(U8)OP(next)] == EXACT) {
1aa99e6b
IH
3136 U8 *s = (U8*)STRING(next);
3137 if (!UTF) {
3138 c2 = c1 = *s;
3139 if (OP(next) == EXACTF)
3140 c2 = PL_fold[c1];
3141 else if (OP(next) == EXACTFL)
3142 c2 = PL_fold_locale[c1];
3143 }
3144 else { /* UTF */
3145 if (OP(next) == EXACTF) {
3146 c1 = to_utf8_lower(s);
3147 c2 = to_utf8_upper(s);
3148 }
3149 else {
9041c2e3 3150 c2 = c1 = utf8_to_uvchr(s, NULL);
1aa99e6b
IH
3151 }
3152 }
bbce6d69 3153 }
a0d0e21e 3154 else
bbce6d69 3155 c1 = c2 = -1000;
3280af22 3156 PL_reginput = locinput;
a0d0e21e 3157 if (minmod) {
c277df42 3158 CHECKPOINT lastcp;
a0d0e21e
LW
3159 minmod = 0;
3160 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 3161 sayNO;
a0ed51b3 3162 locinput = PL_reginput;
02db2b7b 3163 REGCP_SET(lastcp);
0fe9bf95 3164 if (c1 != -1000) {
1aa99e6b 3165 char *e; /* Should not check after this */
0fe9bf95
IZ
3166 char *old = locinput;
3167
1aa99e6b 3168 if (n == REG_INFTY) {
0fe9bf95 3169 e = PL_regeol - 1;
1aa99e6b
IH
3170 if (do_utf8)
3171 while (UTF8_IS_CONTINUATION(*(U8*)e))
3172 e--;
3173 }
3174 else if (do_utf8) {
3175 int m = n - ln;
3176 for (e = locinput;
3177 m >0 && e + UTF8SKIP(e) <= PL_regeol; m--)
3178 e += UTF8SKIP(e);
3179 }
3180 else {
3181 e = locinput + n - ln;
3182 if (e >= PL_regeol)
3183 e = PL_regeol - 1;
3184 }
0fe9bf95 3185 while (1) {
1aa99e6b 3186 int count;
0fe9bf95 3187 /* Find place 'next' could work */
1aa99e6b
IH
3188 if (!do_utf8) {
3189 if (c1 == c2) {
3190 while (locinput <= e && *locinput != c1)
3191 locinput++;
3192 } else {
9041c2e3 3193 while (locinput <= e
1aa99e6b
IH
3194 && *locinput != c1
3195 && *locinput != c2)
3196 locinput++;
3197 }
3198 count = locinput - old;
3199 }
3200 else {
3201 STRLEN len;
3202 if (c1 == c2) {
3203 for (count = 0;
3204 locinput <= e &&
9041c2e3 3205 utf8_to_uvchr((U8*)locinput, &len) != c1;
1aa99e6b
IH
3206 count++)
3207 locinput += len;
3208
3209 } else {
3210 for (count = 0; locinput <= e; count++) {
9041c2e3 3211 UV c = utf8_to_uvchr((U8*)locinput, &len);
1aa99e6b
IH
3212 if (c == c1 || c == c2)
3213 break;
9041c2e3 3214 locinput += len;
1aa99e6b
IH
3215 }
3216 }
0fe9bf95 3217 }
9041c2e3 3218 if (locinput > e)
0fe9bf95
IZ
3219 sayNO;
3220 /* PL_reginput == old now */
3221 if (locinput != old) {
3222 ln = 1; /* Did some */
1aa99e6b 3223 if (regrepeat(scan, count) < count)
0fe9bf95
IZ
3224 sayNO;
3225 }
3226 /* PL_reginput == locinput now */
29d1e993 3227 TRYPAREN(paren, ln, locinput);
0fe9bf95 3228 PL_reginput = locinput; /* Could be reset... */
02db2b7b 3229 REGCP_UNWIND(lastcp);
0fe9bf95 3230 /* Couldn't or didn't -- move forward. */
1aa99e6b
IH
3231 old = locinput;
3232 if (do_utf8)
3233 locinput += UTF8SKIP(locinput);
3234 else
3235 locinput++;
0fe9bf95
IZ
3236 }
3237 }
3238 else
c277df42 3239 while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
1aa99e6b
IH
3240 UV c;
3241 if (c1 != -1000) {
3242 if (do_utf8)
9041c2e3 3243 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3244 else
9041c2e3 3245 c = UCHARAT(PL_reginput);
2390ecbc
PP
3246 /* If it could work, try it. */
3247 if (c == c1 || c == c2)
3248 {
3249 TRYPAREN(paren, n, PL_reginput);
3250 REGCP_UNWIND(lastcp);
3251 }
1aa99e6b 3252 }
a0d0e21e 3253 /* If it could work, try it. */
2390ecbc 3254 else if (c1 == -1000)
bbce6d69 3255 {
29d1e993 3256 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3257 REGCP_UNWIND(lastcp);
bbce6d69 3258 }
c277df42 3259 /* Couldn't or didn't -- move forward. */
a0ed51b3 3260 PL_reginput = locinput;
a0d0e21e
LW
3261 if (regrepeat(scan, 1)) {
3262 ln++;
a0ed51b3
LW
3263 locinput = PL_reginput;
3264 }
3265 else
4633a7c4 3266 sayNO;
a0d0e21e
LW
3267 }
3268 }
3269 else {
c277df42 3270 CHECKPOINT lastcp;
a0d0e21e 3271 n = regrepeat(scan, n);
a0ed51b3 3272 locinput = PL_reginput;
22c35a8c 3273 if (ln < n && PL_regkind[(U8)OP(next)] == EOL &&
1aeab75a 3274 (!PL_multiline || OP(next) == SEOL || OP(next) == EOS)) {
a0d0e21e 3275 ln = n; /* why back off? */
1aeab75a
GS
3276 /* ...because $ and \Z can match before *and* after
3277 newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
3278 We should back off by one in this case. */
3279 if (UCHARAT(PL_reginput - 1) == '\n' && OP(next) != EOS)
3280 ln--;
3281 }
02db2b7b 3282 REGCP_SET(lastcp);
c277df42 3283 if (paren) {
1aa99e6b 3284 UV c;
c277df42 3285 while (n >= ln) {
1aa99e6b
IH
3286 if (c1 != -1000) {
3287 if (do_utf8)
9041c2e3 3288 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3289 else
9041c2e3 3290 c = UCHARAT(PL_reginput);
1aa99e6b 3291 }
c277df42 3292 /* If it could work, try it. */
1aa99e6b 3293 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3294 {
29d1e993 3295 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3296 REGCP_UNWIND(lastcp);
c277df42
IZ
3297 }
3298 /* Couldn't or didn't -- back up. */
3299 n--;
dfe13c55 3300 PL_reginput = locinput = HOPc(locinput, -1);
c277df42 3301 }
a0ed51b3
LW
3302 }
3303 else {
1aa99e6b 3304 UV c;
c277df42 3305 while (n >= ln) {
1aa99e6b
IH
3306 if (c1 != -1000) {
3307 if (do_utf8)
9041c2e3 3308 c = utf8_to_uvchr((U8*)PL_reginput, NULL);
1aa99e6b 3309 else
9041c2e3 3310 c = UCHARAT(PL_reginput);
1aa99e6b 3311 }
c277df42 3312 /* If it could work, try it. */
1aa99e6b 3313 if (c1 == -1000 || c == c1 || c == c2)
c277df42 3314 {
29d1e993 3315 TRYPAREN(paren, n, PL_reginput);
02db2b7b 3316 REGCP_UNWIND(lastcp);
c277df42
IZ
3317 }
3318 /* Couldn't or didn't -- back up. */
3319 n--;
dfe13c55 3320 PL_reginput = locinput = HOPc(locinput, -1);
bbce6d69 3321 }
a0d0e21e
LW
3322 }
3323 }
4633a7c4 3324 sayNO;
c277df42 3325 break;
a0d0e21e 3326 case END:
0f5d15d6
IZ
3327 if (PL_reg_call_cc) {
3328 re_cc_state *cur_call_cc = PL_reg_call_cc;
3329 CURCUR *cctmp = PL_regcc;
3330 regexp *re = PL_reg_re;
3331 CHECKPOINT cp, lastcp;
3332
3333 cp = regcppush(0); /* Save *all* the positions. */
02db2b7b 3334 REGCP_SET(lastcp);
0f5d15d6
IZ
3335 regcp_set_to(PL_reg_call_cc->ss); /* Restore parens of
3336 the caller. */
3337 PL_reginput = locinput; /* Make position available to
3338 the callcc. */
3339 cache_re(PL_reg_call_cc->re);
3340 PL_regcc = PL_reg_call_cc->cc;
3341 PL_reg_call_cc = PL_reg_call_cc->prev;
3342 if (regmatch(cur_call_cc->node)) {
3343 PL_reg_call_cc = cur_call_cc;
3344 regcpblow(cp);
3345 sayYES;
3346 }
02db2b7b 3347 REGCP_UNWIND(lastcp);
0f5d15d6
IZ
3348 regcppop();
3349 PL_reg_call_cc = cur_call_cc;
3350 PL_regcc = cctmp;
3351 PL_reg_re = re;
3352 cache_re(re);
3353
3354 DEBUG_r(
3355 PerlIO_printf(Perl_debug_log,
3356 "%*s continuation failed...\n",
3357 REPORT_CODE_OFF+PL_regindent*2, "")
3358 );
7821416a 3359 sayNO_SILENT;
0f5d15d6 3360 }
7821416a
IZ
3361 if (locinput < PL_regtill) {
3362 DEBUG_r(PerlIO_printf(Perl_debug_log,
3363 "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
3364 PL_colors[4],
3365 (long)(locinput - PL_reg_starttry),
3366 (long)(PL_regtill - PL_reg_starttry),
3367 PL_colors[5]));
3368 sayNO_FINAL; /* Cannot match: too short. */
3369 }
3370 PL_reginput = locinput; /* put where regtry can find it */
3371 sayYES_FINAL; /* Success! */
7e5428c5 3372 case SUCCEED:
3280af22 3373 PL_reginput = locinput; /* put where regtry can find it */
7821416a 3374 sayYES_LOUD; /* Success! */
c277df42
IZ
3375 case SUSPEND:
3376 n = 1;
9fe1d20c 3377 PL_reginput = locinput;
9041c2e3 3378 goto do_ifmatch;
a0d0e21e 3379 case UNLESSM:
c277df42 3380 n = 0;
a0ed51b3 3381 if (scan->flags) {
0fe9bf95
IZ
3382 if (UTF) { /* XXXX This is absolutely
3383 broken, we read before
3384 start of string. */
3385 s = HOPMAYBEc(locinput, -scan->flags);
3386 if (!s)
3387 goto say_yes;
3388 PL_reginput = s;
3389 }
3390 else {
9041c2e3 3391 if (locinput < PL_bostr + scan->flags)
0fe9bf95
IZ
3392 goto say_yes;
3393 PL_reginput = locinput - scan->flags;
3394 goto do_ifmatch;
3395 }
a0ed51b3
LW
3396 }
3397 else
3398 PL_reginput = locinput;
c277df42
IZ
3399 goto do_ifmatch;
3400 case IFMATCH:
3401 n = 1;
a0ed51b3 3402 if (scan->flags) {
0fe9bf95
IZ
3403 if (UTF) { /* XXXX This is absolutely
3404 broken, we read before
3405 start of string. */
3406 s = HOPMAYBEc(locinput, -scan->flags);
3407 if (!s || s < PL_bostr)
3408 goto say_no;
3409 PL_reginput = s;
3410 }
3411 else {
9041c2e3 3412 if (locinput < PL_bostr + scan->flags)
0fe9bf95
IZ
3413 goto say_no;
3414 PL_reginput = locinput - scan->flags;
3415 goto do_ifmatch;
3416 }
a0ed51b3
LW
3417 }
3418 else
3419 PL_reginput = locinput;
3420
c277df42 3421 do_ifmatch:
c277df42
IZ
3422 inner = NEXTOPER(NEXTOPER(scan));
3423 if (regmatch(inner) != n) {
3424 say_no:
3425 if (logical) {
3426 logical = 0;
3427 sw = 0;
3428 goto do_longjump;
a0ed51b3
LW
3429 }
3430 else
c277df42
IZ
3431 sayNO;
3432 }
3433 say_yes:
3434 if (logical) {
3435 logical = 0;
3436 sw = 1;
3437 }
fe44a5e8 3438 if (OP(scan) == SUSPEND) {
3280af22 3439 locinput = PL_reginput;
565764a8 3440 nextchr = UCHARAT(locinput);
fe44a5e8 3441 }
c277df42
IZ
3442 /* FALL THROUGH. */
3443 case LONGJMP:
3444 do_longjump:
3445 next = scan + ARG(scan);
3446 if (next == scan)
3447 next = NULL;
a0d0e21e
LW
3448 break;
3449 default:
b900a521 3450 PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
d7d93a81 3451 PTR2UV(scan), OP(scan));
cea2e8a9 3452 Perl_croak(aTHX_ "regexp memory corruption");
a687059c 3453 }
02db2b7b 3454 reenter:
a0d0e21e
LW
3455 scan = next;
3456 }
a687059c 3457
a0d0e21e
LW
3458 /*
3459 * We get here only if there's trouble -- normally "case END" is
3460 * the terminating point.
3461 */
cea2e8a9 3462 Perl_croak(aTHX_ "corrupted regexp pointers");
a0d0e21e 3463 /*NOTREACHED*/
4633a7c4
LW
3464 sayNO;
3465
7821416a
IZ
3466yes_loud:
3467 DEBUG_r(
3468 PerlIO_printf(Perl_debug_log,
3469 "%*s %scould match...%s\n",
3470 REPORT_CODE_OFF+PL_regindent*2, "", PL_colors[4],PL_colors[5])
3471 );
3472 goto yes;
3473yes_final:
3474 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
3475 PL_colors[4],PL_colors[5]));
4633a7c4
LW
3476yes:
3477#ifdef DEBUGGING
3280af22 3478 PL_regindent--;
4633a7c4 3479#endif
02db2b7b
IZ
3480
3481#if 0 /* Breaks $^R */
3482 if (unwind)
3483 regcpblow(firstcp);
3484#endif
4633a7c4
LW
3485 return 1;
3486
3487no:
7821416a
IZ
3488 DEBUG_r(
3489 PerlIO_printf(Perl_debug_log,
3490 "%*s %sfailed...%s\n",
3491 REPORT_CODE_OFF+PL_regindent*2, "",PL_colors[4],PL_colors[5])
3492 );
3493 goto do_no;
3494no_final:
3495do_no:
02db2b7b
IZ
3496 if (unwind) {
3497 re_unwind_t *uw = SSPTRt(unwind,re_unwind_t);
3498
3499 switch (uw->type) {
3500 case RE_UNWIND_BRANCH:
3501 case RE_UNWIND_BRANCHJ:
3502 {
3503 re_unwind_branch_t *uwb = &(uw->branch);
3504 I32 lastparen = uwb->lastparen;
9041c2e3 3505
02db2b7b
IZ
3506 REGCP_UNWIND(uwb->lastcp);
3507 for (n = *PL_reglastparen; n > lastparen; n--)
3508 PL_regendp[n] = -1;
3509 *PL_reglastparen = n;
3510 scan = next = uwb->next;
9041c2e3
NIS
3511 if ( !scan ||
3512 OP(scan) != (uwb->type == RE_UNWIND_BRANCH
02db2b7b
IZ
3513 ? BRANCH : BRANCHJ) ) { /* Failure */
3514 unwind = uwb->prev;
3515#ifdef DEBUGGING
3516 PL_regindent--;
3517#endif
3518 goto do_no;
3519 }
3520 /* Have more choice yet. Reuse the same uwb. */
3521 /*SUPPRESS 560*/
3522 if ((n = (uwb->type == RE_UNWIND_BRANCH
3523 ? NEXT_OFF(next) : ARG(next))))
3524 next += n;
3525 else
3526 next = NULL; /* XXXX Needn't unwinding in this case... */
3527 uwb->next = next;
3528 next = NEXTOPER(scan);
3529 if (uwb->type == RE_UNWIND_BRANCHJ)
3530 next = NEXTOPER(next);
3531 locinput = uwb->locinput;
3532 nextchr = uwb->nextchr;
3533#ifdef DEBUGGING
3534 PL_regindent = uwb->regindent;
3535#endif
3536
3537 goto reenter;
3538 }
3539 /* NOT REACHED */
3540 default:
3541 Perl_croak(aTHX_ "regexp unwind memory corruption");
3542 }
3543 /* NOT REACHED */
3544 }
4633a7c4 3545#ifdef DEBUGGING
3280af22 3546 PL_regindent--;
4633a7c4 3547#endif
a0d0e21e 3548 return 0;
a687059c
LW
3549}
3550
3551/*
3552 - regrepeat - repeatedly match something simple, report how many
3553 */
3554/*
3555 * [This routine now assumes that it will only match on things of length 1.
3556 * That was true before, but now we assume scan - reginput is the count,
a0ed51b3 3557 * rather than incrementing count on every character. [Er, except utf8.]]
a687059c 3558 */
76e3520e 3559STATIC I32
cea2e8a9 3560S_regrepeat(pTHX_ regnode *p, I32 max)
a687059c 3561{
a0d0e21e 3562 register char *scan;
a0d0e21e 3563 register I32 c;
3280af22 3564 register char *loceol = PL_regeol;
a0ed51b3 3565 register I32 hardcount = 0;
ffc61ed2 3566 register bool do_utf8 = DO_UTF8(PL_reg_sv);
a0d0e21e 3567
3280af22 3568 scan = PL_reginput;
c277df42 3569 if (max != REG_INFTY && max < loceol - scan)
a0d0e21e 3570 loceol = scan + max;
a0d0e21e 3571 switch (OP(p)) {
22c35a8c 3572 case REG_ANY:
1aa99e6b 3573 if (do_utf8) {
ffc61ed2 3574 loceol = PL_regeol;
1aa99e6b 3575 while (scan < loceol && hardcount < max && *scan != '\n') {
ffc61ed2
JH
3576 scan += UTF8SKIP(scan);
3577 hardcount++;
3578 }
3579 } else {
3580 while (scan < loceol && *scan != '\n')
3581 scan++;
a0ed51b3
LW
3582 }
3583 break;
ffc61ed2 3584 case SANY:
3baa4c62 3585 scan = loceol;
a0ed51b3 3586 break;
bbce6d69 3587 case EXACT: /* length of string is 1 */
cd439c50 3588 c = (U8)*STRING(p);
bbce6d69 3589 while (scan < loceol && UCHARAT(scan) == c)
3590 scan++;
3591 break;
3592 case EXACTF: /* length of string is 1 */
cd439c50 3593 c = (U8)*STRING(p);
bbce6d69 3594 while (scan < loceol &&
22c35a8c 3595 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
bbce6d69 3596 scan++;
3597 break;
3598 case EXACTFL: /* length of string is 1 */
3280af22 3599 PL_reg_flags |= RF_tainted;
cd439c50 3600 c = (U8)*STRING(p);
bbce6d69 3601 while (scan < loceol &&
22c35a8c 3602 (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
a0d0e21e
LW
3603 scan++;
3604 break;
3605 case ANYOF:
ffc61ed2
JH
3606 if (do_utf8) {
3607 loceol = PL_regeol;
1aa99e6b
IH
3608 while (hardcount < max && scan < loceol &&
3609 reginclass(p, (U8*)scan, do_utf8)) {
ffc61ed2
JH
3610 scan += UTF8SKIP(scan);
3611 hardcount++;
3612 }
3613 } else {
3614 while (scan < loceol && reginclass(p, (U8*)scan, do_utf8))
3615 scan++;
3616 }
a0d0e21e
LW
3617 break;
3618 case ALNUM:
1aa99e6b 3619 if (do_utf8) {
ffc61ed2 3620 loceol = PL_regeol;
8269fa76 3621 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b
IH
3622 while (hardcount < max && scan < loceol &&
3623 swash_fetch(PL_utf8_alnum, (U8*)scan)) {
ffc61ed2
JH
3624 scan += UTF8SKIP(scan);
3625 hardcount++;
3626 }
3627 } else {
3628 while (scan < loceol && isALNUM(*scan))
3629 scan++;
a0ed51b3
LW
3630 }
3631 break;
bbce6d69 3632 case ALNUML:
3280af22 3633 PL_reg_flags |= RF_tainted;
1aa99e6b 3634 if (do_utf8) {
ffc61ed2 3635 loceol = PL_regeol;
1aa99e6b
IH
3636 while (hardcount < max && scan < loceol &&
3637 isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3638 scan += UTF8SKIP(scan);
3639 hardcount++;
3640 }
3641 } else {
3642 while (scan < loceol && isALNUM_LC(*scan))
3643 scan++;
a0ed51b3
LW
3644 }
3645 break;
a0d0e21e 3646 case NALNUM:
1aa99e6b 3647 if (do_utf8) {
ffc61ed2 3648 loceol = PL_regeol;
8269fa76 3649 LOAD_UTF8_CHARCLASS(alnum,"a");
1aa99e6b
IH
3650 while (hardcount < max && scan < loceol &&
3651 !swash_fetch(PL_utf8_alnum, (U8*)scan)) {
ffc61ed2
JH
3652 scan += UTF8SKIP(scan);
3653 hardcount++;
3654 }
3655 } else {
3656 while (scan < loceol && !isALNUM(*scan))
3657 scan++;
a0ed51b3
LW
3658 }
3659 break;
bbce6d69 3660 case NALNUML:
3280af22 3661 PL_reg_flags |= RF_tainted;
1aa99e6b 3662 if (do_utf8) {
ffc61ed2 3663 loceol = PL_regeol;
1aa99e6b
IH
3664 while (hardcount < max && scan < loceol &&
3665 !isALNUM_LC_utf8((U8*)scan)) {
ffc61ed2
JH
3666 scan += UTF8SKIP(scan);
3667 hardcount++;
3668 }
3669 } else {
3670 while (scan < loceol && !isALNUM_LC(*scan))
3671 scan++;
a0ed51b3
LW
3672 }
3673 break;
a0d0e21e 3674 case SPACE:
1aa99e6b 3675 if (do_utf8) {
ffc61ed2 3676 loceol = PL_regeol;
8269fa76 3677 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3678 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3679 (*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3680 scan += UTF8SKIP(scan);
3681 hardcount++;
3682 }
3683 } else {
3684 while (scan < loceol && isSPACE(*scan))
3685 scan++;
a0ed51b3
LW
3686 }
3687 break;
bbce6d69 3688 case SPACEL:
3280af22 3689 PL_reg_flags |= RF_tainted;
1aa99e6b 3690 if (do_utf8) {
ffc61ed2 3691 loceol = PL_regeol;
1aa99e6b 3692 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3693 (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3694 scan += UTF8SKIP(scan);
3695 hardcount++;
3696 }
3697 } else {
3698 while (scan < loceol && isSPACE_LC(*scan))
3699 scan++;
a0ed51b3
LW
3700 }
3701 break;
a0d0e21e 3702 case NSPACE:
1aa99e6b 3703 if (do_utf8) {
ffc61ed2 3704 loceol = PL_regeol;
8269fa76 3705 LOAD_UTF8_CHARCLASS(space," ");
1aa99e6b 3706 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3707 !(*scan == ' ' || swash_fetch(PL_utf8_space,(U8*)scan))) {
3708 scan += UTF8SKIP(scan);
3709 hardcount++;
3710 }
3711 } else {
3712 while (scan < loceol && !isSPACE(*scan))
3713 scan++;
3714 break;
a0ed51b3 3715 }
bbce6d69 3716 case NSPACEL:
3280af22 3717 PL_reg_flags |= RF_tainted;
1aa99e6b 3718 if (do_utf8) {
ffc61ed2 3719 loceol = PL_regeol;
1aa99e6b 3720 while (hardcount < max && scan < loceol &&
ffc61ed2
JH
3721 !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
3722 scan += UTF8SKIP(scan);
3723 hardcount++;
3724 }
3725 } else {
3726 while (scan < loceol && !isSPACE_LC(*scan))
3727 scan++;
a0ed51b3
LW
3728 }
3729 break;
a0d0e21e 3730 case DIGIT:
1aa99e6b 3731 if (do_utf8) {
ffc61ed2 3732 loceol = PL_regeol;
8269fa76 3733 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b
IH
3734 while (hardcount < max && scan < loceol &&
3735 swash_fetch(PL_utf8_digit,(U8*)scan)) {
ffc61ed2
JH
3736 scan += UTF8SKIP(scan);
3737 hardcount++;
3738 }
3739 } else {
3740 while (scan < loceol && isDIGIT(*scan))
3741 scan++;
a0ed51b3
LW
3742 }
3743 break;
a0d0e21e 3744 case NDIGIT:
1aa99e6b 3745 if (do_utf8) {
ffc61ed2 3746 loceol = PL_regeol;
8269fa76 3747 LOAD_UTF8_CHARCLASS(digit,"0");
1aa99e6b
IH
3748 while (hardcount < max && scan < loceol &&
3749 !swash_fetch(PL_utf8_digit,(U8*)scan)) {
ffc61ed2
JH
3750 scan += UTF8SKIP(scan);
3751 hardcount++;
3752 }
3753 } else {
3754 while (scan < loceol && !isDIGIT(*scan))
3755 scan++;
a0ed51b3
LW
3756 }
3757 break;
a0d0e21e
LW
3758 default: /* Called on something of 0 width. */
3759 break; /* So match right here or not at all. */
3760 }
a687059c 3761
a0ed51b3
LW
3762 if (hardcount)
3763 c = hardcount;
3764 else
3765 c = scan - PL_reginput;
3280af22 3766 PL_reginput = scan;
a687059c 3767
9041c2e3 3768 DEBUG_r(
c277df42
IZ
3769 {
3770 SV *prop = sv_newmortal();
3771
3772 regprop(prop, p);
9041c2e3
NIS
3773 PerlIO_printf(Perl_debug_log,
3774 "%*s %s can match %"IVdf" times out of %"IVdf"...\n",
7b0972df 3775 REPORT_CODE_OFF+1, "", SvPVX(prop),(IV)c,(IV)max);
c277df42 3776 });
9041c2e3 3777
a0d0e21e 3778 return(c);
a687059c
LW
3779}
3780
3781/*
c277df42 3782 - regrepeat_hard - repeatedly match something, report total lenth and length
9041c2e3 3783 *
c277df42
IZ
3784 * The repeater is supposed to have constant length.
3785 */
3786
76e3520e 3787STATIC I32
cea2e8a9 3788S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp)
c277df42
IZ
3789{
3790 register char *scan;
3791 register char *start;
3280af22 3792 register char *loceol = PL_regeol;
a0ed51b3 3793 I32 l = 0;
708e3b05 3794 I32 count = 0, res = 1;
a0ed51b3
LW
3795
3796 if (!max)
3797 return 0;
c277df42 3798
3280af22 3799 start = PL_reginput;
1aa99e6b 3800 if (DO_UTF8(PL_reg_sv)) {
708e3b05 3801 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3802 if (!count++) {
3803 l = 0;
3804 while (start < PL_reginput) {
3805 l++;
3806 start += UTF8SKIP(start);
3807 }
3808 *lp = l;
3809 if (l == 0)
3810 return max;
3811 }
3812 if (count == max)
3813 return count;
3814 }
3815 }
3816 else {
708e3b05 3817 while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) {
a0ed51b3
LW
3818 if (!count++) {
3819 *lp = l = PL_reginput - start;
3820 if (max != REG_INFTY && l*max < loceol - scan)
3821 loceol = scan + l*max;
3822 if (l == 0)
3823 return max;
c277df42
IZ
3824 }
3825 }
3826 }
708e3b05 3827 if (!res)
3280af22 3828 PL_reginput = scan;
9041c2e3 3829
a0ed51b3 3830 return count;
c277df42
IZ
3831}
3832
3833/*
ffc61ed2
JH
3834- regclass_swash - prepare the utf8 swash
3835*/
3836
3837SV *
3838Perl_regclass_swash(pTHX_ register regnode* node, bool doinit, SV** initsvp)
3839{
3840 SV *sw = NULL;
3841 SV *si = NULL;
3842
3843 if (PL_regdata && PL_regdata->count) {
3844 U32 n = ARG(node);
3845
3846 if (PL_regdata->what[n] == 's') {
3847 SV *rv = (SV*)PL_regdata->data[n];
3848 AV *av = (AV*)SvRV((SV*)rv);
3849 SV **a;
9041c2e3 3850
ffc61ed2
JH
3851 si = *av_fetch(av, 0, FALSE);
3852 a = av_fetch(av, 1, FALSE);
9041c2e3 3853
ffc61ed2
JH
3854 if (a)
3855 sw = *a;
3856 else if (si && doinit) {
3857 sw = swash_init("utf8", "", si, 1, 0);
3858 (void)av_store(av, 1, sw);
3859 }
3860 }
3861 }
3862
3863 if (initsvp)
3864 *initsvp = si;
3865
3866 return sw;
3867}
3868
3869/*
cb8d8820 3870 - reginclass - determine if a character falls into a character class
bbce6d69 3871 */
3872
76e3520e 3873STATIC bool
ffc61ed2 3874S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8)
bbce6d69 3875{
ffc61ed2 3876 char flags = ANYOF_FLAGS(n);
bbce6d69 3877 bool match = FALSE;
1aa99e6b
IH
3878 UV c;
3879 STRLEN len;
3880
3881 if (do_utf8)
9041c2e3 3882 c = utf8_to_uvchr(p, &len);
1aa99e6b
IH
3883 else
3884 c = *p;
bbce6d69 3885
ffc61ed2
JH
3886 if (do_utf8 || (flags & ANYOF_UNICODE)) {
3887 if (do_utf8 && !ANYOF_RUNTIME(n)) {
ffc61ed2
JH
3888 if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
3889 match = TRUE;
bbce6d69 3890 }
1aa99e6b
IH
3891 if (do_utf8 && flags & ANYOF_UNICODE_ALL && c >= 256)
3892 match = TRUE;
ffc61ed2
JH
3893 if (!match) {
3894 SV *sw = regclass_swash(n, TRUE, 0);
3895
3896 if (sw) {
3897 if (swash_fetch(sw, p))
3898 match = TRUE;
3899 else if (flags & ANYOF_FOLD) {
3900 U8 tmpbuf[UTF8_MAXLEN+1];
9041c2e3 3901
ffc61ed2
JH
3902 if (flags & ANYOF_LOCALE) {
3903 PL_reg_flags |= RF_tainted;
9041c2e3 3904 uvchr_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
ffc61ed2 3905 }
1aa99e6b 3906 else
9041c2e3 3907 uvchr_to_utf8(tmpbuf, toLOWER_utf8(p));
ffc61ed2
JH
3908 if (swash_fetch(sw, tmpbuf))
3909 match = TRUE;
3910 }
3911 }
bbce6d69 3912 }
3913 }
1aa99e6b 3914 if (!match && c < 256) {
ffc61ed2
JH
3915 if (ANYOF_BITMAP_TEST(n, c))
3916 match = TRUE;
3917 else if (flags & ANYOF_FOLD) {
3918 I32 f;
a0ed51b3 3919
ffc61ed2
JH
3920 if (flags & ANYOF_LOCALE) {
3921 PL_reg_flags |= RF_tainted;
3922 f = PL_fold_locale[c];
3923 }
3924 else
3925 f = PL_fold[c];
3926 if (f != c && ANYOF_BITMAP_TEST(n, f))
3927 match = TRUE;
3928 }
3929
3930 if (!match && (flags & ANYOF_CLASS)) {
a0ed51b3 3931 PL_reg_flags |= RF_tainted;
ffc61ed2
JH
3932 if (
3933 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM) && isALNUM_LC(c)) ||
3934 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM) && !isALNUM_LC(c)) ||
3935 (ANYOF_CLASS_TEST(n, ANYOF_SPACE) && isSPACE_LC(c)) ||
3936 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE) && !isSPACE_LC(c)) ||
3937 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT) && isDIGIT_LC(c)) ||
3938 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT) && !isDIGIT_LC(c)) ||
3939 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC) && isALNUMC_LC(c)) ||
3940 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
3941 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
3942 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
3943 (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
3944 (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
3945 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
3946 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
3947 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
3948 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH) && !isGRAPH_LC(c)) ||
3949 (ANYOF_CLASS_TEST(n, ANYOF_LOWER) && isLOWER_LC(c)) ||
3950 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER) && !isLOWER_LC(c)) ||
3951 (ANYOF_CLASS_TEST(n, ANYOF_PRINT) && isPRINT_LC(c)) ||
3952 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT) && !isPRINT_LC(c)) ||
3953 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT) && isPUNCT_LC(c)) ||
3954 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT) && !isPUNCT_LC(c)) ||
3955 (ANYOF_CLASS_TEST(n, ANYOF_UPPER) && isUPPER_LC(c)) ||
3956 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER) && !isUPPER_LC(c)) ||
3957 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT) && isXDIGIT(c)) ||
3958 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
3959 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
3960 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
3961 (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
3962 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
3963 ) /* How's that for a conditional? */
3964 {
3965 match = TRUE;
3966 }
a0ed51b3 3967 }
a0ed51b3
LW
3968 }
3969
a0ed51b3
LW
3970 return (flags & ANYOF_INVERT) ? !match : match;
3971}
161b471a 3972
dfe13c55 3973STATIC U8 *
cea2e8a9 3974S_reghop(pTHX_ U8 *s, I32 off)
9041c2e3 3975{
1aa99e6b
IH
3976 return S_reghop3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
3977}
3978
3979STATIC U8 *
3980S_reghop3(pTHX_ U8 *s, I32 off, U8* lim)
9041c2e3 3981{
a0ed51b3 3982 if (off >= 0) {
1aa99e6b 3983 while (off-- && s < lim) {
ffc61ed2 3984 /* XXX could check well-formedness here */
a0ed51b3 3985 s += UTF8SKIP(s);
ffc61ed2 3986 }
a0ed51b3
LW
3987 }
3988 else {
3989 while (off++) {
1aa99e6b 3990 if (s > lim) {
a0ed51b3 3991 s--;
ffc61ed2 3992 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 3993 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 3994 s--;
ffc61ed2
JH
3995 }
3996 /* XXX could check well-formedness here */
a0ed51b3
LW
3997 }
3998 }
3999 }
4000 return s;
4001}
161b471a 4002
dfe13c55 4003STATIC U8 *
1aa99e6b 4004S_reghopmaybe(pTHX_ U8 *s, I32 off)
9041c2e3 4005{
1aa99e6b
IH
4006 return S_reghopmaybe3(aTHX_ s, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr));
4007}
4008
4009STATIC U8 *
4010S_reghopmaybe3(pTHX_ U8* s, I32 off, U8* lim)
a0ed51b3
LW
4011{
4012 if (off >= 0) {
1aa99e6b 4013 while (off-- && s < lim) {
ffc61ed2 4014 /* XXX could check well-formedness here */
a0ed51b3 4015 s += UTF8SKIP(s);
ffc61ed2 4016 }
a0ed51b3
LW
4017 if (off >= 0)
4018 return 0;
4019 }
4020 else {
4021 while (off++) {
1aa99e6b 4022 if (s > lim) {
a0ed51b3 4023 s--;
ffc61ed2 4024 if (UTF8_IS_CONTINUED(*s)) {
1aa99e6b 4025 while (s > (U8*)lim && UTF8_IS_CONTINUATION(*s))
a0ed51b3 4026 s--;
ffc61ed2
JH
4027 }
4028 /* XXX could check well-formedness here */
a0ed51b3
LW
4029 }
4030 else
4031 break;
4032 }
4033 if (off <= 0)
4034 return 0;
4035 }
4036 return s;
4037}
51371543
GS
4038
4039#ifdef PERL_OBJECT
51371543
GS
4040#include "XSUB.h"
4041#endif
4042
4043static void
4044restore_pos(pTHXo_ void *arg)
4045{
51371543
GS
4046 if (PL_reg_eval_set) {
4047 if (PL_reg_oldsaved) {
4048 PL_reg_re->subbeg = PL_reg_oldsaved;
4049 PL_reg_re->sublen = PL_reg_oldsavedlen;
4050 RX_MATCH_COPIED_on(PL_reg_re);
4051 }
4052 PL_reg_magic->mg_len = PL_reg_oldpos;
4053 PL_reg_eval_set = 0;
4054 PL_curpm = PL_reg_oldcurpm;
4055 }
4056}